|
import pickle |
|
import numpy as np |
|
import io |
|
import math |
|
import cv2 |
|
from skimage.feature import hog |
|
from fastapi import FastAPI, File, UploadFile, HTTPException |
|
from PIL import Image |
|
|
|
# ASGI application object. The keyword arguments are service metadata handed
# to the FastAPI constructor (name, human-readable summary, release number).
app = FastAPI(
    version="2.0.0",
    title="Enhanced Peruvian Sign Language (LSP) Recognition API",
    description="Upload an image of a Peruvian Sign Language alphabet sign to predict the corresponding letter using an enhanced Self-Organizing Map (SOM) with HOG features.",
)
|
|
|
MODEL_FILENAME = 'lsp_som_model_enhanced.pkl'

# Model assets filled in from the pickle below. They are declared up front so
# every name exists even when loading fails; ``som is None`` is the signal
# that the model is unavailable.
som = None
label_map = None
CLASSES = None
IMG_SIZE = None
HOG_PARAMS = None

try:
    # SECURITY NOTE: pickle.load can execute arbitrary code stored in the
    # file. MODEL_FILENAME must only ever point at a trusted artifact.
    with open(MODEL_FILENAME, 'rb') as f:
        model_data = pickle.load(f)
except FileNotFoundError:
    # Status glyphs are written as \u escapes so the source stays pure ASCII
    # and cannot be corrupted again by a wrong file-encoding round trip.
    print(f"\u274c FATAL ERROR: Model file '{MODEL_FILENAME}' not found.")
    som = None
else:
    # Unpack the individual assets stored in the pickled dictionary. A missing
    # key raises KeyError and aborts start-up, exactly as before.
    som = model_data['som']
    label_map = model_data['label_map']
    CLASSES = model_data['classes']
    IMG_SIZE = model_data['img_size']
    HOG_PARAMS = model_data['feature_extraction_params']

    print(f"\u2705 Model '{MODEL_FILENAME}' and assets loaded successfully.")
    print(f" - Classes: {CLASSES}")
    print(f" - Expected Image Size for Processing: {IMG_SIZE}x{IMG_SIZE}")
    print(f" - Feature Extractor: {HOG_PARAMS['type']}")
|
|
|
def preprocess_and_extract_features_from_bytes(image_bytes: bytes):
    """Turn raw uploaded image bytes into the HOG feature vector for the model.

    Steps, in order:
      1. Decode the bytes with OpenCV and convert BGR -> RGB.
      2. Threshold the image in YCrCb space to build a skin mask.
      3. Crop the RGB image to the bounding box of the largest mask contour.
      4. Resize the crop to ``IMG_SIZE x IMG_SIZE`` and convert to grayscale.
      5. Compute HOG features using the settings stored in ``HOG_PARAMS``.

    Args:
        image_bytes: Encoded image file contents (as read from the upload).

    Returns:
        The value returned by ``skimage.feature.hog`` for the processed crop.

    Raises:
        HTTPException: Status 400 for any failure in the pipeline (empty
            payload, undecodable image, no contour found, or an error raised
            by OpenCV / scikit-image). The original exception is chained as
            ``__cause__``.
    """
    try:
        # Reject an empty upload explicitly so the client gets a clear reason
        # instead of whatever low-level error the decoder would produce.
        if not image_bytes:
            raise ValueError("Uploaded file is empty.")

        nparr = np.frombuffer(image_bytes, np.uint8)
        img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img_bgr is None:
            raise ValueError("Could not decode image bytes. The file may be corrupt or not an image.")
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Skin mask: keep pixels whose (Y, Cr, Cb) values fall inside the
        # bounds below. Y is unconstrained; Cr and Cb are limited to a band.
        ycrcb = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2YCrCb)
        skin_mask = cv2.inRange(ycrcb, np.array([0, 135, 85]), np.array([255, 180, 135]))

        contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            raise ValueError("No contours found in the image. The hand sign may not be clear enough.")

        # The largest masked region (by contour area) is taken to be the hand.
        largest_contour = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest_contour)
        cropped_hand = img_rgb[y:y+h, x:x+w]

        # Normalise to the size stored with the model before describing it.
        resized_hand = cv2.resize(cropped_hand, (IMG_SIZE, IMG_SIZE))
        gray_hand = cv2.cvtColor(resized_hand, cv2.COLOR_RGB2GRAY)

        hog_features = hog(gray_hand,
                           orientations=HOG_PARAMS['orientations'],
                           pixels_per_cell=HOG_PARAMS['pixels_per_cell'],
                           cells_per_block=HOG_PARAMS['cells_per_block'],
                           transform_sqrt=HOG_PARAMS['transform_sqrt'],
                           block_norm=HOG_PARAMS['block_norm'])

        return hog_features

    except Exception as exc:
        # Deliberately broad: this is the request boundary, and every
        # processing failure is reported to the client as a 400. Chaining
        # with ``from`` keeps the original traceback for server-side debugging.
        raise HTTPException(status_code=400, detail=f"Image processing failed. Error: {exc}") from exc
|
|
|
|
|
@app.get("/", tags=["Status"])
def read_root():
    """Return a fixed status payload for the API root path."""
    welcome_payload = {
        "status": "ok",
        "message": "Welcome to the Enhanced LSP Recognition API!",
    }
    return welcome_payload
|
|
|
@app.post("/predict", tags=["Prediction"])
async def predict_sign(file: UploadFile = File(..., description="An image file of a Peruvian Sign Language sign.")):
    """Predict the letter shown in an uploaded sign image.

    The upload is converted to a feature vector, the SOM's winning neuron for
    that vector is looked up in ``label_map`` and the resulting index is
    translated through ``CLASSES``. When the winning neuron has no label, the
    label of the closest labelled neuron (Euclidean distance on the map grid)
    is used instead.

    Args:
        file: The uploaded image file.

    Returns:
        A dict with the keys ``filename``, ``predicted_letter``,
        ``prediction_type`` and ``winner_neuron_on_map``.

    Raises:
        HTTPException: Status 503 when no model is loaded. Status 400 is
            propagated from the preprocessing step when the image cannot be
            processed.
    """
    # Identity test rather than truthiness: a loaded model object must never
    # be treated as missing just because it happens to evaluate as falsy.
    if som is None:
        raise HTTPException(status_code=503, detail="Model is not available. API cannot process requests.")

    image_bytes = await file.read()

    # NOTE(review): preprocessing is synchronous and runs inside this async
    # handler; consider off-loading it if request latency becomes a concern.
    feature_vector = preprocess_and_extract_features_from_bytes(image_bytes)

    winner_neuron = som.winner(feature_vector)
    # -1 marks "no label for this neuron" (kept for backward compatibility).
    predicted_index = label_map.get(winner_neuron, -1)

    is_best_guess = False
    if predicted_index == -1:
        is_best_guess = True
        if label_map:
            win_row, win_col = winner_neuron[0], winner_neuron[1]
            # Squared distance orders candidates exactly like the Euclidean
            # distance, so no sqrt is needed. ``min`` returns the first
            # minimum in iteration order, matching a strict ``<`` scan.
            nearest_pos = min(
                label_map,
                key=lambda pos: (win_row - pos[0]) ** 2 + (win_col - pos[1]) ** 2,
            )
            predicted_index = label_map[nearest_pos]

    if predicted_index != -1:
        predicted_letter = CLASSES[predicted_index]
        prediction_type = "Nearest Neighbor Guess" if is_best_guess else "Direct Match"
    else:
        # Reached only when no usable label exists anywhere on the map.
        predicted_letter = "Unknown"
        prediction_type = "Critical Error: No mapped neurons found on the entire map."

    response = {
        "filename": file.filename,
        "predicted_letter": predicted_letter,
        "prediction_type": prediction_type,
        # Cast to plain ints so the coordinates are JSON-serialisable.
        "winner_neuron_on_map": [int(coord) for coord in winner_neuron]
    }
    print(f"[LOG] Prediction successful. Response: {response}")
    return response