import math
import pickle

import cv2
import numpy as np
from fastapi import FastAPI, File, HTTPException, UploadFile
from skimage.feature import hog

app = FastAPI(
    title="Enhanced Peruvian Sign Language (LSP) Recognition API",
    description=(
        "Upload an image of a Peruvian Sign Language alphabet sign to predict "
        "the corresponding letter using an enhanced Self-Organizing Map (SOM) "
        "with HOG features."
    ),
    version="2.0.0",
)

MODEL_FILENAME = "lsp_som_model_enhanced.pkl"

# Load the trained SOM and its companion assets at startup. If the model file
# is missing, keep the API alive but mark every asset as unavailable so that
# /predict can return a 503 instead of raising a NameError.
try:
    with open(MODEL_FILENAME, "rb") as f:
        model_data = pickle.load(f)
    som = model_data["som"]
    label_map = model_data["label_map"]  # (row, col) neuron -> class index
    CLASSES = model_data["classes"]  # class index -> letter
    IMG_SIZE = model_data["img_size"]  # square side length used during training
    HOG_PARAMS = model_data["feature_extraction_params"]  # HOG parameters
    print(f"✅ Model '{MODEL_FILENAME}' and assets loaded successfully.")
    print(f"   - Classes: {CLASSES}")
    print(f"   - Expected Image Size for Processing: {IMG_SIZE}x{IMG_SIZE}")
    print(f"   - Feature Extractor: {HOG_PARAMS['type']}")
except FileNotFoundError:
    print(f"❌ FATAL ERROR: Model file '{MODEL_FILENAME}' not found.")
    som = label_map = CLASSES = IMG_SIZE = HOG_PARAMS = None
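# For reference, a sketch of the pickle layout this loader assumes, inferred
# from the keys accessed above. The concrete values are illustrative
# assumptions (the HOG numbers shown are skimage's defaults), not taken from
# the actual model file:
#
#   model_data = {
#       "som": <trained SOM with a .winner(vector) method, e.g. minisom.MiniSom>,
#       "label_map": {(0, 0): 0, (0, 1): 1, ...},  # winning neuron -> class index
#       "classes": ["A", "B", "C", ...],           # class index -> LSP letter
#       "img_size": 64,                            # e.g. 64x64 crops
#       "feature_extraction_params": {
#           "type": "HOG",
#           "orientations": 9,
#           "pixels_per_cell": (8, 8),
#           "cells_per_block": (2, 2),
#           "transform_sqrt": True,
#           "block_norm": "L2-Hys",
#       },
#   }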
def preprocess_and_extract_features_from_bytes(image_bytes: bytes):
    """Decode an uploaded image, segment the hand via YCrCb skin masking,
    crop it, and return the HOG feature vector expected by the SOM."""
    try:
        # Decode the raw bytes into a BGR image (OpenCV's native channel order).
        nparr = np.frombuffer(image_bytes, np.uint8)
        img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img_bgr is None:
            raise ValueError("Could not decode image bytes. The file may be corrupt or not an image.")
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Segment skin in YCrCb space; these Cr/Cb bounds are a common
        # heuristic range for skin tones.
        ycrcb = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2YCrCb)
        skin_mask = cv2.inRange(ycrcb, np.array([0, 135, 85]), np.array([255, 180, 135]))

        # Treat the largest skin-colored contour as the hand.
        contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            raise ValueError("No contours found in the image. The hand sign may not be clear enough.")
        largest_contour = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest_contour)
        if w == 0 or h == 0:
            raise ValueError("Detected hand region is empty.")

        # Crop, resize to the training resolution, and extract HOG features
        # with the same parameters the model was trained with.
        cropped_hand = img_rgb[y:y + h, x:x + w]
        resized_hand = cv2.resize(cropped_hand, (IMG_SIZE, IMG_SIZE))
        gray_hand = cv2.cvtColor(resized_hand, cv2.COLOR_RGB2GRAY)
        hog_features = hog(
            gray_hand,
            orientations=HOG_PARAMS["orientations"],
            pixels_per_cell=HOG_PARAMS["pixels_per_cell"],
            cells_per_block=HOG_PARAMS["cells_per_block"],
            transform_sqrt=HOG_PARAMS["transform_sqrt"],
            block_norm=HOG_PARAMS["block_norm"],
        )
        return hog_features
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Image processing failed. Error: {e}")


@app.get("/", tags=["Status"])
def read_root():
    return {"status": "ok", "message": "Welcome to the Enhanced LSP Recognition API!"}


@app.post("/predict", tags=["Prediction"])
async def predict_sign(file: UploadFile = File(..., description="An image file of a Peruvian Sign Language sign.")):
    if som is None:
        raise HTTPException(status_code=503, detail="Model is not available. API cannot process requests.")

    image_bytes = await file.read()
    feature_vector = preprocess_and_extract_features_from_bytes(image_bytes)

    # Find the SOM's winning neuron for this feature vector. If that neuron
    # was never labeled during training, fall back to the nearest labeled
    # neuron on the map (Euclidean distance in grid coordinates).
    winner_neuron = som.winner(feature_vector)
    predicted_index = label_map.get(winner_neuron, -1)

    is_best_guess = False
    if predicted_index == -1:
        is_best_guess = True
        min_dist = float("inf")
        for mapped_pos, mapped_label in label_map.items():
            dist = math.dist(winner_neuron, mapped_pos)
            if dist < min_dist:
                min_dist = dist
                predicted_index = mapped_label

    if predicted_index != -1:
        predicted_letter = CLASSES[predicted_index]
        prediction_type = "Nearest Neighbor Guess" if is_best_guess else "Direct Match"
    else:
        predicted_letter = "Unknown"
        prediction_type = "Critical Error: No mapped neurons found on the entire map."

    response = {
        "filename": file.filename,
        "predicted_letter": predicted_letter,
        "prediction_type": prediction_type,
        "winner_neuron_on_map": [int(coord) for coord in winner_neuron],
    }
    print(f"[LOG] Prediction successful. Response: {response}")
    return response
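# A minimal way to run the API locally, assuming this file is saved as
# main.py and that uvicorn (the standard ASGI server for FastAPI) is
# installed; the module name and port below are illustrative choices:
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Example request (the file name is hypothetical):
#
#   curl -X POST "http://localhost:8000/predict" -F "file=@sign_a.jpg"
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)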