Spaces:

axvg
/

alsp2

Sleeping

File size: 4,433 Bytes

f1acbf1

import pickle
import numpy as np
import io
import math
import cv2
from skimage.feature import hog
from fastapi import FastAPI, File, UploadFile, HTTPException
from PIL import Image

# The ASGI application object; the route decorators further down register
# their handlers on it. The metadata below is passed straight to FastAPI.
app = FastAPI(
    version="2.0.0",
    title="Enhanced Peruvian Sign Language (LSP) Recognition API",
    description=(
        "Upload an image of a Peruvian Sign Language alphabet sign to predict "
        "the corresponding letter using an enhanced Self-Organizing Map (SOM) "
        "with HOG features."
    ),
)

# Path of the pickled model bundle, resolved against the working directory.
MODEL_FILENAME = 'lsp_som_model_enhanced.pkl'

# Every model asset is pre-declared so the module-level names always exist,
# even when the model file is missing. `som is None` means "model not loaded".
som = None          # object exposing .winner(feature_vector)
label_map = None    # mapping: winning-neuron position -> index into CLASSES
CLASSES = None      # sequence of class labels, indexed by label_map values
IMG_SIZE = None     # side length the cropped image is resized to
HOG_PARAMS = None   # keyword settings forwarded to skimage.feature.hog

try:
    with open(MODEL_FILENAME, 'rb') as f:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. Only ship a model file from a source you trust.
        model_data = pickle.load(f)
except FileNotFoundError:
    # Deliberate best-effort: the app still starts, with `som` left as None.
    print(f"❌ FATAL ERROR: Model file '{MODEL_FILENAME}' not found.")
else:
    # A missing key here still raises KeyError at import time, as before.
    som = model_data['som']
    label_map = model_data['label_map']
    CLASSES = model_data['classes']
    IMG_SIZE = model_data['img_size']
    HOG_PARAMS = model_data['feature_extraction_params']  # Load HOG parameters
    print(f"✅ Model '{MODEL_FILENAME}' and assets loaded successfully.")
    print(f"    - Classes: {CLASSES}")
    print(f"    - Expected Image Size for Processing: {IMG_SIZE}x{IMG_SIZE}")
    print(f"    - Feature Extractor: {HOG_PARAMS['type']}")

def preprocess_and_extract_features_from_bytes(image_bytes: bytes):
    """Turn raw image bytes into a HOG feature vector.

    Steps, in order: decode the bytes with OpenCV, threshold the image in
    YCrCb space, crop to the bounding box of the largest external contour of
    that mask, resize the crop to ``IMG_SIZE`` x ``IMG_SIZE``, convert it to
    grayscale and compute HOG features using the settings in ``HOG_PARAMS``.

    Args:
        image_bytes: Encoded image content (e.g. the body of an upload).

    Returns:
        The value returned by ``skimage.feature.hog`` for the processed crop.

    Raises:
        HTTPException: Status 400 for *any* failure inside the pipeline,
            including undecodable bytes and an empty mask; the original
            error text is embedded in ``detail``.
    """
    try:
        raw_buffer = np.frombuffer(image_bytes, np.uint8)
        decoded_bgr = cv2.imdecode(raw_buffer, cv2.IMREAD_COLOR)
        if decoded_bgr is None:
            raise ValueError("Could not decode image bytes. The file may be corrupt or not an image.")
        rgb_image = cv2.cvtColor(decoded_bgr, cv2.COLOR_BGR2RGB)

        # Threshold in YCrCb; bounds are given as (Y, Cr, Cb) lower/upper.
        # Presumably tuned to isolate skin-coloured pixels -- TODO confirm.
        ycrcb_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2YCrCb)
        lower_bound = np.array([0, 135, 85])
        upper_bound = np.array([255, 180, 135])
        mask = cv2.inRange(ycrcb_image, lower_bound, upper_bound)

        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            raise ValueError("No contours found in the image. The hand sign may not be clear enough.")

        # Crop to the bounding rectangle of the contour with the largest area.
        left, top, width, height = cv2.boundingRect(max(contours, key=cv2.contourArea))
        hand_crop = rgb_image[top:top + height, left:left + width]

        hand_gray = cv2.cvtColor(
            cv2.resize(hand_crop, (IMG_SIZE, IMG_SIZE)),
            cv2.COLOR_RGB2GRAY,
        )

        # Forward exactly these five settings from the loaded model bundle.
        hog_kwargs = {
            key: HOG_PARAMS[key]
            for key in (
                'orientations',
                'pixels_per_cell',
                'cells_per_block',
                'transform_sqrt',
                'block_norm',
            )
        }
        return hog(hand_gray, **hog_kwargs)

    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Image processing failed. Error: {e}")


@app.get("/", tags=["Status"])
def read_root():
    # Status endpoint: returns a fixed JSON payload.
    # (Plain comments, not a docstring: FastAPI would publish a docstring as
    # the endpoint description in the generated OpenAPI schema.)
    payload = dict(
        status="ok",
        message="Welcome to the Enhanced LSP Recognition API!",
    )
    return payload

@app.post("/predict", tags=["Prediction"])
async def predict_sign(file: UploadFile = File(..., description="An image file of a Peruvian Sign Language sign.")):
    # Classify an uploaded image.
    #   1. Reject with 503 when no model is loaded.
    #   2. Extract the feature vector (that helper raises 400 on failure).
    #   3. Look up the winning neuron's label; if that neuron has no label,
    #      fall back to the label of the closest labelled neuron on the grid.
    # (Plain comments, not a docstring: FastAPI would publish a docstring as
    # the endpoint description in the generated OpenAPI schema.)
    if not som:
        raise HTTPException(status_code=503, detail="Model is not available. API cannot process requests.")

    uploaded_bytes = await file.read()
    features = preprocess_and_extract_features_from_bytes(uploaded_bytes)
    winner_neuron = som.winner(features)

    def _distance_to_winner(entry):
        # entry is a (position, label) pair from label_map; Euclidean distance
        # between that position and the winning neuron's grid coordinates.
        position = entry[0]
        return math.sqrt((winner_neuron[0] - position[0])**2 + (winner_neuron[1] - position[1])**2)

    predicted_index = label_map.get(winner_neuron, -1)
    is_best_guess = predicted_index == -1
    if is_best_guess:
        # min() keeps the first of several equally-near entries, matching a
        # strict "<" scan in iteration order; default covers an empty map.
        nearest_entry = min(label_map.items(), key=_distance_to_winner, default=None)
        if nearest_entry is not None:
            predicted_index = nearest_entry[1]

    if predicted_index == -1:
        predicted_letter = "Unknown"
        prediction_type = "Critical Error: No mapped neurons found on the entire map."
    elif is_best_guess:
        predicted_letter = CLASSES[predicted_index]
        prediction_type = "Nearest Neighbor Guess"
    else:
        predicted_letter = CLASSES[predicted_index]
        prediction_type = "Direct Match"

    # Coordinates are cast to built-in int before being put in the response.
    response = {
        "filename": file.filename,
        "predicted_letter": predicted_letter,
        "prediction_type": prediction_type,
        "winner_neuron_on_map": list(map(int, winner_neuron)),
    }
    print(f"[LOG] Prediction successful. Response: {response}")
    return response