# alsp2/main.py
import math
import pickle

import cv2
import numpy as np
from fastapi import FastAPI, File, HTTPException, UploadFile
from skimage.feature import hog
app = FastAPI(
    title="Enhanced Peruvian Sign Language (LSP) Recognition API",
    description=(
        "Upload an image of a Peruvian Sign Language alphabet sign to predict "
        "the corresponding letter using an enhanced Self-Organizing Map (SOM) "
        "with HOG features."
    ),
    version="2.0.0",
)
MODEL_FILENAME = 'lsp_som_model_enhanced.pkl'
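# Model assets are loaded once at import time. If loading fails, the API
# still starts, but /predict responds with 503 until a model file is present.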
try:
    with open(MODEL_FILENAME, 'rb') as f:
        model_data = pickle.load(f)
    som = model_data['som']
    label_map = model_data['label_map']
    CLASSES = model_data['classes']
    IMG_SIZE = model_data['img_size']
    HOG_PARAMS = model_data['feature_extraction_params']  # HOG parameters used at training time
    print(f"βœ… Model '{MODEL_FILENAME}' and assets loaded successfully.")
    print(f"   - Classes: {CLASSES}")
    print(f"   - Expected Image Size for Processing: {IMG_SIZE}x{IMG_SIZE}")
    print(f"   - Feature Extractor: {HOG_PARAMS['type']}")
except FileNotFoundError:
    print(f"❌ FATAL ERROR: Model file '{MODEL_FILENAME}' not found.")
    som = None
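# Expected pickle layout, inferred from the keys read above: a trained SOM
# (presumably a MiniSom instance, given the `winner()` call below), a
# {(row, col) -> class index} label map, the ordered class list, the square
# input size used for training, and the HOG parameters.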
def preprocess_and_extract_features_from_bytes(image_bytes: bytes):
    """Decode raw image bytes, isolate the hand via skin segmentation, and return its HOG descriptor."""
    try:
        nparr = np.frombuffer(image_bytes, np.uint8)
        img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img_bgr is None:
            raise ValueError("Could not decode image bytes. The file may be corrupt or not an image.")
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        # Segment skin in YCrCb space; this Cr/Cb range is a common heuristic
        # for skin tones and must match what was used during training.
        ycrcb = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2YCrCb)
        skin_mask = cv2.inRange(ycrcb, np.array([0, 135, 85]), np.array([255, 180, 135]))
        contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            raise ValueError("No contours found in the image. The hand sign may not be clear enough.")
        # Assume the largest skin-colored region is the hand, then crop and
        # resize it to the size the model was trained on.
        largest_contour = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest_contour)
        cropped_hand = img_rgb[y:y + h, x:x + w]
        resized_hand = cv2.resize(cropped_hand, (IMG_SIZE, IMG_SIZE))
        gray_hand = cv2.cvtColor(resized_hand, cv2.COLOR_RGB2GRAY)
        # Compute the HOG descriptor with the exact parameters stored alongside
        # the model, so the feature vector matches the SOM's input dimension.
        hog_features = hog(
            gray_hand,
            orientations=HOG_PARAMS['orientations'],
            pixels_per_cell=HOG_PARAMS['pixels_per_cell'],
            cells_per_block=HOG_PARAMS['cells_per_block'],
            transform_sqrt=HOG_PARAMS['transform_sqrt'],
            block_norm=HOG_PARAMS['block_norm'],
        )
        return hog_features
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Image processing failed. Error: {e}") from e
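# For reference, a HOG_PARAMS dict compatible with the call above might look
# like the following (illustrative values only, not the trained ones):
# {'type': 'HOG', 'orientations': 9, 'pixels_per_cell': (8, 8),
#  'cells_per_block': (2, 2), 'transform_sqrt': True, 'block_norm': 'L2-Hys'}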
@app.get("/", tags=["Status"])
def read_root():
    return {"status": "ok", "message": "Welcome to the Enhanced LSP Recognition API!"}
@app.post("/predict", tags=["Prediction"])
async def predict_sign(file: UploadFile = File(..., description="An image file of a Peruvian Sign Language sign.")):
    if som is None:
        raise HTTPException(status_code=503, detail="Model is not available. API cannot process requests.")
    image_bytes = await file.read()
    feature_vector = preprocess_and_extract_features_from_bytes(image_bytes)
    # The SOM's best-matching unit (BMU) for this feature vector.
    winner_neuron = som.winner(feature_vector)
    predicted_index = label_map.get(winner_neuron, -1)
    is_best_guess = False
    if predicted_index == -1:
        # The winning neuron was never labeled during training, so fall back to
        # the nearest labeled neuron by Euclidean distance on the map grid.
        is_best_guess = True
        min_dist = float('inf')
        for mapped_pos, mapped_label in label_map.items():
            dist = math.hypot(winner_neuron[0] - mapped_pos[0], winner_neuron[1] - mapped_pos[1])
            if dist < min_dist:
                min_dist = dist
                predicted_index = mapped_label
    if predicted_index != -1:
        predicted_letter = CLASSES[predicted_index]
        prediction_type = "Nearest Neighbor Guess" if is_best_guess else "Direct Match"
    else:
        predicted_letter = "Unknown"
        prediction_type = "Critical Error: No mapped neurons found on the entire map."
    response = {
        "filename": file.filename,
        "predicted_letter": predicted_letter,
        "prediction_type": prediction_type,
        "winner_neuron_on_map": [int(coord) for coord in winner_neuron],
    }
    print(f"[LOG] Prediction successful. Response: {response}")
    return response
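# Minimal local-run entry point: a sketch assuming uvicorn is installed (the
# usual ASGI server for FastAPI); the host and port below are illustrative.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request against the running server (hypothetical file name):
#   curl -X POST -F "file=@sign_a.jpg" http://localhost:8000/predict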