File size: 4,433 Bytes
f1acbf1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import pickle
import numpy as np
import io
import math
import cv2
from skimage.feature import hog
from fastapi import FastAPI, File, UploadFile, HTTPException
from PIL import Image
# FastAPI application object. The title, description and version given here
# are passed straight to the FastAPI constructor.
app = FastAPI(
    version="2.0.0",
    title="Enhanced Peruvian Sign Language (LSP) Recognition API",
    description=(
        "Upload an image of a Peruvian Sign Language alphabet sign to predict "
        "the corresponding letter using an enhanced Self-Organizing Map (SOM) "
        "with HOG features."
    ),
)
MODEL_FILENAME = 'lsp_som_model_enhanced.pkl'

# Give every model-related global a value up front so the names exist even
# when the model file is missing. Request handlers check `som` before using
# any of the others.
som = None
label_map = {}
CLASSES = []
IMG_SIZE = None
HOG_PARAMS = None

try:
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load a model file produced by a trusted training run.
    with open(MODEL_FILENAME, 'rb') as f:
        model_data = pickle.load(f)

    som = model_data['som']
    label_map = model_data['label_map']
    CLASSES = model_data['classes']
    IMG_SIZE = model_data['img_size']
    HOG_PARAMS = model_data['feature_extraction_params']  # Load HOG parameters

    print(f"✅ Model '{MODEL_FILENAME}' and assets loaded successfully.")
    print(f" - Classes: {CLASSES}")
    print(f" - Expected Image Size for Processing: {IMG_SIZE}x{IMG_SIZE}")
    print(f" - Feature Extractor: {HOG_PARAMS['type']}")
except FileNotFoundError:
    # Keep the process alive without a model; `som is None` marks it as
    # unavailable.
    print(f"❌ FATAL ERROR: Model file '{MODEL_FILENAME}' not found.")
    som = None
def preprocess_and_extract_features_from_bytes(image_bytes: bytes) -> np.ndarray:
    """Turn raw image bytes into the HOG descriptor for the detected hand region.

    Steps: decode the bytes with OpenCV, threshold the image in YCrCb space,
    crop to the bounding box of the largest external contour of that mask,
    resize the crop to IMG_SIZE x IMG_SIZE, convert it to grayscale and run
    skimage's ``hog`` with the settings stored in HOG_PARAMS.

    Args:
        image_bytes: Encoded image file content (for example, an upload body).

    Returns:
        The value returned by ``skimage.feature.hog`` for the processed crop.

    Raises:
        HTTPException: Status 400 whenever any step fails, including empty
            input, undecodable bytes, or an image with no contour in the mask.
            The original exception is attached as ``__cause__``.
    """
    try:
        # cv2.imdecode fails with an opaque assertion message on an empty
        # buffer, so reject empty uploads with a readable error first.
        if not image_bytes:
            raise ValueError("Could not decode image bytes. The uploaded file is empty.")

        nparr = np.frombuffer(image_bytes, np.uint8)
        img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img_bgr is None:
            raise ValueError("Could not decode image bytes. The file may be corrupt or not an image.")

        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        ycrcb = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2YCrCb)

        # Fixed (Y, Cr, Cb) bounds used to build the mask. The variable name
        # suggests they are meant to select skin tones.
        skin_mask = cv2.inRange(ycrcb, np.array([0, 135, 85]), np.array([255, 180, 135]))

        contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            raise ValueError("No contours found in the image. The hand sign may not be clear enough.")

        # The contour with the largest area is taken to be the hand.
        largest_contour = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest_contour)
        cropped_hand = img_rgb[y:y + h, x:x + w]

        resized_hand = cv2.resize(cropped_hand, (IMG_SIZE, IMG_SIZE))
        gray_hand = cv2.cvtColor(resized_hand, cv2.COLOR_RGB2GRAY)

        hog_features = hog(
            gray_hand,
            orientations=HOG_PARAMS['orientations'],
            pixels_per_cell=HOG_PARAMS['pixels_per_cell'],
            cells_per_block=HOG_PARAMS['cells_per_block'],
            transform_sqrt=HOG_PARAMS['transform_sqrt'],
            block_norm=HOG_PARAMS['block_norm'],
        )
        return hog_features
    except Exception as e:
        # Report every processing failure as a client error, and chain the
        # cause so the original traceback stays available in server logs.
        raise HTTPException(status_code=400, detail=f"Image processing failed. Error: {e}") from e
@app.get("/", tags=["Status"])
def read_root():
    # Status endpoint: always answers with a fixed status flag and a
    # welcome message.
    payload = {
        "status": "ok",
        "message": "Welcome to the Enhanced LSP Recognition API!",
    }
    return payload
def _nearest_mapped_label(winner, mapping):
    """Return the label of the mapped neuron closest to *winner*, or -1 if *mapping* is empty."""
    if not mapping:
        return -1

    def squared_distance(position):
        # Squared Euclidean distance gives the same ordering as the true
        # distance and stays exact for integer grid coordinates.
        return (winner[0] - position[0]) ** 2 + (winner[1] - position[1]) ** 2

    # min() keeps the first of several equally close neurons, in the
    # mapping's iteration order.
    return mapping[min(mapping, key=squared_distance)]


@app.post("/predict", tags=["Prediction"])
async def predict_sign(file: UploadFile = File(..., description="An image file of a Peruvian Sign Language sign.")):
    """Predict the letter shown in an uploaded sign image.

    The image is converted to a feature vector, and the SOM's winning neuron
    for that vector is looked up in the neuron-to-label map. If the winning
    neuron has no label, the label of the nearest labelled neuron on the map
    is used instead and the prediction is reported as a guess.

    Returns a dict with the uploaded filename, the predicted letter, the
    prediction type and the winning neuron's coordinates.

    Raises HTTPException 503 if the model is not loaded; image-processing
    failures are raised as HTTPException 400 by the feature-extraction step.
    """
    if som is None:
        raise HTTPException(status_code=503, detail="Model is not available. API cannot process requests.")

    image_bytes = await file.read()
    feature_vector = preprocess_and_extract_features_from_bytes(image_bytes)
    winner_neuron = som.winner(feature_vector)

    # -1 means "no label": either the winner is unmapped or, after the
    # fallback below, the whole map has no labelled neuron.
    predicted_index = label_map.get(winner_neuron, -1)
    is_best_guess = predicted_index == -1
    if is_best_guess:
        predicted_index = _nearest_mapped_label(winner_neuron, label_map)

    if predicted_index != -1:
        predicted_letter = CLASSES[predicted_index]
        prediction_type = "Nearest Neighbor Guess" if is_best_guess else "Direct Match"
    else:
        predicted_letter = "Unknown"
        prediction_type = "Critical Error: No mapped neurons found on the entire map."

    response = {
        "filename": file.filename,
        "predicted_letter": predicted_letter,
        "prediction_type": prediction_type,
        # Cast to plain ints so the coordinates serialize as JSON numbers.
        "winner_neuron_on_map": [int(coord) for coord in winner_neuron],
    }
    print(f"[LOG] Prediction successful. Response: {response}")
    return response