Spaces:
Sleeping
Sleeping
# File path: app/services/reference_detection_service.py
import logging | |
import numpy as np | |
from PIL import Image | |
from typing import Dict, Any, List, Optional, Tuple | |
from ultralytics import YOLO | |
import io | |
# Configure logging: INFO level on the root logger, plus a module-level logger
# used by everything in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Real-world sizes of the supported reference objects, in centimetres.
# "type" selects which measurement is present:
#   circular    -> "diameter"
#   linear      -> "length"
#   rectangular -> "length" and "width"
REFERENCE_OBJECTS = {
    "plate": {"diameter": 24.0, "type": "circular"},  # standard dinner plate diameter
    "bowl": {"diameter": 15.0, "type": "circular"},  # standard bowl diameter
    "spoon": {"length": 15.0, "type": "linear"},  # spoon length
    "fork": {"length": 20.0, "type": "linear"},  # fork length
    "knife": {"length": 20.0, "type": "linear"},  # knife length
    "coin": {"diameter": 2.4, "type": "circular"},  # coin diameter
    "credit_card": {"length": 8.5, "width": 5.4, "type": "rectangular"},  # credit card
    "default": {"diameter": 24.0, "type": "circular"},  # fallback reference object
}
class ReferenceDetectionService:
    """Detect known-size reference objects with YOLO and derive a cm/pixel scale.

    The service wraps a YOLO model, filters its detections down to labels that
    can serve as a size reference, and converts the chosen object's bounding
    box into a centimetres-per-pixel ratio using ``REFERENCE_OBJECTS``.
    """

    # Detections must score strictly above this confidence to be kept.
    MIN_CONFIDENCE = 0.3
    # Scale (cm/pixel) returned when no ratio can be computed: 100 px = 1 cm.
    DEFAULT_PIXEL_RATIO = 0.01
    # A detected label counts as a reference object if it contains any of these.
    REFERENCE_LABELS = (
        "plate", "bowl", "spoon", "fork", "knife",
        "coin", "credit card", "card", "phone", "remote",
    )
    # Labels preferred over all others when choosing the best reference.
    PRIORITY_LABELS = ("plate", "bowl")

    def __init__(self):
        """Create the service and load the YOLO model immediately."""
        self.yolo_model = None
        self._load_model()

    def _load_model(self):
        """Load the YOLO model into ``self.yolo_model``.

        Raises:
            Exception: whatever ``YOLO(...)`` raises; it is logged and re-raised.
        """
        try:
            logger.info("正在載入 YOLO 參考物偵測模型...")
            # YOLOv8n is used as the base model.
            self.yolo_model = YOLO("yolov8n.pt")
            logger.info("YOLO 模型載入完成!")
        except Exception as e:
            logger.error("YOLO 模型載入失敗: %s", e)
            raise

    # The annotation is a forward-reference string so that defining the class
    # does not require Pillow to be resolvable at definition time.
    def detect_reference_objects(self, image: "Image.Image") -> List[Dict[str, Any]]:
        """Detect reference objects in an image with YOLO.

        Args:
            image: PIL Image to run detection on.

        Returns:
            A list of dicts with keys ``label``, ``bbox`` (``[x1, y1, x2, y2]``),
            ``confidence``, ``area`` (bbox area in pixels) and ``dimensions``,
            sorted by descending confidence. Any failure is logged and yields
            an empty list.
        """
        try:
            results = self.yolo_model(image)
            # Treat "no result" / "no boxes" as zero detections instead of
            # relying on an IndexError/AttributeError being swallowed below.
            boxes = results[0].boxes if results else None
            rows = boxes.data.tolist() if boxes is not None else []

            reference_objects = []
            if rows:
                names = self.yolo_model.model.names
                for x1, y1, x2, y2, conf, class_id in rows:
                    label = names[int(class_id)].lower()
                    # Keep only sufficiently confident reference-object classes.
                    if not (self._is_reference_object(label) and conf > self.MIN_CONFIDENCE):
                        continue
                    reference_objects.append({
                        "label": label,
                        "bbox": [x1, y1, x2, y2],
                        "confidence": conf,
                        "area": (x2 - x1) * (y2 - y1),  # pixel area
                        "dimensions": self._get_reference_dimensions(label),
                    })

            # Highest confidence first so callers can prefer reliable detections.
            reference_objects.sort(key=lambda x: x["confidence"], reverse=True)
            logger.info(f"偵測到 {len(reference_objects)} 個參考物: {[obj['label'] for obj in reference_objects]}")
            return reference_objects
        except Exception as e:
            logger.error("參考物偵測失敗: %s", e)
            return []

    def _is_reference_object(self, label: str) -> bool:
        """Return True if ``label`` contains any known reference-object name."""
        return any(ref_label in label for ref_label in self.REFERENCE_LABELS)

    def _get_reference_dimensions(self, label: str) -> Dict[str, Any]:
        """Return the real-world dimensions entry matching ``label``.

        Spaces and hyphens are treated like underscores on both sides, so a
        label such as ``"credit card"`` matches the ``"credit_card"`` entry.
        Labels with no matching entry get ``REFERENCE_OBJECTS["default"]``.
        """
        def normalize(text: str) -> str:
            return text.replace(" ", "_").replace("-", "_")

        normalized_label = normalize(label)
        for ref_name, dimensions in REFERENCE_OBJECTS.items():
            if ref_name != "default" and normalize(ref_name) in normalized_label:
                return dimensions
        # NOTE(review): labels accepted by _is_reference_object but without a
        # size entry (e.g. "phone", "remote", bare "card") end up here and are
        # scaled with the default dimensions - confirm this is intended.
        return REFERENCE_OBJECTS["default"]

    def calculate_pixel_ratio(self, reference_object: Dict[str, Any]) -> float:
        """Compute the pixel-to-real-distance scale from a reference object.

        Args:
            reference_object: Dict with ``bbox`` (``[x1, y1, x2, y2]``) and
                ``dimensions`` (an entry shaped like those in
                ``REFERENCE_OBJECTS``); ``label`` is used for logging only.

        Returns:
            The scale in cm/pixel. ``DEFAULT_PIXEL_RATIO`` is returned for an
            unknown dimension type, a zero-size bounding box, or any error.
        """
        try:
            bbox = reference_object["bbox"]
            dimensions = reference_object["dimensions"]

            # abs() keeps the scale positive even if the corners are swapped.
            pixel_width = abs(bbox[2] - bbox[0])
            pixel_height = abs(bbox[3] - bbox[1])

            shape = dimensions["type"]
            if shape == "circular":
                # Round objects (plate, bowl, coin): shorter side as diameter.
                pixel_size = min(pixel_width, pixel_height)
                actual_size = dimensions["diameter"]
            elif shape in ("linear", "rectangular"):
                # Cutlery and cards: longer side as the object's length.
                pixel_size = max(pixel_width, pixel_height)
                actual_size = dimensions["length"]
            else:
                pixel_size = None
                actual_size = None

            if pixel_size is None:
                # Unknown shape type: fall back to the default scale.
                pixel_ratio = self.DEFAULT_PIXEL_RATIO
            elif pixel_size <= 0:
                # Degenerate box: report it explicitly rather than dividing by 0.
                logger.error("計算像素比例失敗: %s", "bounding box has zero pixel size")
                return self.DEFAULT_PIXEL_RATIO
            else:
                pixel_ratio = actual_size / pixel_size

            # .get() so that a missing label cannot discard a valid ratio.
            logger.info(f"參考物 {reference_object.get('label')} 像素比例: {pixel_ratio:.4f} cm/pixel")
            return pixel_ratio
        except Exception as e:
            logger.error("計算像素比例失敗: %s", e)
            return self.DEFAULT_PIXEL_RATIO

    def get_best_reference_object(self, reference_objects: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Choose the best reference object among the detected ones.

        Plates and bowls are preferred; within the preferred group (or, if
        there is none, among all objects) the highest-confidence entry wins.
        Ties keep the earliest entry, so for a list already sorted by
        descending confidence the result equals "first match".

        Args:
            reference_objects: Detected reference objects; need not be sorted.

        Returns:
            The chosen object, or None when the list is empty.
        """
        if not reference_objects:
            return None

        def confidence(obj: Dict[str, Any]) -> float:
            # A missing or None confidence ranks lowest.
            return obj.get("confidence") or 0.0

        preferred = [
            obj for obj in reference_objects
            if any(priority in obj["label"] for priority in self.PRIORITY_LABELS)
        ]
        return max(preferred or reference_objects, key=confidence)
# Module-level service instance.
# NOTE: constructing it runs ReferenceDetectionService.__init__, which loads
# the YOLO model as soon as this module is imported.
reference_service = ReferenceDetectionService()
def detect_reference_objects_from_image(image_bytes: bytes) -> Tuple[List[Dict[str, Any]], Optional[float]]:
    """Detect reference objects in encoded image data and compute the scale.

    Args:
        image_bytes: Binary content of the image file.

    Returns:
        ``(reference_objects, pixel_ratio)``: the detected reference objects
        and the cm/pixel ratio derived from the best one. ``pixel_ratio`` is
        None when no reference object was found. Any failure (for example
        undecodable image data) is logged and yields ``([], None)``.
    """
    try:
        # convert() produces an independent RGB copy, so the decoded source
        # image can be closed as soon as the conversion is done.
        with Image.open(io.BytesIO(image_bytes)) as source_image:
            image = source_image.convert("RGB")

        # Detect candidates, then pick the most suitable one.
        reference_objects = reference_service.detect_reference_objects(image)
        best_reference = reference_service.get_best_reference_object(reference_objects)

        # The scale is only defined when a reference object was found.
        pixel_ratio = None
        if best_reference:
            pixel_ratio = reference_service.calculate_pixel_ratio(best_reference)

        return reference_objects, pixel_ratio
    except Exception as e:
        logger.error(f"參考物偵測失敗: {str(e)}")
        return [], None