# File path: app/services/reference_detection_service.py
"""Detect reference objects in a photo and derive a cm-per-pixel scale.

A YOLO model is run on the image; detections whose class label looks like a
known reference object (plate, bowl, cutlery, ...) are kept, and the
bounding box of the best one is compared with its real-world size from
``REFERENCE_OBJECTS`` to obtain a centimetres-per-pixel ratio.
"""
import io
import logging
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from PIL import Image
from ultralytics import YOLO

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Real-world sizes of the supported reference objects, in centimetres.
REFERENCE_OBJECTS = {
    "plate": {"diameter": 24.0, "type": "circular"},  # standard dinner plate diameter
    "bowl": {"diameter": 15.0, "type": "circular"},  # standard bowl diameter
    "spoon": {"length": 15.0, "type": "linear"},  # spoon length
    "fork": {"length": 20.0, "type": "linear"},  # fork length
    "knife": {"length": 20.0, "type": "linear"},  # knife length
    "coin": {"diameter": 2.4, "type": "circular"},  # coin diameter
    "credit_card": {"length": 8.5, "width": 5.4, "type": "rectangular"},  # credit card
    "default": {"diameter": 24.0, "type": "circular"},  # fallback reference object
}

# Scale returned when no usable ratio can be computed (100 px == 1 cm).
DEFAULT_PIXEL_RATIO = 0.01

# Detections must score strictly above this confidence to be kept.
MIN_CONFIDENCE = 0.3


class ReferenceDetectionService:
    """Wrap a YOLO model to find reference objects and compute image scale."""

    # Substrings that mark a detection label as a usable reference object.
    _REFERENCE_LABELS = (
        "plate", "bowl", "spoon", "fork", "knife",
        "coin", "credit card", "card", "phone", "remote",
    )

    # Labels preferred over higher-confidence alternatives when choosing
    # the best reference object.
    _PRIORITY_LABELS = ("plate", "bowl")

    def __init__(self):
        """Initialise the service and load the detection model immediately."""
        self.yolo_model = None
        self._load_model()

    def _load_model(self):
        """Load the YOLO model.

        Raises:
            Exception: whatever ``YOLO(...)`` raised; it is logged and
                re-raised unchanged.
        """
        try:
            logger.info("正在載入 YOLO 參考物偵測模型...")
            # YOLOv8n is used as the base model.
            self.yolo_model = YOLO("yolov8n.pt")
            logger.info("YOLO 模型載入完成!")
        except Exception as e:
            logger.error(f"YOLO 模型載入失敗: {str(e)}")
            raise

    def detect_reference_objects(
        self, image: Image.Image, min_confidence: float = MIN_CONFIDENCE
    ) -> List[Dict[str, Any]]:
        """Detect reference objects in an image with YOLO.

        Args:
            image: PIL image to analyse.
            min_confidence: Detections must score strictly above this value
                to be kept. Defaults to ``MIN_CONFIDENCE`` (0.3).

        Returns:
            A list of dicts with keys ``label``, ``bbox`` (``[x1, y1, x2,
            y2]``), ``confidence``, ``area`` (bbox area in pixels) and
            ``dimensions`` (entry from ``REFERENCE_OBJECTS``), sorted by
            descending confidence. An empty list is returned if detection
            fails for any reason; the error is logged.
        """
        try:
            results = self.yolo_model(image)
            class_names = self.yolo_model.model.names

            reference_objects = []
            for x1, y1, x2, y2, conf, class_id in results[0].boxes.data.tolist():
                label = class_names[int(class_id)].lower()

                # Keep only reference-object classes above the threshold.
                if self._is_reference_object(label) and conf > min_confidence:
                    reference_objects.append({
                        "label": label,
                        "bbox": [x1, y1, x2, y2],
                        "confidence": conf,
                        "area": (x2 - x1) * (y2 - y1),  # pixel area
                        "dimensions": self._get_reference_dimensions(label),
                    })

            # Highest confidence first so callers can prefer the best match.
            reference_objects.sort(key=lambda obj: obj["confidence"], reverse=True)

            logger.info(f"偵測到 {len(reference_objects)} 個參考物: {[obj['label'] for obj in reference_objects]}")
            return reference_objects

        except Exception as e:
            # Best-effort: detection problems must not break the caller.
            logger.error(f"參考物偵測失敗: {str(e)}")
            return []

    def _is_reference_object(self, label: str) -> bool:
        """Return True if ``label`` contains any known reference-object name."""
        return any(ref_label in label for ref_label in self._REFERENCE_LABELS)

    def _get_reference_dimensions(self, label: str) -> Dict[str, Any]:
        """Return the real-world dimensions for a detection label.

        Table keys and labels are compared with ``_`` and space treated as
        the same separator, so the label ``"credit card"`` matches the
        ``"credit_card"`` entry. Labels without a matching entry get
        ``REFERENCE_OBJECTS["default"]``.

        NOTE(review): labels accepted by ``_is_reference_object`` that have
        no table entry (e.g. ``phone``, ``remote``, ``card``) fall back to
        the plate-sized default, which is unlikely to be the right scale —
        confirm and add real sizes.
        """
        normalized_label = label.replace("_", " ")
        for ref_name, dimensions in REFERENCE_OBJECTS.items():
            if ref_name == "default":
                continue
            if ref_name.replace("_", " ") in normalized_label:
                return dimensions
        return REFERENCE_OBJECTS["default"]

    def calculate_pixel_ratio(self, reference_object: Dict[str, Any]) -> float:
        """Compute the cm-per-pixel scale implied by a reference object.

        Args:
            reference_object: A detection dict as produced by
                ``detect_reference_objects`` (needs ``bbox``, ``dimensions``
                and ``label``).

        Returns:
            Centimetres per pixel. ``DEFAULT_PIXEL_RATIO`` (0.01) is
            returned when the shape type is unknown, the bounding box has
            no positive extent, or any error occurs.
        """
        try:
            bbox = reference_object["bbox"]
            dimensions = reference_object["dimensions"]

            # Size of the reference object in the image, in pixels.
            pixel_width = bbox[2] - bbox[0]
            pixel_height = bbox[3] - bbox[1]

            shape = dimensions["type"]
            if shape == "circular":
                # Circular objects (plate, bowl, coin): the shorter bbox
                # side is taken as the diameter.
                pixel_size = min(pixel_width, pixel_height)
                actual_size = dimensions["diameter"]
            elif shape in ("linear", "rectangular"):
                # Cutlery and cards: the longer bbox side is taken as the
                # object's length.
                pixel_size = max(pixel_width, pixel_height)
                actual_size = dimensions["length"]
            else:
                # Unknown shape type: fall back to the default scale.
                pixel_size = None
                actual_size = None

            if pixel_size is None:
                pixel_ratio = DEFAULT_PIXEL_RATIO
            elif pixel_size <= 0:
                # A degenerate or inverted box would divide by zero or
                # yield a negative scale; use the default instead.
                logger.warning(f"參考物 {reference_object['label']} 的像素尺寸無效,使用預設像素比例")
                return DEFAULT_PIXEL_RATIO
            else:
                pixel_ratio = actual_size / pixel_size

            logger.info(f"參考物 {reference_object['label']} 像素比例: {pixel_ratio:.4f} cm/pixel")
            return pixel_ratio

        except Exception as e:
            logger.error(f"計算像素比例失敗: {str(e)}")
            return DEFAULT_PIXEL_RATIO

    def get_best_reference_object(
        self, reference_objects: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """Pick the reference object to use for scale estimation.

        Args:
            reference_objects: Detections, expected in the order produced
                by ``detect_reference_objects`` (descending confidence).

        Returns:
            The first plate or bowl in the list if there is one, otherwise
            the first element; ``None`` when the list is empty.
        """
        if not reference_objects:
            return None

        # Plates and bowls are preferred over other reference objects.
        for obj in reference_objects:
            if any(priority in obj["label"] for priority in self._PRIORITY_LABELS):
                return obj

        # No preferred object: take the first (highest-confidence) one.
        return reference_objects[0]


# Module-level service instance (loads the model at import time).
reference_service = ReferenceDetectionService()


def detect_reference_objects_from_image(
    image_bytes: bytes,
) -> Tuple[List[Dict[str, Any]], Optional[float]]:
    """Detect reference objects in encoded image bytes and compute the scale.

    Args:
        image_bytes: Raw bytes of an encoded image.

    Returns:
        ``(reference_objects, pixel_ratio)``. ``pixel_ratio`` is cm per
        pixel from the best reference object, or ``None`` when none was
        detected. ``([], None)`` is returned if the image cannot be
        processed; the error is logged.
    """
    try:
        # Close the source image handle once the RGB copy has been made.
        with Image.open(io.BytesIO(image_bytes)) as source_image:
            image = source_image.convert("RGB")

        # Detect reference objects.
        reference_objects = reference_service.detect_reference_objects(image)

        # Choose the best reference object.
        best_reference = reference_service.get_best_reference_object(reference_objects)

        # Compute the pixel ratio.
        pixel_ratio = None
        if best_reference:
            pixel_ratio = reference_service.calculate_pixel_ratio(best_reference)

        return reference_objects, pixel_ratio

    except Exception as e:
        logger.error(f"參考物偵測失敗: {str(e)}")
        return [], None