# health-assistant/app/services/reference_detection_service.py
# yuting111222's picture
# Update health assistant minimal with new services and improvements
# a608ddf
# 檔案路徑: app/services/reference_detection_service.py
import logging
import numpy as np
from PIL import Image
from typing import Dict, Any, List, Optional, Tuple
from ultralytics import YOLO
import io
# Logging setup: configure the root logger at INFO level and create this module's logger.
# NOTE(review): basicConfig() runs when this module is imported and targets the
# process-wide root logger, not just this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Real-world sizes of the supported reference objects, in centimetres.
# Each entry carries a "type" ("circular", "linear" or "rectangular") plus the
# measurement(s) relevant to that shape. Key order matters to code that scans
# this table with substring matching, so keep "default" last.
REFERENCE_OBJECTS = {
    # Standard dinner plate (diameter).
    "plate": {
        "diameter": 24.0,
        "type": "circular",
    },
    # Standard bowl (diameter).
    "bowl": {
        "diameter": 15.0,
        "type": "circular",
    },
    # Spoon (length).
    "spoon": {
        "length": 15.0,
        "type": "linear",
    },
    # Fork (length).
    "fork": {
        "length": 20.0,
        "type": "linear",
    },
    # Knife (length).
    "knife": {
        "length": 20.0,
        "type": "linear",
    },
    # Coin (diameter).
    "coin": {
        "diameter": 2.4,
        "type": "circular",
    },
    # Credit card (length x width).
    "credit_card": {
        "length": 8.5,
        "width": 5.4,
        "type": "rectangular",
    },
    # Fallback used when no specific reference object matches.
    "default": {
        "diameter": 24.0,
        "type": "circular",
    },
}
class ReferenceDetectionService:
    """Detect known-size reference objects with YOLO and derive an image scale.

    The service loads a YOLO model on construction, filters its detections
    down to objects usable as a size reference, and converts the bounding box
    of a chosen reference into a centimetre-per-pixel ratio.
    """

    # Detections must score strictly above this confidence to be kept.
    MIN_CONFIDENCE = 0.3

    # Fallback scale (cm/pixel) when no ratio can be computed: 100 px = 1 cm.
    DEFAULT_PIXEL_RATIO = 0.01

    # Substrings that mark a detection label as a usable reference object.
    # NOTE(review): "card", "phone" and "remote" have no entry in
    # REFERENCE_OBJECTS, so they are sized with the "default" entry.
    _REFERENCE_LABELS = (
        "plate", "bowl", "spoon", "fork", "knife",
        "coin", "credit card", "card", "phone", "remote",
    )

    # Labels preferred as the scale reference when several objects are found.
    _PRIORITY_LABELS = ("plate", "bowl")

    def __init__(self):
        """Initialise the service and load the YOLO model immediately."""
        self.yolo_model = None
        self._load_model()

    def _load_model(self):
        """Load the YOLO model into ``self.yolo_model``.

        Raises:
            Exception: any error raised while constructing the model is
                logged and then re-raised unchanged.
        """
        try:
            logger.info("正在載入 YOLO 參考物偵測模型...")
            # YOLOv8n is used as the base model.
            self.yolo_model = YOLO("yolov8n.pt")
            logger.info("YOLO 模型載入完成!")
        except Exception as e:
            logger.error(f"YOLO 模型載入失敗: {str(e)}")
            raise

    def detect_reference_objects(self, image: "Image.Image") -> List[Dict[str, Any]]:
        """Run YOLO on ``image`` and return the detected reference objects.

        Args:
            image: PIL image to analyse.

        Returns:
            A list of dicts with keys ``label``, ``bbox`` (``[x1, y1, x2, y2]``
            in pixels), ``confidence``, ``area`` (pixel area of the box) and
            ``dimensions`` (the matching REFERENCE_OBJECTS entry), sorted by
            descending confidence. An empty list is returned when detection
            fails for any reason; the error is logged, not raised.
        """
        try:
            results = self.yolo_model(image)
            reference_objects = []
            # Each row is unpacked as (x1, y1, x2, y2, confidence, class_id).
            for x1, y1, x2, y2, conf, class_id in results[0].boxes.data.tolist():
                label = self.yolo_model.model.names[int(class_id)].lower()
                # Keep only reference-object classes above the threshold.
                if not (self._is_reference_object(label) and conf > self.MIN_CONFIDENCE):
                    continue
                reference_objects.append({
                    "label": label,
                    "bbox": [x1, y1, x2, y2],
                    "confidence": conf,
                    "area": (x2 - x1) * (y2 - y1),  # pixel area
                    "dimensions": self._get_reference_dimensions(label),
                })
            # Highest-confidence detections first.
            reference_objects.sort(key=lambda x: x["confidence"], reverse=True)
            logger.info(f"偵測到 {len(reference_objects)} 個參考物: {[obj['label'] for obj in reference_objects]}")
            return reference_objects
        except Exception as e:
            logger.error(f"參考物偵測失敗: {str(e)}")
            return []

    def _is_reference_object(self, label: str) -> bool:
        """Return True when ``label`` contains any known reference-object name."""
        return any(ref_label in label for ref_label in self._REFERENCE_LABELS)

    def _get_reference_dimensions(self, label: str) -> Dict[str, Any]:
        """Return the real-world dimensions entry for ``label``.

        The label is normalised (spaces and hyphens become underscores) so
        that a label such as ``"credit card"`` matches the ``"credit_card"``
        table key. Falls back to the ``"default"`` entry when nothing matches.
        """
        normalized = label.replace(" ", "_").replace("-", "_")
        for ref_name, dimensions in REFERENCE_OBJECTS.items():
            # "default" is the fallback entry, not a label to match against.
            if ref_name != "default" and ref_name in normalized:
                return dimensions
        return REFERENCE_OBJECTS["default"]

    def calculate_pixel_ratio(self, reference_object: Dict[str, Any]) -> float:
        """Compute the real-distance-per-pixel scale from a reference object.

        Args:
            reference_object: a dict with ``bbox``, ``dimensions`` and
                ``label`` keys, as produced by ``detect_reference_objects``.

        Returns:
            The scale in cm/pixel. ``DEFAULT_PIXEL_RATIO`` is returned when
            the shape type is unknown, the bounding box has a non-positive
            extent, or any error occurs (errors are logged, not raised).
        """
        try:
            bbox = reference_object["bbox"]
            dimensions = reference_object["dimensions"]
            # Size of the reference object in the image, in pixels.
            pixel_width = bbox[2] - bbox[0]
            pixel_height = bbox[3] - bbox[1]

            shape = dimensions["type"]
            if shape == "circular":
                # Circular references (plate, bowl, coin): the shorter box
                # side is taken as the diameter.
                # NOTE(review): for an obliquely photographed circle the longer
                # side may be the better diameter estimate; confirm intent.
                pixel_extent = min(pixel_width, pixel_height)
                actual_extent = dimensions["diameter"]
            elif shape in ("linear", "rectangular"):
                # Cutlery and cards: the longer box side is taken as the length.
                pixel_extent = max(pixel_width, pixel_height)
                actual_extent = dimensions["length"]
            else:
                # Unknown shape type: use the fixed fallback scale.
                pixel_extent = None
                actual_extent = None

            if actual_extent is None:
                pixel_ratio = self.DEFAULT_PIXEL_RATIO
            elif pixel_extent <= 0:
                # A degenerate or inverted box would give a division by zero
                # or a negative scale; report it and fall back instead.
                logger.error(f"計算像素比例失敗: non-positive pixel size {pixel_extent}")
                return self.DEFAULT_PIXEL_RATIO
            else:
                pixel_ratio = actual_extent / pixel_extent

            logger.info(f"參考物 {reference_object['label']} 像素比例: {pixel_ratio:.4f} cm/pixel")
            return pixel_ratio
        except Exception as e:
            logger.error(f"計算像素比例失敗: {str(e)}")
            return self.DEFAULT_PIXEL_RATIO

    def get_best_reference_object(self, reference_objects: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Choose the most suitable reference object from the detections.

        Plates and bowls are preferred; among the preferred objects (or among
        all objects when none is preferred) the one with the highest
        confidence wins. The choice does not depend on the input being
        pre-sorted; on ties the earliest element is returned.

        Args:
            reference_objects: detections, each with a ``label`` key and,
                normally, a ``confidence`` key (a missing one counts as 0.0).

        Returns:
            The chosen detection dict, or None when the list is empty.
        """
        if not reference_objects:
            return None

        def confidence(obj: Dict[str, Any]) -> float:
            return obj.get("confidence", 0.0)

        preferred = [
            obj for obj in reference_objects
            if any(priority in obj["label"] for priority in self._PRIORITY_LABELS)
        ]
        # max() returns the first maximal element, so for a list already sorted
        # by descending confidence this matches taking the first candidate.
        return max(preferred or reference_objects, key=confidence)
# Module-level shared service instance.
# NOTE: constructing it here runs __init__, which calls _load_model(), so the
# YOLO model is loaded when this module is imported and a load failure
# propagates to the importer.
reference_service = ReferenceDetectionService()
def detect_reference_objects_from_image(image_bytes: bytes) -> Tuple[List[Dict[str, Any]], Optional[float]]:
    """Detect reference objects in an encoded image and compute its scale.

    Args:
        image_bytes: raw binary content of the image.

    Returns:
        A ``(reference_objects, pixel_ratio)`` tuple. ``pixel_ratio`` is the
        value computed from the best reference object, or None when no
        reference object was selected. On any error the failure is logged
        and ``([], None)`` is returned.
    """
    try:
        rgb_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        # Find candidate reference objects, then pick the preferred one.
        detections = reference_service.detect_reference_objects(rgb_image)
        chosen = reference_service.get_best_reference_object(detections)
        if not chosen:
            # Nothing usable as a reference: no scale can be derived.
            return detections, None
        return detections, reference_service.calculate_pixel_ratio(chosen)
    except Exception as e:
        logger.error(f"參考物偵測失敗: {str(e)}")
        return [], None