# File path: backend/app/services/weight_estimation_service.py

import logging
import numpy as np
from PIL import Image
import io
from typing import Dict, Any, List, Optional, Tuple
import torch
from ultralytics import YOLO

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Food density table (g/cm³) - average densities of common foods
FOOD_DENSITY_TABLE = {
    "rice": 0.8,           # cooked rice
    "fried_rice": 0.7,     # fried rice
    "noodles": 0.6,        # noodles
    "bread": 0.3,          # bread
    "meat": 1.0,           # meat
    "fish": 1.1,           # fish
    "vegetables": 0.4,     # vegetables
    "fruits": 0.8,         # fruit
    "soup": 1.0,           # soup
    "default": 0.8         # default density
}

# Reference object size table (cm)
REFERENCE_OBJECTS = {
    "plate": {"diameter": 24.0},      # standard dinner plate diameter
    "bowl": {"diameter": 15.0},       # standard bowl diameter
    "spoon": {"length": 15.0},        # spoon length
    "fork": {"length": 20.0},         # fork length
    "default": {"diameter": 24.0}     # default reference object
}
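
# Worked example of the estimation formula used below (illustrative numbers only,
# not measured data): if a detected plate with a real diameter of 24 cm spans
# roughly 480 px in the image, the scale is 24 / 480 = 0.05 cm per pixel. A food
# mask of 60,000 px² with a mean relative depth of 20 gives a relative volume of
# 1,200,000; scaling by 0.05³ yields about 150 cm³, and at the default density of
# 0.8 g/cm³ that is roughly 120 g. Because DPT depth is relative rather than
# metric, these figures are approximations.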

class WeightEstimationService:
    def __init__(self):
        """初始化重量估算服務"""
        self.sam_model = None
        self.dpt_model = None
        self.detection_model = None
        self._load_models()
    
    def _load_models(self):
        """載入所需的 AI 模型"""
        try:
            # 載入 SAM 分割模型
            from transformers import SamModel, SamProcessor
            logger.info("正在載入 SAM 分割模型...")
            self.sam_model = SamModel.from_pretrained("facebook/sam-vit-base")
            self.sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
            
            # 載入 DPT 深度估計模型
            from transformers import pipeline
            logger.info("正在載入 DPT 深度估計模型...")
            self.dpt_model = pipeline("depth-estimation", model="Intel/dpt-large")

            # 載入 YOLOv8 物件偵測模型(用於偵測參考物)
            logger.info("正在載入 YOLOv8 物件偵測模型...")
            self.detection_model = YOLO("yolov8n.pt")  # 你可以改成 yolov5s.pt 或自訂模型
            
            logger.info("所有模型載入完成!")
            
        except Exception as e:
            logger.error(f"模型載入失敗: {str(e)}")
            raise
    
    def detect_objects(self, image: Image.Image) -> List[Dict[str, Any]]:
        """使用 YOLOv8 偵測圖片中的所有物體"""
        try:
            results = self.detection_model(image)
            detected_objects = []
            for result in results[0].boxes.data.tolist():
                x1, y1, x2, y2, conf, class_id = result
                label = self.detection_model.model.names[int(class_id)].lower()
                # We keep every high-confidence object except obvious cutlery
                if conf > 0.4 and label not in ["spoon", "fork", "knife", "scissors"]:
                    detected_objects.append({
                        "label": label,
                        "bbox": [x1, y1, x2, y2],
                        "confidence": conf
                    })
            return detected_objects
        except Exception as e:
            logger.warning(f"物件偵測失敗: {str(e)}")
            return []
    
    def segment_food(self, image: Image.Image, input_boxes: List[List[float]]) -> List[np.ndarray]:
        """使用 SAM 根據提供的邊界框分割食物區域"""
        if not input_boxes:
            return []
        try:
            # 使用 SAM 進行分割,並提供邊界框作為提示
            inputs = self.sam_processor(image, input_boxes=[input_boxes], return_tensors="pt")
            
            with torch.no_grad():
                outputs = self.sam_model(**inputs)
            
            # 取得分割遮罩
            masks_tensor = self.sam_processor.image_processor.post_process_masks(
                outputs.pred_masks.sigmoid(), 
                inputs["original_sizes"], 
                inputs["reshaped_input_sizes"]
            )[0]
            
            # 將 Tensor 轉換為 list of numpy arrays
            masks = [m.squeeze().cpu().numpy().astype(bool) for m in masks_tensor]
            return masks
            
        except Exception as e:
            logger.error(f"食物分割失敗: {str(e)}")
            return []
    
    def estimate_depth(self, image: Image.Image) -> np.ndarray:
        """使用 DPT 進行深度估計"""
        try:
            # 使用 DPT 進行深度估計
            depth_result = self.dpt_model(image)
            depth_map = depth_result["depth"]
            
            return np.array(depth_map)
            
        except Exception as e:
            logger.error(f"深度估計失敗: {str(e)}")
            # 回傳一個預設的深度圖
            return np.ones((image.height, image.width))
    
    def calculate_volume_and_weight(self, 
                                  mask: np.ndarray, 
                                  depth_map: np.ndarray, 
                                  food_type: str,
                                  reference_object: Optional[Dict[str, Any]] = None) -> Tuple[float, float, float]:
        """計算體積和重量"""
        try:
            # 計算食物區域的像素數量
            food_pixels = np.sum(mask)
            
            # 計算食物區域的平均深度
            food_depth = np.mean(depth_map[mask])
            
            # 估算體積(相對體積)
            relative_volume = food_pixels * food_depth
            
            # 如果有參考物,進行尺寸校正
            if reference_object:
                ref_type = reference_object["label"] # Changed from "type" to "label"
                if ref_type in REFERENCE_OBJECTS:
                    ref_size = REFERENCE_OBJECTS[ref_type]
                    # 根據參考物尺寸校正體積
                    if "diameter" in ref_size:
                        # 圓形參考物(如餐盤)
                        pixel_to_cm_ratio = ref_size["diameter"] / np.sqrt(food_pixels / np.pi)
                    else:
                        # 線性參考物(如餐具)
                        pixel_to_cm_ratio = ref_size["length"] / np.sqrt(food_pixels)
                    
                    # 校正體積
                    actual_volume = relative_volume * (pixel_to_cm_ratio ** 3)
                    confidence = 0.85  # 有參考物時信心度較高
                    error_range = 0.15  # ±15% 誤差
                else:
                    actual_volume = relative_volume * 0.1  # 預設校正係數
                    confidence = 0.6
                    error_range = 0.3
            else:
                # 無參考物,使用預設值
                actual_volume = relative_volume * 0.1  # 預設校正係數
                confidence = 0.5  # 無參考物時信心度較低
                error_range = 0.4  # ±40% 誤差
            
            # 根據食物類型取得密度
            density = self.get_food_density(food_type)
            
            # 計算重量 (g)
            weight = actual_volume * density
            
            # 對單一物件的重量做一個合理性檢查
            if weight > 1500: # > 1.5kg
                logger.warning(f"單一物件預估重量 {weight:.2f}g 過高,可能不準確。")

            return weight, confidence, error_range
            
        except Exception as e:
            logger.error(f"體積重量計算失敗: {str(e)}")
            return 150.0, 0.3, 0.5  # 預設值
    
    def get_food_density(self, food_name: str) -> float:
        """根據食物名稱取得密度"""
        food_name_lower = food_name.lower()
        
        # Simple keyword matching
        if any(keyword in food_name_lower for keyword in ["rice", "飯"]):
            return FOOD_DENSITY_TABLE["rice"]
        elif any(keyword in food_name_lower for keyword in ["noodle", "麵"]):
            return FOOD_DENSITY_TABLE["noodles"]
        elif any(keyword in food_name_lower for keyword in ["meat", "肉", "chicken", "pork", "beef", "lamb"]):
            return FOOD_DENSITY_TABLE["meat"]
        elif any(keyword in food_name_lower for keyword in ["vegetable", "菜"]):
            return FOOD_DENSITY_TABLE["vegetables"]
        else:
            return FOOD_DENSITY_TABLE["default"]
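
    # Example lookups (based on the keyword rules above):
    #   get_food_density("fried chicken") -> 1.0  (matches "chicken" -> meat)
    #   get_food_density("apple pie")     -> 0.8  (no keyword match -> default)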

# Global service instance
weight_service = WeightEstimationService()

async def estimate_food_weight(image_bytes: bytes, debug: bool = False) -> Dict[str, Any]:
    """
    Main entry point that combines food recognition, weight estimation and nutrition analysis (YOLO + SAM guided mode).
    """
    debug_dir = None
    try:
        if debug:
            import os
            from datetime import datetime
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            debug_dir = os.path.join("debug_output", timestamp)
            os.makedirs(debug_dir, exist_ok=True)
            
        # Convert the raw bytes into a PIL Image
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        
        if debug:
            image.save(os.path.join(debug_dir, "00_original.jpg"))
        
        # 1. Object detection (YOLO): get bounding boxes for all objects
        all_objects = weight_service.detect_objects(image)

        if not all_objects:
            note = "無法從圖片中偵測到任何物體。"
            result = {"detected_foods": [], "total_estimated_weight": 0, "total_nutrition": {}, "note": note}
            if debug: result["debug_output_path"] = debug_dir
            return result
        
        if debug:
            from PIL import ImageDraw
            debug_image = image.copy()
            draw = ImageDraw.Draw(debug_image)
            for obj in all_objects:
                bbox = obj.get("bbox")
                label = obj.get("label", "unknown")
                draw.rectangle(bbox, outline="red", width=3)
                draw.text((bbox[0], bbox[1]), label, fill="red")
            debug_image.save(os.path.join(debug_dir, "01_detected_objects.jpg"))
            
        # 2. Find a reference object (e.g. a plate or bowl)
        reference_objects = [obj for obj in all_objects if obj["label"] in ["plate", "bowl"]]
        reference_object = max(reference_objects, key=lambda x: x["confidence"]) if reference_objects else None

        # 3. Depth estimation (DPT), run once for the whole image
        depth_map = weight_service.estimate_depth(image)
        if debug:
            depth_for_save = (depth_map - np.min(depth_map)) / (np.max(depth_map) - np.min(depth_map) + 1e-6) * 255.0
            Image.fromarray(depth_for_save.astype(np.uint8)).convert("L").save(os.path.join(debug_dir, "03_depth_map.png"))
        
        # Import the related services
        from .ai_service import classify_food_image
        from .nutrition_api_service import fetch_nutrition_data

        detected_foods = []
        total_nutrition = {"calories": 0, "protein": 0, "carbs": 0, "fat": 0, "fiber": 0}
        
        # 4. Iterate over each detected food object (YOLO box)
        food_objects = [obj for obj in all_objects if obj["label"] not in ["plate", "bowl"]]

        for i, food_obj in enumerate(food_objects):
            try:
                # a. Prompt SAM with the object's bounding box for precise segmentation
                input_box = [food_obj["bbox"]]
                masks = weight_service.segment_food(image, input_boxes=input_box)
                if not masks:
                    continue

                # SAM may return multiple masks for a single prompt; keep the largest one
                mask = max(masks, key=lambda m: np.sum(m))

                # b. Crop out the single food item using its mask (for classification)
                # (same logic as the previous version)
                rows, cols = np.any(mask, axis=1), np.any(mask, axis=0)
                if not np.any(rows) or not np.any(cols):
                    continue
                rmin, rmax = np.where(rows)[0][[0, -1]]
                cmin, cmax = np.where(cols)[0][[0, -1]]
                item_array = np.array(image)
                item_rgba = np.zeros((*item_array.shape[:2], 4), dtype=np.uint8)
                item_rgba[:, :, :3] = item_array
                item_rgba[:, :, 3] = mask * 255
                cropped_pil = Image.fromarray(item_rgba[rmin:rmax+1, cmin:cmax+1, :], 'RGBA')
                buffer = io.BytesIO()
                cropped_pil.save(buffer, format="PNG")
                item_image_bytes = buffer.getvalue()
                if debug:
                    cropped_pil.save(os.path.join(debug_dir, f"item_{i}_{food_obj['label']}_cropped.png"))

                # c. Classify the food type (with the more accurate food-recognition model)
                food_name = classify_food_image(item_image_bytes)
                
                # d. Compute volume and weight
                weight, confidence, error_range = weight_service.calculate_volume_and_weight(
                    mask, depth_map, food_name, reference_object
                )
                
                # e. Look up nutrition information
                nutrition_info = fetch_nutrition_data(food_name)
                if nutrition_info is None:
                    nutrition_info = {"calories": 0, "protein": 0, "carbs": 0, "fat": 0, "fiber": 0}

                # f. Scale nutrients by the estimated weight (nutrition data is per 100 g)
                weight_ratio = weight / 100
                adjusted_nutrition = {k: v * weight_ratio for k, v in nutrition_info.items()}
                
                # g. Accumulate totals
                for key in total_nutrition:
                    total_nutrition[key] += adjusted_nutrition.get(key, 0)

                # h. Store the per-item result
                detected_foods.append({
                    "food_name": food_name,
                    "estimated_weight": round(weight, 1),
                    "nutrition": {k: round(v, 1) for k, v in adjusted_nutrition.items()}
                })
            except Exception as item_e:
                logger.error(f"處理物件 '{food_obj['label']}' 時失敗: {str(item_e)}")
                continue

        # 5. Build the summary note
        note = f"Successfully analyzed {len(detected_foods)} food item(s) using the YOLO+SAM pipeline."
        if reference_object:
            note += f" Reference object detected: {reference_object['label']}; accuracy should be higher."
        else:
            note += " No reference object detected; weights are rough estimates and for reference only."
        
        result = {
            "detected_foods": detected_foods,
            "total_estimated_weight": round(sum(item['estimated_weight'] for item in detected_foods), 1),
            "total_nutrition": {k: round(v, 1) for k, v in total_nutrition.items()},
            "reference_object": reference_object["label"] if reference_object else None,
            "note": note
        }
        if debug:
            # Save the final segmentation overlay
            overlay_img = image.copy()
            overlay_array = np.array(overlay_img)
            # Find all masks again to draw
            all_food_boxes = [obj['bbox'] for obj in food_objects]
            all_masks = weight_service.segment_food(image, input_boxes=all_food_boxes)
            for mask in all_masks:
                color = np.random.randint(0, 255, size=3, dtype=np.uint8)
                overlay_array[mask] = (overlay_array[mask] * 0.5 + color * 0.5).astype(np.uint8)
            Image.fromarray(overlay_array).save(os.path.join(debug_dir, "02_final_segmentation.jpg"))
            result["debug_output_path"] = debug_dir
        return result
        
    except Exception as e:
        logger.error(f"多食物重量估算主流程失敗: {str(e)}")
        # 回傳包含錯誤訊息的標準結構
        result = {
            "detected_foods": [],
            "total_estimated_weight": 0,
            "total_nutrition": {},
            "reference_object": None,
            "note": f"分析失敗: {str(e)}"
        }
        if debug and debug_dir:
            result["debug_output_path"] = debug_dir
        return result
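

# Minimal local smoke-test sketch (not part of the service API). It assumes a
# sample image exists at the hypothetical path "sample_meal.jpg"; adjust the path
# before running. Note that importing this module already instantiates the global
# weight_service, which loads (and may download) the models.
if __name__ == "__main__":
    import asyncio
    import json

    with open("sample_meal.jpg", "rb") as f:
        sample_bytes = f.read()

    analysis = asyncio.run(estimate_food_weight(sample_bytes, debug=True))
    print(json.dumps(analysis, ensure_ascii=False, indent=2))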