Update app.py
Browse files
app.py
CHANGED
|
@@ -103,6 +103,8 @@ FOOD_DB = {
|
|
| 103 |
"egg": {"kcal":155,"carb_g":1.1,"protein_g":13, "fat_g":11, "sodium_mg":124, "cat":"豆魚蛋肉類", "base_g":60, "tip":"水煮/荷包少油,避免重鹹醬料"},
|
| 104 |
"banana":{"kcal":89, "carb_g":23, "protein_g":1.1,"fat_g":0.3,"sodium_mg":1, "cat":"水果類", "base_g":100, "tip":"控制份量,避免一次過量"},
|
| 105 |
"miso_soup":{"kcal":36,"carb_g":4.3,"protein_g":2.0,"fat_g":1.3,"sodium_mg":550, "cat":"湯品/飲品", "base_g":200, "tip":"味噌湯偏鹹,建議少量品嚐"},
|
|
|
|
|
|
|
| 106 |
}
|
| 107 |
|
| 108 |
ALIASES = {
|
|
@@ -137,22 +139,69 @@ def detect_foods_from_text(text: str) -> List[str]:
|
|
| 137 |
import re
|
| 138 |
DEFAULT_BASE_G = 100
|
| 139 |
STOPWORDS = {
|
|
|
|
| 140 |
"a","an","the","with","and","of","on","in","to","served","over","side","sides",
|
| 141 |
-
"
|
| 142 |
-
|
| 143 |
-
"sauce","soup","salad","topped","seasoned","style","japanese","taiwanese","korean","chinese",
|
| 144 |
"便當","套餐","一盤","一碗","配菜","附餐","湯","沙拉","醬","佐","搭配","附","拌","炒","滷","炸","烤","蒸","煮"
|
| 145 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
def extract_food_terms_free(text: str):
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
hits = set()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
for p in parts:
|
| 150 |
-
if not p:
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
hits.add(ALIASES.get(head, head))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
return list(hits)
|
| 157 |
|
| 158 |
def estimate_weight(name: str, plate_cm: int, portion: str) -> int:
|
|
|
|
| 103 |
"egg": {"kcal":155,"carb_g":1.1,"protein_g":13, "fat_g":11, "sodium_mg":124, "cat":"豆魚蛋肉類", "base_g":60, "tip":"水煮/荷包少油,避免重鹹醬料"},
|
| 104 |
"banana":{"kcal":89, "carb_g":23, "protein_g":1.1,"fat_g":0.3,"sodium_mg":1, "cat":"水果類", "base_g":100, "tip":"控制份量,避免一次過量"},
|
| 105 |
"miso_soup":{"kcal":36,"carb_g":4.3,"protein_g":2.0,"fat_g":1.3,"sodium_mg":550, "cat":"湯品/飲品", "base_g":200, "tip":"味噌湯偏鹹,建議少量品嚐"},
|
| 106 |
+
"salad": {"kcal":30,"carb_g":5,"protein_g":1.5,"fat_g":0.5,"sodium_mg":40,"cat":"蔬菜類","base_g":100,"tip":"少醬少油,優先清爽調味"},
|
| 107 |
+
"fish": {"kcal":170,"carb_g":0,"protein_g":22,"fat_g":8,"sodium_mg":70,"cat":"豆魚蛋肉類","base_g":120,"tip":"蒸/烤/煎少油,避免重鹹醬汁"},
|
| 108 |
}
|
| 109 |
|
| 110 |
ALIASES = {
|
|
|
|
| 139 |
import re
|
| 140 |
# NOTE(review): presumably the fallback "base_g" (grams per serving) for foods
# that have no entry in FOOD_DB -- confirm against the callers.
DEFAULT_BASE_G = 100

# Tokens dropped from a caption before a food head noun is chosen.
STOPWORDS = {
    # English: articles, prepositions and generic menu/description words
    "a", "an", "the",
    "with", "and", "of", "on", "in", "to", "over",
    "served", "side", "sides",
    "set", "dish", "meal",
    "mixed", "assorted", "fresh", "hot", "cold",
    "topped", "style", "seasoned",
    # Chinese: meal/container words, connectors and cooking-method characters
    "便當", "套餐", "一盤", "一碗", "配菜", "附餐",
    "湯", "沙拉", "醬",
    "佐", "搭配", "附",
    "拌", "炒", "滷", "炸", "烤", "蒸", "煮",
}

# Colour adjectives; never treated as food names.
COLOR_WORDS = {
    "white", "black", "red", "green", "yellow",
    "orange", "brown", "purple", "pink", "golden",
}

# Tableware / container nouns; never treated as food names.
UTENSILS = {
    "plate", "bowl", "tray", "box",
    "cup", "glass", "plateful", "bento",
}

# Portion words and miscellaneous modifiers; never treated as food names.
ADJ_MISC = {
    "piece", "slice", "fillet", "serving",
    "topped", "mixed", "assorted",
}

# Common food nouns. extract_food_terms_free scans the whole caption for
# these and adds any that occur as candidates.
FOOD_LIKE = {
    "salad", "fish", "chicken", "beef", "pork", "shrimp", "tofu", "egg",
    "rice", "noodles", "bread", "soup", "vegetables", "veggies", "fruit",
}
|
| 157 |
+
|
| 158 |
+
import re
|
| 159 |
+
|
| 160 |
# Patterns used by extract_food_terms_free, compiled once at import time.
# "piece/slice/fillet/serving of X" -> captures X.
_OF_PHRASE_RE = re.compile(
    r"(?:piece|slice|fillet|serving)\s+of\s+([a-z\u4e00-\u9fff]+)", flags=re.I
)
# Fragment separators: comma, semicolon, period, "and", "with", newline.
_FRAGMENT_SPLIT_RE = re.compile(r"(?:,|;|\.|\band\b|\bwith\b|\n)+", flags=re.I)
# A token is a run of lowercase Latin letters and/or CJK ideographs.
_WORD_RE = re.compile(r"[a-z\u4e00-\u9fff]+")


def extract_food_terms_free(text: str) -> List[str]:
    """Extract candidate food terms from a free-form caption.

    Unknown words are allowed: a term that has no entry in ``ALIASES`` is
    returned as-is so the caller can treat it as an unknown food.

    The caption is stripped and lower-cased, then three passes collect terms:

    1. Phrases of the form ``piece|slice|fillet|serving of X`` contribute
       ``X``, unless ``X`` is a colour, utensil, modifier or stopword.
    2. The caption is split into fragments on commas, semicolons, periods,
       ``and``, ``with`` and newlines. Within each fragment, colour, utensil,
       modifier and stopword tokens are removed and the last remaining token
       is taken as the head noun (e.g. "salad", "fish").
    3. Every word of ``FOOD_LIKE`` that occurs in the caption as a whole word
       is added as well. This pass always runs, so it supplements the
       results of the first two passes.

    Each collected term is mapped through ``ALIASES`` to its canonical key.

    Args:
        text: The caption to analyse. ``None`` or an empty string yields an
            empty result.

    Returns:
        The unique terms in first-seen order (pass 1, then pass 2, then
        pass 3 in alphabetical order), so the result is reproducible
        between runs.
    """
    if not text:
        return []

    t = text.strip().lower()
    noise = COLOR_WORDS | UTENSILS | ADJ_MISC | STOPWORDS

    # A dict is used as an insertion-ordered set: list(set) would return the
    # terms in an order that changes with string hash randomisation.
    hits = {}

    def remember(word):
        hits.setdefault(ALIASES.get(word, word), None)

    # 1) Special case "<portion word> of Y": take Y directly.
    #    NOTE: Y is the single token after "of", so a modifier that is not in
    #    the noise sets (e.g. "slice of grilled fish") is captured as the term.
    for match in _OF_PHRASE_RE.findall(t):
        candidate = match.strip()
        if candidate not in noise:
            remember(candidate)

    # 2) Split into fragments and keep the trailing noun of each fragment.
    for fragment in _FRAGMENT_SPLIT_RE.split(t):
        if not fragment:
            continue
        tokens = [
            w for w in _WORD_RE.findall(fragment)
            if w not in noise
            # Drop stray single Latin letters only. A single CJK character
            # can be a complete food name (e.g. 魚, 蛋, 飯); a plain
            # ``len(w) >= 2`` test would discard those and would also make
            # the single-character entries in STOPWORDS unreachable.
            and (len(w) >= 2 or not ("a" <= w <= "z"))
        ]
        if tokens:
            remember(tokens[-1])  # the fragment's last token is usually the noun

    # 3) Supplement with common food nouns found anywhere in the caption.
    #    Sorted so that the order of newly added terms is deterministic.
    for w in sorted(FOOD_LIKE):
        if re.search(rf"\b{re.escape(w)}\b", t):
            remember(w)

    return list(hits)
|
| 206 |
|
| 207 |
def estimate_weight(name: str, plate_cm: int, portion: str) -> int:
|