Spaces:
Running
on
Zero
Running
on
Zero
Upload 5 files
Browse files
fixed format ("indoor, ") issues
- llm_enhancer.py +16 -5
- llm_model_manager.py +44 -25
- object_description_generator.py +41 -40
- response_processor.py +105 -31
llm_enhancer.py
CHANGED
@@ -126,6 +126,18 @@ class LLMEnhancer:
|
|
126 |
# 10. 移除解釋性注釋
|
127 |
cleaned_response = self.response_processor.remove_explanatory_notes(raw_cleaned)
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
# 11. 事實準確性驗證
|
130 |
try:
|
131 |
cleaned_response = self.quality_validator.verify_factual_accuracy(
|
@@ -142,12 +154,9 @@ class LLMEnhancer:
|
|
142 |
cleaned_response, scene_type, original_desc
|
143 |
)
|
144 |
|
145 |
-
#
|
146 |
-
perspective = self.quality_validator.extract_perspective_from_description(original_desc)
|
147 |
-
if perspective and perspective.lower() not in cleaned_response.lower():
|
148 |
-
cleaned_response = f"{perspective}, {cleaned_response[0].lower()}{cleaned_response[1:]}"
|
149 |
|
150 |
-
# 13.
|
151 |
identical_final_cleanup = [
|
152 |
(r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
153 |
(r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
@@ -157,6 +166,7 @@ class LLMEnhancer:
|
|
157 |
|
158 |
for pattern, replacement in identical_final_cleanup:
|
159 |
cleaned_response = re.sub(pattern, replacement, cleaned_response, flags=re.IGNORECASE)
|
|
|
160 |
|
161 |
# 14. 最終驗證:如果結果過短,嘗試fallback
|
162 |
final_result = cleaned_response.strip()
|
@@ -183,6 +193,7 @@ class LLMEnhancer:
|
|
183 |
|
184 |
# 15. display enhanced description
|
185 |
self.logger.info(f"Scene description enhancement completed successfully ({len(final_result)} chars)")
|
|
|
186 |
return final_result
|
187 |
|
188 |
except Exception as e:
|
|
|
126 |
# 10. 移除解釋性注釋
|
127 |
cleaned_response = self.response_processor.remove_explanatory_notes(raw_cleaned)
|
128 |
|
129 |
+
# self.logger.info(f"DEBUG: Before factual verification: {cleaned_response[:50]}...")
|
130 |
+
|
131 |
+
# 10.5 事實準確性驗證
|
132 |
+
try:
|
133 |
+
cleaned_response = self.quality_validator.verify_factual_accuracy(
|
134 |
+
original_desc, cleaned_response, object_list
|
135 |
+
)
|
136 |
+
except Exception:
|
137 |
+
self.logger.warning("Fact verification failed; using response without verification")
|
138 |
+
|
139 |
+
# self.logger.info(f"DEBUG: After factual verification: {cleaned_response[:50]}...")
|
140 |
+
|
141 |
# 11. 事實準確性驗證
|
142 |
try:
|
143 |
cleaned_response = self.quality_validator.verify_factual_accuracy(
|
|
|
154 |
cleaned_response, scene_type, original_desc
|
155 |
)
|
156 |
|
157 |
+
# print(f"DEBUG: After scene type consistency: {cleaned_response[:50]}...")
|
|
|
|
|
|
|
158 |
|
159 |
+
# 13. 最終的 identical 詞彙清理(確保LLM輸出不包含重複性描述)
|
160 |
identical_final_cleanup = [
|
161 |
(r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
162 |
(r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
|
|
166 |
|
167 |
for pattern, replacement in identical_final_cleanup:
|
168 |
cleaned_response = re.sub(pattern, replacement, cleaned_response, flags=re.IGNORECASE)
|
169 |
+
# print(f"DEBUG: After identical cleanup: {cleaned_response[:50]}...")
|
170 |
|
171 |
# 14. 最終驗證:如果結果過短,嘗試fallback
|
172 |
final_result = cleaned_response.strip()
|
|
|
193 |
|
194 |
# 15. display enhanced description
|
195 |
self.logger.info(f"Scene description enhancement completed successfully ({len(final_result)} chars)")
|
196 |
+
# print(f"DEBUG: LLMEnhancer final_result before return: {final_result[:50]}..." if final_result else "DEBUG: LLMEnhancer final_result is empty")
|
197 |
return final_result
|
198 |
|
199 |
except Exception as e:
|
llm_model_manager.py
CHANGED
@@ -173,19 +173,6 @@ class LLMModelManager:
|
|
173 |
self.logger.debug("GPU cache cleared")
|
174 |
|
175 |
def generate_response(self, prompt: str, **generation_kwargs) -> str:
|
176 |
-
"""
|
177 |
-
生成LLM回應
|
178 |
-
|
179 |
-
Args:
|
180 |
-
prompt: 輸入提示詞
|
181 |
-
**generation_kwargs: 額外的生成參數,可覆蓋預設值
|
182 |
-
|
183 |
-
Returns:
|
184 |
-
str: 生成的回應文本
|
185 |
-
|
186 |
-
Raises:
|
187 |
-
ModelGenerationError: 當生成失敗時
|
188 |
-
"""
|
189 |
# 確保模型已載入
|
190 |
if not self._model_loaded:
|
191 |
self._load_model()
|
@@ -194,6 +181,10 @@ class LLMModelManager:
|
|
194 |
self.call_count += 1
|
195 |
self.logger.info(f"Generating response (call #{self.call_count})")
|
196 |
|
|
|
|
|
|
|
|
|
197 |
# clean GPU
|
198 |
self._clear_gpu_cache()
|
199 |
|
@@ -216,14 +207,21 @@ class LLMModelManager:
|
|
216 |
"use_cache": True,
|
217 |
})
|
218 |
|
219 |
-
#
|
220 |
with torch.no_grad():
|
221 |
outputs = self.model.generate(inputs.input_ids, **generation_params)
|
222 |
|
223 |
# 解碼回應
|
224 |
full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
225 |
response = self._extract_generated_response(full_response, prompt)
|
226 |
|
|
|
|
|
|
|
227 |
if not response or len(response.strip()) < 10:
|
228 |
raise ModelGenerationError("Generated response is too short or empty")
|
229 |
|
@@ -281,13 +279,6 @@ class LLMModelManager:
|
|
281 |
def _extract_generated_response(self, full_response: str, prompt: str) -> str:
|
282 |
"""
|
283 |
從完整回應中提取生成的部分
|
284 |
-
|
285 |
-
Args:
|
286 |
-
full_response: 模型的完整輸出
|
287 |
-
prompt: 原始提示詞
|
288 |
-
|
289 |
-
Returns:
|
290 |
-
str: 提取的生成回應
|
291 |
"""
|
292 |
# 尋找assistant標記
|
293 |
assistant_tag = "<|assistant|>"
|
@@ -298,14 +289,42 @@ class LLMModelManager:
|
|
298 |
user_tag = "<|user|>"
|
299 |
if user_tag in response:
|
300 |
response = response.split(user_tag)[0].strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
302 |
return response
|
303 |
|
304 |
-
|
305 |
-
|
306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
307 |
|
308 |
-
return
|
309 |
|
310 |
def reset_context(self):
|
311 |
"""重置模型上下文,清理GPU緩存"""
|
|
|
173 |
self.logger.debug("GPU cache cleared")
|
174 |
|
175 |
def generate_response(self, prompt: str, **generation_kwargs) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
# 確保模型已載入
|
177 |
if not self._model_loaded:
|
178 |
self._load_model()
|
|
|
181 |
self.call_count += 1
|
182 |
self.logger.info(f"Generating response (call #{self.call_count})")
|
183 |
|
184 |
+
# # record input prompt
|
185 |
+
# self.logger.info(f"DEBUG: Input prompt length: {len(prompt)}")
|
186 |
+
# self.logger.info(f"DEBUG: Input prompt preview: {prompt[:200]}...")
|
187 |
+
|
188 |
# clean GPU
|
189 |
self._clear_gpu_cache()
|
190 |
|
|
|
207 |
"use_cache": True,
|
208 |
})
|
209 |
|
210 |
+
# response
|
211 |
with torch.no_grad():
|
212 |
outputs = self.model.generate(inputs.input_ids, **generation_params)
|
213 |
|
214 |
# 解碼回應
|
215 |
full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
216 |
+
|
217 |
+
# # record whole response
|
218 |
+
# self.logger.info(f"DEBUG: Full LLM response: {full_response}")
|
219 |
+
|
220 |
response = self._extract_generated_response(full_response, prompt)
|
221 |
|
222 |
+
# # 記錄提取後的回應
|
223 |
+
# self.logger.info(f"DEBUG: Extracted response: {response}")
|
224 |
+
|
225 |
if not response or len(response.strip()) < 10:
|
226 |
raise ModelGenerationError("Generated response is too short or empty")
|
227 |
|
|
|
279 |
def _extract_generated_response(self, full_response: str, prompt: str) -> str:
|
280 |
"""
|
281 |
從完整回應中提取生成的部分
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
"""
|
283 |
# 尋找assistant標記
|
284 |
assistant_tag = "<|assistant|>"
|
|
|
289 |
user_tag = "<|user|>"
|
290 |
if user_tag in response:
|
291 |
response = response.split(user_tag)[0].strip()
|
292 |
+
else:
|
293 |
+
# 移除輸入提示詞
|
294 |
+
if full_response.startswith(prompt):
|
295 |
+
response = full_response[len(prompt):].strip()
|
296 |
+
else:
|
297 |
+
response = full_response.strip()
|
298 |
+
|
299 |
+
# 移除不自然的場景類型前綴
|
300 |
+
response = self._remove_scene_type_prefixes(response)
|
301 |
+
|
302 |
+
return response
|
303 |
+
|
304 |
+
def _remove_scene_type_prefixes(self, response: str) -> str:
|
305 |
+
"""
|
306 |
+
移除LLM生成回應中的場景類型前綴
|
307 |
|
308 |
+
Args:
|
309 |
+
response: 原始LLM回應
|
310 |
+
|
311 |
+
Returns:
|
312 |
+
str: 移除前綴後的回應
|
313 |
+
"""
|
314 |
+
if not response:
|
315 |
return response
|
316 |
|
317 |
+
prefix_patterns = [r'^[A-Za-z]+\,\s*']
|
318 |
+
|
319 |
+
# 應用清理模式
|
320 |
+
for pattern in prefix_patterns:
|
321 |
+
response = re.sub(pattern, '', response, flags=re.IGNORECASE)
|
322 |
+
|
323 |
+
# 確保首字母大寫
|
324 |
+
if response and response[0].islower():
|
325 |
+
response = response[0].upper() + response[1:]
|
326 |
|
327 |
+
return response.strip()
|
328 |
|
329 |
def reset_context(self):
|
330 |
"""重置模型上下文,清理GPU緩存"""
|
object_description_generator.py
CHANGED
@@ -389,7 +389,7 @@ class ObjectDescriptionGenerator:
|
|
389 |
def optimize_object_description(self, description: str) -> str:
|
390 |
"""
|
391 |
優化物件描述文本,消除冗餘重複並改善表達流暢度
|
392 |
-
|
393 |
這個函數是後處理階段的關鍵組件,負責清理和精簡自然語言生成系統
|
394 |
產出的描述文字。它專門處理常見的重複問題,如相同物件的重複
|
395 |
列舉和冗餘的空間描述,讓最終的描述更簡潔自然。
|
@@ -402,31 +402,31 @@ class ObjectDescriptionGenerator:
|
|
402 |
"""
|
403 |
try:
|
404 |
import re
|
405 |
-
|
406 |
-
# 1.
|
407 |
# 使用通用模式來識別和移除不必要的空間描述
|
408 |
# 例如:"bed in the room" -> "bed",因為床本身就表示是室內環境
|
409 |
description = self._remove_redundant_spatial_qualifiers(description)
|
410 |
|
411 |
-
# 2.
|
412 |
-
# 尋找形如 "with X, Y, Z" 或 "with X and Y"
|
413 |
# 使用正則表達式捕獲 "with" 關鍵字後的物件序列
|
414 |
# 注意:正則表達式需要修正以避免貪婪匹配的問題
|
415 |
object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
|
416 |
-
|
417 |
# 遍歷每個找到的物件列表進行重複檢測和優化
|
418 |
for obj_list in object_lists:
|
419 |
-
# 3. 解析單個物件列表中的項目
|
420 |
# 使用更精確的正則表達式來分割物件項目
|
421 |
# 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
|
422 |
# 需要特別注意處理最後一個 "and" 的情況
|
423 |
-
|
424 |
# 先處理逗號格式 "A, B, and C"
|
425 |
if ", and " in obj_list:
|
426 |
# 分割 ", and " 前後的部分
|
427 |
before_last_and = obj_list.rsplit(", and ", 1)[0]
|
428 |
last_item = obj_list.rsplit(", and ", 1)[1]
|
429 |
-
|
430 |
# 處理前面的項目(用逗號分割)
|
431 |
front_items = [item.strip() for item in before_last_and.split(",")]
|
432 |
# 添加最後一個項目
|
@@ -437,11 +437,11 @@ class ObjectDescriptionGenerator:
|
|
437 |
else:
|
438 |
# 處理純逗號分隔的列表
|
439 |
all_items = [item.strip() for item in obj_list.split(",")]
|
440 |
-
|
441 |
-
# 4. 統計物件出現頻率
|
442 |
# 建立字典來記錄每個物件的出現次數
|
443 |
item_counts = {}
|
444 |
-
|
445 |
for item in all_items:
|
446 |
# 清理項目文字並過濾無效內容
|
447 |
item = item.strip()
|
@@ -453,11 +453,11 @@ class ObjectDescriptionGenerator:
|
|
453 |
if clean_item not in item_counts:
|
454 |
item_counts[clean_item] = 0
|
455 |
item_counts[clean_item] += 1
|
456 |
-
|
457 |
-
# 5. 生成優化後的物件列表
|
458 |
if item_counts:
|
459 |
new_items = []
|
460 |
-
|
461 |
for item, count in item_counts.items():
|
462 |
if count > 1:
|
463 |
# 對於重複項目,使用數字加複數形式
|
@@ -466,8 +466,8 @@ class ObjectDescriptionGenerator:
|
|
466 |
else:
|
467 |
# 單個項目保持原樣
|
468 |
new_items.append(item)
|
469 |
-
|
470 |
-
# 6. 重新格式化物件列表
|
471 |
# 使用標準的英文列表連接格式
|
472 |
if len(new_items) == 1:
|
473 |
new_list = new_items[0]
|
@@ -476,13 +476,13 @@ class ObjectDescriptionGenerator:
|
|
476 |
else:
|
477 |
# 使用逗號格式確保清晰度
|
478 |
new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
|
479 |
-
|
480 |
# 7. 在原文中替換優化後的列表
|
481 |
-
#
|
482 |
description = description.replace(obj_list, new_list)
|
483 |
-
|
484 |
return description
|
485 |
-
|
486 |
except Exception as e:
|
487 |
self.logger.warning(f"Error optimizing object description: {str(e)}")
|
488 |
return description
|
@@ -490,19 +490,19 @@ class ObjectDescriptionGenerator:
|
|
490 |
def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
|
491 |
"""
|
492 |
移除描述中冗餘的空間限定詞
|
493 |
-
|
494 |
這個方法使用模式匹配來識別和移除不必要的空間描述,例如
|
495 |
"bed in the room" 中的 "in the room" 部分通常是多餘的,因為
|
496 |
床這個物件本身就是室內環境。
|
497 |
-
|
498 |
Args:
|
499 |
description: 包含可能多餘空間描述的文本
|
500 |
-
|
501 |
Returns:
|
502 |
str: 移除多餘空間限定詞後的文本
|
503 |
"""
|
504 |
import re
|
505 |
-
|
506 |
# 定義常見的多餘空間表達模式
|
507 |
# 這些模式捕獲「物件 + 不必要的空間限定」的情況
|
508 |
redundant_patterns = [
|
@@ -515,23 +515,23 @@ class ObjectDescriptionGenerator:
|
|
515 |
# 一般性的多餘表達:「在場景中」、「在圖片中」等
|
516 |
(r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
|
517 |
]
|
518 |
-
|
519 |
for pattern, replacement in redundant_patterns:
|
520 |
description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
|
521 |
-
|
522 |
return description
|
523 |
|
524 |
|
525 |
def _normalize_item_for_counting(self, item: str) -> str:
|
526 |
"""
|
527 |
正規化物件項目以便準確計數
|
528 |
-
|
529 |
移除冠詞和其他可能影響計數準確性的前綴詞彙,
|
530 |
確保 "a car" 和 "car" 被視為同一物件類型。
|
531 |
-
|
532 |
Args:
|
533 |
item: 原始物件項目字串
|
534 |
-
|
535 |
Returns:
|
536 |
str: 正規化後的物件項目
|
537 |
"""
|
@@ -542,10 +542,10 @@ class ObjectDescriptionGenerator:
|
|
542 |
def _make_plural(self, item: str) -> str:
|
543 |
"""
|
544 |
將單數名詞轉換為複數形式
|
545 |
-
|
546 |
Args:
|
547 |
item: 單數形式的名詞
|
548 |
-
|
549 |
Returns:
|
550 |
str: 複數形式的名詞
|
551 |
"""
|
@@ -589,22 +589,23 @@ class ObjectDescriptionGenerator:
|
|
589 |
self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
|
590 |
f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
|
591 |
|
592 |
-
# 1.
|
593 |
ambiance_parts = []
|
594 |
if lighting_info:
|
595 |
time_of_day = lighting_info.get("time_of_day", "unknown lighting")
|
596 |
is_indoor = lighting_info.get("is_indoor")
|
597 |
-
|
|
|
|
|
|
|
|
|
598 |
if is_indoor is True:
|
599 |
-
ambiance_statement
|
600 |
elif is_indoor is False:
|
601 |
-
ambiance_statement
|
602 |
else:
|
603 |
-
ambiance_statement
|
604 |
|
605 |
-
# remove underline
|
606 |
-
readable_lighting = f"with {time_of_day.replace('_', ' ')} lighting conditions"
|
607 |
-
ambiance_statement += f", likely {readable_lighting}."
|
608 |
ambiance_parts.append(ambiance_statement)
|
609 |
|
610 |
if viewpoint and viewpoint != "eye_level":
|
|
|
389 |
def optimize_object_description(self, description: str) -> str:
|
390 |
"""
|
391 |
優化物件描述文本,消除冗餘重複並改善表達流暢度
|
392 |
+
|
393 |
這個函數是後處理階段的關鍵組件,負責清理和精簡自然語言生成系統
|
394 |
產出的描述文字。它專門處理常見的重複問題,如相同物件的重複
|
395 |
列舉和冗餘的空間描述,讓最終的描述更簡潔自然。
|
|
|
402 |
"""
|
403 |
try:
|
404 |
import re
|
405 |
+
|
406 |
+
# 1. 處理多餘的空間限定表達
|
407 |
# 使用通用模式來識別和移除不必要的空間描述
|
408 |
# 例如:"bed in the room" -> "bed",因為床本身就表示是室內環境
|
409 |
description = self._remove_redundant_spatial_qualifiers(description)
|
410 |
|
411 |
+
# 2. 辨識並處理物件列表的重複問題
|
412 |
+
# 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表
|
413 |
# 使用正則表達式捕獲 "with" 關鍵字後的物件序列
|
414 |
# 注意:正則表達式需要修正以避免貪婪匹配的問題
|
415 |
object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
|
416 |
+
|
417 |
# 遍歷每個找到的物件列表進行重複檢測和優化
|
418 |
for obj_list in object_lists:
|
419 |
+
# 3. 解析單個物件列表中的項目
|
420 |
# 使用更精確的正則表達式來分割物件項目
|
421 |
# 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
|
422 |
# 需要特別注意處理最後一個 "and" 的情況
|
423 |
+
|
424 |
# 先處理逗號格式 "A, B, and C"
|
425 |
if ", and " in obj_list:
|
426 |
# 分割 ", and " 前後的部分
|
427 |
before_last_and = obj_list.rsplit(", and ", 1)[0]
|
428 |
last_item = obj_list.rsplit(", and ", 1)[1]
|
429 |
+
|
430 |
# 處理前面的項目(用逗號分割)
|
431 |
front_items = [item.strip() for item in before_last_and.split(",")]
|
432 |
# 添加最後一個項目
|
|
|
437 |
else:
|
438 |
# 處理純逗號分隔的列表
|
439 |
all_items = [item.strip() for item in obj_list.split(",")]
|
440 |
+
|
441 |
+
# 4. 統計物件出現頻率
|
442 |
# 建立字典來記錄每個物件的出現次數
|
443 |
item_counts = {}
|
444 |
+
|
445 |
for item in all_items:
|
446 |
# 清理項目文字並過濾無效內容
|
447 |
item = item.strip()
|
|
|
453 |
if clean_item not in item_counts:
|
454 |
item_counts[clean_item] = 0
|
455 |
item_counts[clean_item] += 1
|
456 |
+
|
457 |
+
# 5. 生成優化後的物件列表
|
458 |
if item_counts:
|
459 |
new_items = []
|
460 |
+
|
461 |
for item, count in item_counts.items():
|
462 |
if count > 1:
|
463 |
# 對於重複項目,使用數字加複數形式
|
|
|
466 |
else:
|
467 |
# 單個項目保持原樣
|
468 |
new_items.append(item)
|
469 |
+
|
470 |
+
# 6. 重新格式化物件列表
|
471 |
# 使用標準的英文列表連接格式
|
472 |
if len(new_items) == 1:
|
473 |
new_list = new_items[0]
|
|
|
476 |
else:
|
477 |
# 使用逗號格式確保清晰度
|
478 |
new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
|
479 |
+
|
480 |
# 7. 在原文中替換優化後的列表
|
481 |
+
# 將原始的多餘列表替換為優化後的簡潔版本
|
482 |
description = description.replace(obj_list, new_list)
|
483 |
+
|
484 |
return description
|
485 |
+
|
486 |
except Exception as e:
|
487 |
self.logger.warning(f"Error optimizing object description: {str(e)}")
|
488 |
return description
|
|
|
490 |
def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
|
491 |
"""
|
492 |
移除描述中冗餘的空間限定詞
|
493 |
+
|
494 |
這個方法使用模式匹配來識別和移除不必要的空間描述,例如
|
495 |
"bed in the room" 中的 "in the room" 部分通常是多餘的,因為
|
496 |
床這個物件本身就是室內環境。
|
497 |
+
|
498 |
Args:
|
499 |
description: 包含可能多餘空間描述的文本
|
500 |
+
|
501 |
Returns:
|
502 |
str: 移除多餘空間限定詞後的文本
|
503 |
"""
|
504 |
import re
|
505 |
+
|
506 |
# 定義常見的多餘空間表達模式
|
507 |
# 這些模式捕獲「物件 + 不必要的空間限定」的情況
|
508 |
redundant_patterns = [
|
|
|
515 |
# 一般性的多餘表達:「在場景中」、「在圖片中」等
|
516 |
(r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
|
517 |
]
|
518 |
+
|
519 |
for pattern, replacement in redundant_patterns:
|
520 |
description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
|
521 |
+
|
522 |
return description
|
523 |
|
524 |
|
525 |
def _normalize_item_for_counting(self, item: str) -> str:
|
526 |
"""
|
527 |
正規化物件項目以便準確計數
|
528 |
+
|
529 |
移除冠詞和其他可能影響計數準確性的前綴詞彙,
|
530 |
確保 "a car" 和 "car" 被視為同一物件類型。
|
531 |
+
|
532 |
Args:
|
533 |
item: 原始物件項目字串
|
534 |
+
|
535 |
Returns:
|
536 |
str: 正規化後的物件項目
|
537 |
"""
|
|
|
542 |
def _make_plural(self, item: str) -> str:
|
543 |
"""
|
544 |
將單數名詞轉換為複數形式
|
545 |
+
|
546 |
Args:
|
547 |
item: 單數形式的名詞
|
548 |
+
|
549 |
Returns:
|
550 |
str: 複數形式的名詞
|
551 |
"""
|
|
|
589 |
self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
|
590 |
f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
|
591 |
|
592 |
+
# 1. 整體氛圍(照明和視角)- 移除室內外標籤
|
593 |
ambiance_parts = []
|
594 |
if lighting_info:
|
595 |
time_of_day = lighting_info.get("time_of_day", "unknown lighting")
|
596 |
is_indoor = lighting_info.get("is_indoor")
|
597 |
+
|
598 |
+
# 直接描述照明條件,不加入室內外標籤
|
599 |
+
readable_lighting = f"{time_of_day.replace('_', ' ')} lighting conditions"
|
600 |
+
|
601 |
+
# 根據室內外環境調整描述但不直接標明
|
602 |
if is_indoor is True:
|
603 |
+
ambiance_statement = f"The scene features {readable_lighting} characteristic of an interior space."
|
604 |
elif is_indoor is False:
|
605 |
+
ambiance_statement = f"The scene displays {readable_lighting} typical of an outdoor environment."
|
606 |
else:
|
607 |
+
ambiance_statement = f"The scene presents {readable_lighting}."
|
608 |
|
|
|
|
|
|
|
609 |
ambiance_parts.append(ambiance_statement)
|
610 |
|
611 |
if viewpoint and viewpoint != "eye_level":
|
response_processor.py
CHANGED
@@ -60,7 +60,11 @@ class ResponseProcessor:
|
|
60 |
"Here is a rewritten scene description that adheres to the provided critical rules:",
|
61 |
"Here is the rewritten scene description:",
|
62 |
"Here's a rewritten scene description:",
|
63 |
-
"The rewritten scene description is as follows:"
|
|
|
|
|
|
|
|
|
64 |
]
|
65 |
|
66 |
# 設置需要移除的後綴短語
|
@@ -187,23 +191,13 @@ class ResponseProcessor:
|
|
187 |
raise ResponseProcessingError(error_msg) from e
|
188 |
|
189 |
def clean_response(self, response: str, model_type: str = "general") -> str:
|
190 |
-
"""
|
191 |
-
清理LLM回應
|
192 |
-
|
193 |
-
Args:
|
194 |
-
response: 原始LLM回應
|
195 |
-
model_type: 模型類型(用於特定清理規則)
|
196 |
-
|
197 |
-
Returns:
|
198 |
-
str: 清理後的回應
|
199 |
-
|
200 |
-
Raises:
|
201 |
-
ResponseProcessingError: 當回應處理失敗時
|
202 |
-
"""
|
203 |
if not response:
|
204 |
raise ResponseProcessingError("Empty response provided for cleaning")
|
205 |
|
206 |
try:
|
|
|
|
|
|
|
207 |
self.logger.debug(f"Starting response cleaning (original length: {len(response)})")
|
208 |
|
209 |
# 保存原始回應作為備份
|
@@ -215,6 +209,9 @@ class ResponseProcessor:
|
|
215 |
else:
|
216 |
cleaned_response = self._clean_general_response(response)
|
217 |
|
|
|
|
|
|
|
218 |
# 如果清理後內容過短,嘗試從原始回應中恢復
|
219 |
if len(cleaned_response.strip()) < 40:
|
220 |
self.logger.warning("Cleaned response too short, attempting recovery")
|
@@ -447,23 +444,52 @@ class ResponseProcessor:
|
|
447 |
return response
|
448 |
|
449 |
def _remove_introduction_prefixes(self, response: str) -> str:
|
450 |
-
"""
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
]
|
457 |
|
458 |
-
|
459 |
-
|
|
|
|
|
|
|
|
|
460 |
|
461 |
-
|
462 |
-
for prefix in self.prefixes_to_remove:
|
463 |
-
if response.lower().startswith(prefix.lower()):
|
464 |
-
response = response[len(prefix):].strip()
|
465 |
|
466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
467 |
|
468 |
def _remove_format_markers(self, response: str) -> str:
|
469 |
"""移除格式標記和上下文標籤(保留括號內的地理與細節資訊)"""
|
@@ -668,7 +694,7 @@ class ResponseProcessor:
|
|
668 |
# 數字到文字
|
669 |
number_conversions = {
|
670 |
'2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
|
671 |
-
'7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
|
672 |
'11': 'eleven', '12': 'twelve'
|
673 |
}
|
674 |
|
@@ -677,15 +703,15 @@ class ResponseProcessor:
|
|
677 |
# 模式1: 數字 + 單一複數詞 (如 "7 chairs")
|
678 |
pattern1 = rf'\b{digit}\s+([a-zA-Z]+s)\b'
|
679 |
processed_response = re.sub(pattern1, rf'{word} \1', processed_response)
|
680 |
-
|
681 |
# 模式2: 數字 + 修飾詞 + 複數詞 (如 "7 more chairs")
|
682 |
pattern2 = rf'\b{digit}\s+(more|additional|other|identical)\s+([a-zA-Z]+s)\b'
|
683 |
processed_response = re.sub(pattern2, rf'{word} \1 \2', processed_response, flags=re.IGNORECASE)
|
684 |
-
|
685 |
# 模式3: 數字 + 形容詞 + 複數詞 (如 "2 dining tables")
|
686 |
pattern3 = rf'\b{digit}\s+([a-zA-Z]+)\s+([a-zA-Z]+s)\b'
|
687 |
processed_response = re.sub(pattern3, rf'{word} \1 \2', processed_response)
|
688 |
-
|
689 |
# 模式4: 介詞片語中的數字 (如 "around 2 tables")
|
690 |
pattern4 = rf'\b(around|approximately|about)\s+{digit}\s+([a-zA-Z]+s)\b'
|
691 |
processed_response = re.sub(pattern4, rf'\1 {word} \2', processed_response, flags=re.IGNORECASE)
|
@@ -978,6 +1004,25 @@ class ResponseProcessor:
|
|
978 |
|
979 |
def _final_formatting(self, response: str) -> str:
|
980 |
"""最終格式化處理"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
981 |
# 確保首字母大寫
|
982 |
if response and response[0].islower():
|
983 |
response = response[0].upper() + response[1:]
|
@@ -988,6 +1033,35 @@ class ResponseProcessor:
|
|
988 |
|
989 |
return response.strip()
|
990 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
991 |
def _recover_from_overcleaning(self, original_response: str) -> str:
|
992 |
"""從過度清理中恢復內容"""
|
993 |
try:
|
|
|
60 |
"Here is a rewritten scene description that adheres to the provided critical rules:",
|
61 |
"Here is the rewritten scene description:",
|
62 |
"Here's a rewritten scene description:",
|
63 |
+
"The rewritten scene description is as follows:",
|
64 |
+
"indoor,",
|
65 |
+
"outdoor,",
|
66 |
+
"indoor ",
|
67 |
+
"outdoor "
|
68 |
]
|
69 |
|
70 |
# 設置需要移除的後綴短語
|
|
|
191 |
raise ResponseProcessingError(error_msg) from e
|
192 |
|
193 |
def clean_response(self, response: str, model_type: str = "general") -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
if not response:
|
195 |
raise ResponseProcessingError("Empty response provided for cleaning")
|
196 |
|
197 |
try:
|
198 |
+
# 調試:記錄清理前的原始回應
|
199 |
+
self.logger.info(f"DEBUG: Response before cleaning: {response}")
|
200 |
+
|
201 |
self.logger.debug(f"Starting response cleaning (original length: {len(response)})")
|
202 |
|
203 |
# 保存原始回應作為備份
|
|
|
209 |
else:
|
210 |
cleaned_response = self._clean_general_response(response)
|
211 |
|
212 |
+
# 調試:記錄清理後的回應
|
213 |
+
self.logger.info(f"DEBUG: Response after cleaning: {cleaned_response}")
|
214 |
+
|
215 |
# 如果清理後內容過短,嘗試從原始回應中恢復
|
216 |
if len(cleaned_response.strip()) < 40:
|
217 |
self.logger.warning("Cleaned response too short, attempting recovery")
|
|
|
444 |
return response
|
445 |
|
446 |
def _remove_introduction_prefixes(self, response: str) -> str:
|
447 |
+
"""
|
448 |
+
移除介紹性前綴,強化對多種模式的處理。
|
449 |
+
"""
|
450 |
+
if not response:
|
451 |
+
return ""
|
452 |
+
|
453 |
+
cleaned_response = response.strip()
|
454 |
+
|
455 |
+
# 1. 將所有要移除的前綴模式合併成一個大的正則表達式
|
456 |
+
# - r'^(?: ... )' 表示從字串開頭匹配非捕獲分組
|
457 |
+
# - '|' 用於分隔不同的模式
|
458 |
+
# - re.escape() 用於安全地處理 self.prefixes_to_remove 中的特殊字符
|
459 |
+
# - `\\s*,?` 處理可選的逗號和空格
|
460 |
+
# - `\\s*` 處理結尾的任意空格
|
461 |
+
all_prefix_patterns = [
|
462 |
+
r'Here\s+is\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?scene\s+description.*?:',
|
463 |
+
r'The\s+(?:rewritten\s+|enhanced\s+)?(?:scene\s+)?description\s+is.*?:',
|
464 |
+
r'Here\'s\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?description.*?:',
|
465 |
+
|
466 |
+
# 這個模式會匹配這些詞,無論後面是逗號還是空格
|
467 |
+
r'(?:indoor|outdoor|inside|outside)\s*,?'
|
468 |
]
|
469 |
|
470 |
+
# 將 self.prefixes_to_remove 中的字符串也轉換為正則表達式模式
|
471 |
+
# 確保 self.prefixes_to_remove 存在,否則提供一個空列表
|
472 |
+
prefixes_to_add = getattr(self, 'prefixes_to_remove', [])
|
473 |
+
for prefix in prefixes_to_add:
|
474 |
+
# 使用 re.escape 來確保前綴中的任何特殊字符被正確處理
|
475 |
+
all_prefix_patterns.append(re.escape(prefix))
|
476 |
|
477 |
+
cleaned_response = re.sub(r'^(?:indoor|outdoor|inside|outside)\s*,?\s*', '', cleaned_response, flags=re.IGNORECASE).strip()
|
|
|
|
|
|
|
478 |
|
479 |
+
# 將所有模式用 '|' 連接起來,形成一個大的組合模式
|
480 |
+
# 我們在模式的結尾加上 \\s* 來匹配並移除前綴後可能跟隨的空格
|
481 |
+
combined_pattern = r'^(?:' + '|'.join(all_prefix_patterns) + r')\s*'
|
482 |
+
|
483 |
+
# 2. 執行一次性的替換,並忽略大小寫
|
484 |
+
# 這一行程式碼會移除所有匹配到的前綴
|
485 |
+
cleaned_response = re.sub(combined_pattern, '', cleaned_response, flags=re.IGNORECASE).strip()
|
486 |
+
|
487 |
+
# 3. 確保首字母大寫
|
488 |
+
# 移除前綴後,新的句首可能變成小寫, 這邊得修正
|
489 |
+
if cleaned_response:
|
490 |
+
cleaned_response = cleaned_response[0].upper() + cleaned_response[1:]
|
491 |
+
|
492 |
+
return cleaned_response
|
493 |
|
494 |
def _remove_format_markers(self, response: str) -> str:
|
495 |
"""移除格式標記和上下文標籤(保留括號內的地理與細節資訊)"""
|
|
|
694 |
# 數字到文字
|
695 |
number_conversions = {
|
696 |
'2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
|
697 |
+
'7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
|
698 |
'11': 'eleven', '12': 'twelve'
|
699 |
}
|
700 |
|
|
|
703 |
# 模式1: 數字 + 單一複數詞 (如 "7 chairs")
|
704 |
pattern1 = rf'\b{digit}\s+([a-zA-Z]+s)\b'
|
705 |
processed_response = re.sub(pattern1, rf'{word} \1', processed_response)
|
706 |
+
|
707 |
# 模式2: 數字 + 修飾詞 + 複數詞 (如 "7 more chairs")
|
708 |
pattern2 = rf'\b{digit}\s+(more|additional|other|identical)\s+([a-zA-Z]+s)\b'
|
709 |
processed_response = re.sub(pattern2, rf'{word} \1 \2', processed_response, flags=re.IGNORECASE)
|
710 |
+
|
711 |
# 模式3: 數字 + 形容詞 + 複數詞 (如 "2 dining tables")
|
712 |
pattern3 = rf'\b{digit}\s+([a-zA-Z]+)\s+([a-zA-Z]+s)\b'
|
713 |
processed_response = re.sub(pattern3, rf'{word} \1 \2', processed_response)
|
714 |
+
|
715 |
# 模式4: 介詞片語中的數字 (如 "around 2 tables")
|
716 |
pattern4 = rf'\b(around|approximately|about)\s+{digit}\s+([a-zA-Z]+s)\b'
|
717 |
processed_response = re.sub(pattern4, rf'\1 {word} \2', processed_response, flags=re.IGNORECASE)
|
|
|
1004 |
|
1005 |
def _final_formatting(self, response: str) -> str:
|
1006 |
"""最終格式化處理"""
|
1007 |
+
# 專門處理 "indoor," 前綴問題
|
1008 |
+
indoor_patterns = [
|
1009 |
+
r'^indoor\s*,\s*',
|
1010 |
+
r'^outdoor\s*,\s*',
|
1011 |
+
r'^inside\s*,\s*',
|
1012 |
+
r'^outside\s*,\s*',
|
1013 |
+
r'^indoor\s+',
|
1014 |
+
r'^outdoor\s+',
|
1015 |
+
]
|
1016 |
+
|
1017 |
+
for pattern in indoor_patterns:
|
1018 |
+
response = re.sub(pattern, '', response, flags=re.IGNORECASE)
|
1019 |
+
|
1020 |
+
# 移除開頭的空白和標點符號
|
1021 |
+
response = re.sub(r'^[\s,;:.-]+', '', response)
|
1022 |
+
|
1023 |
+
# 修復常見的語法問題
|
1024 |
+
response = self._fix_grammatical_issues(response)
|
1025 |
+
|
1026 |
# 確保首字母大寫
|
1027 |
if response and response[0].islower():
|
1028 |
response = response[0].upper() + response[1:]
|
|
|
1033 |
|
1034 |
return response.strip()
|
1035 |
|
1036 |
+
def _fix_grammatical_issues(self, response: str) -> str:
|
1037 |
+
"""修復常見的語法問題"""
|
1038 |
+
if not response:
|
1039 |
+
return response
|
1040 |
+
|
1041 |
+
# 修復不完整的句子開頭
|
1042 |
+
grammar_fixes = [
|
1043 |
+
# 修復 "A dining table with... A dining table..." 重複問題
|
1044 |
+
(r'\b(A|An)\s+([^.!?]*?)\s+\1\s+\2', r'\1 \2'),
|
1045 |
+
|
1046 |
+
# 修復 "This scene presents a scene" 重複
|
1047 |
+
(r'\bThis scene presents a scene\b', 'This scene presents'),
|
1048 |
+
|
1049 |
+
# 修復不完整的句子 "A dining table with four chairs and a dining table"
|
1050 |
+
(r'\b([A-Z][^.!?]*?)\s+and\s+a\s+\1\b', r'\1'),
|
1051 |
+
|
1052 |
+
# 修復空的介詞短語
|
1053 |
+
(r'\bwith\s+with\b', 'with'),
|
1054 |
+
(r'\band\s+and\b', 'and'),
|
1055 |
+
|
1056 |
+
# 確保句子完整性
|
1057 |
+
(r'(\w+)\s*\.\s*(\w+)', r'\1. \2'),
|
1058 |
+
]
|
1059 |
+
|
1060 |
+
for pattern, replacement in grammar_fixes:
|
1061 |
+
response = re.sub(pattern, replacement, response, flags=re.IGNORECASE)
|
1062 |
+
|
1063 |
+
return response
|
1064 |
+
|
1065 |
def _recover_from_overcleaning(self, original_response: str) -> str:
|
1066 |
"""從過度清理中恢復內容"""
|
1067 |
try:
|