Spaces:

DawnC
/

VisionScout

Running on Zero

App Files Files Community

DawnC committed 2 days ago

Commit

6be6bee

verified ·

1 Parent(s): e2ae770

Upload 5 files

Browse files

Fixed formatting issues caused by a leading scene label (e.g. "indoor, ") in generated descriptions

Files changed (4) hide show

llm_enhancer.py +16 -5
llm_model_manager.py +44 -25
object_description_generator.py +41 -40
response_processor.py +105 -31

llm_enhancer.py CHANGED Viewed

@@ -126,6 +126,18 @@ class LLMEnhancer:
             # 10. 移除解釋性注釋
             cleaned_response = self.response_processor.remove_explanatory_notes(raw_cleaned)
             # 11. 事實準確性驗證
             try:
                 cleaned_response = self.quality_validator.verify_factual_accuracy(
@@ -142,12 +154,9 @@ class LLMEnhancer:
                     cleaned_response, scene_type, original_desc
                 )
-            # 13. 視角一致性處理
-            perspective = self.quality_validator.extract_perspective_from_description(original_desc)
-            if perspective and perspective.lower() not in cleaned_response.lower():
-                cleaned_response = f"{perspective}, {cleaned_response[0].lower()}{cleaned_response[1:]}"
-            # 13.5. 最終的 identical 詞彙清理（確保LLM輸出不包含重複性描述）
             identical_final_cleanup = [
                 (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
                 (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
@@ -157,6 +166,7 @@ class LLMEnhancer:
             for pattern, replacement in identical_final_cleanup:
                 cleaned_response = re.sub(pattern, replacement, cleaned_response, flags=re.IGNORECASE)
             # 14. 最終驗證：如果結果過短，嘗試fallback
             final_result = cleaned_response.strip()
@@ -183,6 +193,7 @@ class LLMEnhancer:
             # 15. display enhanced description
             self.logger.info(f"Scene description enhancement completed successfully ({len(final_result)} chars)")
             return final_result
         except Exception as e:

             # 10. 移除解釋性注釋
             cleaned_response = self.response_processor.remove_explanatory_notes(raw_cleaned)
+            # self.logger.info(f"DEBUG: Before factual verification: {cleaned_response[:50]}...")
+            # 10.5 事實準確性驗證
+            try:
+                cleaned_response = self.quality_validator.verify_factual_accuracy(
+                    original_desc, cleaned_response, object_list
+                )
+            except Exception:
+                self.logger.warning("Fact verification failed; using response without verification")
+            # self.logger.info(f"DEBUG: After factual verification: {cleaned_response[:50]}...")
             # 11. 事實準確性驗證
             try:
                 cleaned_response = self.quality_validator.verify_factual_accuracy(
                     cleaned_response, scene_type, original_desc
                 )
+            # print(f"DEBUG: After scene type consistency: {cleaned_response[:50]}...")
+            # 13. 最終的 identical 詞彙清理（確保LLM輸出不包含重複性描述）
             identical_final_cleanup = [
                 (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
                 (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
             for pattern, replacement in identical_final_cleanup:
                 cleaned_response = re.sub(pattern, replacement, cleaned_response, flags=re.IGNORECASE)
+            # print(f"DEBUG: After identical cleanup: {cleaned_response[:50]}...")
             # 14. 最終驗證：如果結果過短，嘗試fallback
             final_result = cleaned_response.strip()
             # 15. display enhanced description
             self.logger.info(f"Scene description enhancement completed successfully ({len(final_result)} chars)")
+            # print(f"DEBUG: LLMEnhancer final_result before return: {final_result[:50]}..." if final_result else "DEBUG: LLMEnhancer final_result is empty")
             return final_result
         except Exception as e:

llm_model_manager.py CHANGED Viewed

@@ -173,19 +173,6 @@ class LLMModelManager:
             self.logger.debug("GPU cache cleared")
     def generate_response(self, prompt: str, **generation_kwargs) -> str:
-        """
-        生成LLM回應
-        Args:
-            prompt: 輸入提示詞
-            **generation_kwargs: 額外的生成參數，可覆蓋預設值
-        Returns:
-            str: 生成的回應文本
-        Raises:
-            ModelGenerationError: 當生成失敗時
-        """
         # 確保模型已載入
         if not self._model_loaded:
             self._load_model()
@@ -194,6 +181,10 @@ class LLMModelManager:
             self.call_count += 1
             self.logger.info(f"Generating response (call #{self.call_count})")
             # clean GPU
             self._clear_gpu_cache()
@@ -216,14 +207,21 @@ class LLMModelManager:
                 "use_cache": True,
             })
-            # resposne
             with torch.no_grad():
                 outputs = self.model.generate(inputs.input_ids, **generation_params)
             # 解碼回應
             full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             response = self._extract_generated_response(full_response, prompt)
             if not response or len(response.strip()) < 10:
                 raise ModelGenerationError("Generated response is too short or empty")
@@ -281,13 +279,6 @@ class LLMModelManager:
     def _extract_generated_response(self, full_response: str, prompt: str) -> str:
         """
         從完整回應中提取生成的部分
-        Args:
-            full_response: 模型的完整輸出
-            prompt: 原始提示詞
-        Returns:
-            str: 提取的生成回應
         """
         # 尋找assistant標記
         assistant_tag = "<|assistant|>"
@@ -298,14 +289,42 @@ class LLMModelManager:
             user_tag = "<|user|>"
             if user_tag in response:
                 response = response.split(user_tag)[0].strip()
             return response
-        # 移除輸入提示詞
-        if full_response.startswith(prompt):
-            return full_response[len(prompt):].strip()
-        return full_response.strip()
     def reset_context(self):
         """重置模型上下文，清理GPU緩存"""

             self.logger.debug("GPU cache cleared")
     def generate_response(self, prompt: str, **generation_kwargs) -> str:
         # 確保模型已載入
         if not self._model_loaded:
             self._load_model()
             self.call_count += 1
             self.logger.info(f"Generating response (call #{self.call_count})")
+            # # record input prompt
+            # self.logger.info(f"DEBUG: Input prompt length: {len(prompt)}")
+            # self.logger.info(f"DEBUG: Input prompt preview: {prompt[:200]}...")
             # clean GPU
             self._clear_gpu_cache()
                 "use_cache": True,
             })
+            # response
             with torch.no_grad():
                 outputs = self.model.generate(inputs.input_ids, **generation_params)
             # 解碼回應
             full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            # # record whole response
+            # self.logger.info(f"DEBUG: Full LLM response: {full_response}")
             response = self._extract_generated_response(full_response, prompt)
+            # # 記錄提取後的回應
+            # self.logger.info(f"DEBUG: Extracted response: {response}")
             if not response or len(response.strip()) < 10:
                 raise ModelGenerationError("Generated response is too short or empty")
     def _extract_generated_response(self, full_response: str, prompt: str) -> str:
         """
         從完整回應中提取生成的部分
         """
         # 尋找assistant標記
         assistant_tag = "<|assistant|>"
             user_tag = "<|user|>"
             if user_tag in response:
                 response = response.split(user_tag)[0].strip()
+        else:
+            # 移除輸入提示詞
+            if full_response.startswith(prompt):
+                response = full_response[len(prompt):].strip()
+            else:
+                response = full_response.strip()
+        # 移除不自然的場景類型前綴
+        response = self._remove_scene_type_prefixes(response)
+        return response
+    def _remove_scene_type_prefixes(self, response: str) -> str:
+        """
+        移除LLM生成回應中的場景類型前綴
+        Args:
+            response: 原始LLM回應
+        Returns:
+            str: 移除前綴後的回應
+        """
+        if not response:
             return response
+        prefix_patterns = [r'^[A-Za-z]+\,\s*']
+        # 應用清理模式
+        for pattern in prefix_patterns:
+            response = re.sub(pattern, '', response, flags=re.IGNORECASE)
+        # 確保首字母大寫
+        if response and response[0].islower():
+            response = response[0].upper() + response[1:]
+        return response.strip()
     def reset_context(self):
         """重置模型上下文，清理GPU緩存"""

object_description_generator.py CHANGED Viewed

@@ -389,7 +389,7 @@ class ObjectDescriptionGenerator:
     def optimize_object_description(self, description: str) -> str:
         """
         優化物件描述文本，消除冗餘重複並改善表達流暢度
         這個函數是後處理階段的關鍵組件，負責清理和精簡自然語言生成系統
         產出的描述文字。它專門處理常見的重複問題，如相同物件的重複
         列舉和冗餘的空間描述，讓最終的描述更簡潔自然。
@@ -402,31 +402,31 @@ class ObjectDescriptionGenerator:
         """
         try:
             import re
-            # 1. 處理冗餘的空間限定表達
             # 使用通用模式來識別和移除不必要的空間描述
             # 例如："bed in the room" -> "bed"，因為床本身就表示是室內環境
             description = self._remove_redundant_spatial_qualifiers(description)
-            # 2. 識別並處理物件列表的重複問題
-            # 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表模式
             # 使用正則表達式捕獲 "with" 關鍵字後的物件序列
             # 注意：正則表達式需要修正以避免貪婪匹配的問題
             object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
             # 遍歷每個找到的物件列表進行重複檢測和優化
             for obj_list in object_lists:
-                # 3. 解析單個物件列表中的項目
                 # 使用更精確的正則表達式來分割物件項目
                 # 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
                 # 需要特別注意處理最後一個 "and" 的情況
                 # 先處理逗號格式 "A, B, and C"
                 if ", and " in obj_list:
                     # 分割 ", and " 前後的部分
                     before_last_and = obj_list.rsplit(", and ", 1)[0]
                     last_item = obj_list.rsplit(", and ", 1)[1]
                     # 處理前面的項目（用逗號分割）
                     front_items = [item.strip() for item in before_last_and.split(",")]
                     # 添加最後一個項目
@@ -437,11 +437,11 @@ class ObjectDescriptionGenerator:
                 else:
                     # 處理純逗號分隔的列表
                     all_items = [item.strip() for item in obj_list.split(",")]
-                # 4. 統計物件出現頻率
                 # 建立字典來記錄每個物件的出現次數
                 item_counts = {}
                 for item in all_items:
                     # 清理項目文字並過濾無效內容
                     item = item.strip()
@@ -453,11 +453,11 @@ class ObjectDescriptionGenerator:
                         if clean_item not in item_counts:
                             item_counts[clean_item] = 0
                         item_counts[clean_item] += 1
-                # 5. 生成優化後的物件列表
                 if item_counts:
                     new_items = []
                     for item, count in item_counts.items():
                         if count > 1:
                             # 對於重複項目，使用數字加複數形式
@@ -466,8 +466,8 @@ class ObjectDescriptionGenerator:
                         else:
                             # 單個項目保持原樣
                             new_items.append(item)
-                    # 6. 重新格式化物件列表
                     # 使用標準的英文列表連接格式
                     if len(new_items) == 1:
                         new_list = new_items[0]
@@ -476,13 +476,13 @@ class ObjectDescriptionGenerator:
                     else:
                         # 使用逗號格式確保清晰度
                         new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
                     # 7. 在原文中替換優化後的列表
-                    # 將原始的冗餘列表替換為優化後的簡潔版本
                     description = description.replace(obj_list, new_list)
             return description
         except Exception as e:
             self.logger.warning(f"Error optimizing object description: {str(e)}")
             return description
@@ -490,19 +490,19 @@ class ObjectDescriptionGenerator:
     def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
         """
         移除描述中冗餘的空間限定詞
         這個方法使用模式匹配來識別和移除不必要的空間描述，例如
         "bed in the room" 中的 "in the room" 部分通常是多餘的，因為
         床這個物件本身就是室內環境。
         Args:
             description: 包含可能多餘空間描述的文本
         Returns:
             str: 移除多餘空間限定詞後的文本
         """
         import re
         # 定義常見的多餘空間表達模式
         # 這些模式捕獲「物件 + 不必要的空間限定」的情況
         redundant_patterns = [
@@ -515,23 +515,23 @@ class ObjectDescriptionGenerator:
             # 一般性的多餘表達：「在場景中」、「在圖片中」等
             (r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
         ]
         for pattern, replacement in redundant_patterns:
             description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
         return description
     def _normalize_item_for_counting(self, item: str) -> str:
         """
         正規化物件項目以便準確計數
         移除冠詞和其他可能影響計數準確性的前綴詞彙，
         確保 "a car" 和 "car" 被視為同一物件類型。
         Args:
             item: 原始物件項目字串
         Returns:
             str: 正規化後的物件項目
         """
@@ -542,10 +542,10 @@ class ObjectDescriptionGenerator:
     def _make_plural(self, item: str) -> str:
         """
         將單數名詞轉換為複數形式
         Args:
             item: 單數形式的名詞
         Returns:
             str: 複數形式的名詞
         """
@@ -589,22 +589,23 @@ class ObjectDescriptionGenerator:
             self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
                             f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
-            # 1. 整體氛圍（照明和視角）
             ambiance_parts = []
             if lighting_info:
                 time_of_day = lighting_info.get("time_of_day", "unknown lighting")
                 is_indoor = lighting_info.get("is_indoor")
-                ambiance_statement = "This is"
                 if is_indoor is True:
-                    ambiance_statement += " an indoor scene"
                 elif is_indoor is False:
-                    ambiance_statement += " an outdoor scene"
                 else:
-                    ambiance_statement += " a scene"
-                # remove underline
-                readable_lighting = f"with {time_of_day.replace('_', ' ')} lighting conditions"
-                ambiance_statement += f", likely {readable_lighting}."
                 ambiance_parts.append(ambiance_statement)
             if viewpoint and viewpoint != "eye_level":

     def optimize_object_description(self, description: str) -> str:
         """
         優化物件描述文本，消除冗餘重複並改善表達流暢度
         這個函數是後處理階段的關鍵組件，負責清理和精簡自然語言生成系統
         產出的描述文字。它專門處理常見的重複問題，如相同物件的重複
         列舉和冗餘的空間描述，讓最終的描述更簡潔自然。
         """
         try:
             import re
+            # 1. 處理多餘的空間限定表達
             # 使用通用模式來識別和移除不必要的空間描述
             # 例如："bed in the room" -> "bed"，因為床本身就表示是室內環境
             description = self._remove_redundant_spatial_qualifiers(description)
+            # 2. 辨識並處理物件列表的重複問題
+            # 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表
             # 使用正則表達式捕獲 "with" 關鍵字後的物件序列
             # 注意：正則表達式需要修正以避免貪婪匹配的問題
             object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
             # 遍歷每個找到的物件列表進行重複檢測和優化
             for obj_list in object_lists:
+                # 3. 解析單個物件列表中的項目
                 # 使用更精確的正則表達式來分割物件項目
                 # 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
                 # 需要特別注意處理最後一個 "and" 的情況
                 # 先處理逗號格式 "A, B, and C"
                 if ", and " in obj_list:
                     # 分割 ", and " 前後的部分
                     before_last_and = obj_list.rsplit(", and ", 1)[0]
                     last_item = obj_list.rsplit(", and ", 1)[1]
                     # 處理前面的項目（用逗號分割）
                     front_items = [item.strip() for item in before_last_and.split(",")]
                     # 添加最後一個項目
                 else:
                     # 處理純逗號分隔的列表
                     all_items = [item.strip() for item in obj_list.split(",")]
+                # 4. 統計物件出現頻率
                 # 建立字典來記錄每個物件的出現次數
                 item_counts = {}
                 for item in all_items:
                     # 清理項目文字並過濾無效內容
                     item = item.strip()
                         if clean_item not in item_counts:
                             item_counts[clean_item] = 0
                         item_counts[clean_item] += 1
+                # 5. 生成優化後的物件列表
                 if item_counts:
                     new_items = []
                     for item, count in item_counts.items():
                         if count > 1:
                             # 對於重複項目，使用數字加複數形式
                         else:
                             # 單個項目保持原樣
                             new_items.append(item)
+                    # 6. 重新格式化物件列表
                     # 使用標準的英文列表連接格式
                     if len(new_items) == 1:
                         new_list = new_items[0]
                     else:
                         # 使用逗號格式確保清晰度
                         new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
                     # 7. 在原文中替換優化後的列表
+                    # 將原始的多餘列表替換為優化後的簡潔版本
                     description = description.replace(obj_list, new_list)
             return description
         except Exception as e:
             self.logger.warning(f"Error optimizing object description: {str(e)}")
             return description
     def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
         """
         移除描述中冗餘的空間限定詞
         這個方法使用模式匹配來識別和移除不必要的空間描述，例如
         "bed in the room" 中的 "in the room" 部分通常是多餘的，因為
         床這個物件本身就是室內環境。
         Args:
             description: 包含可能多餘空間描述的文本
         Returns:
             str: 移除多餘空間限定詞後的文本
         """
         import re
         # 定義常見的多餘空間表達模式
         # 這些模式捕獲「物件 + 不必要的空間限定」的情況
         redundant_patterns = [
             # 一般性的多餘表達：「在場景中」、「在圖片中」等
             (r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
         ]
         for pattern, replacement in redundant_patterns:
             description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
         return description
     def _normalize_item_for_counting(self, item: str) -> str:
         """
         正規化物件項目以便準確計數
         移除冠詞和其他可能影響計數準確性的前綴詞彙，
         確保 "a car" 和 "car" 被視為同一物件類型。
         Args:
             item: 原始物件項目字串
         Returns:
             str: 正規化後的物件項目
         """
     def _make_plural(self, item: str) -> str:
         """
         將單數名詞轉換為複數形式
         Args:
             item: 單數形式的名詞
         Returns:
             str: 複數形式的名詞
         """
             self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
                             f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
+            # 1. 整體氛圍（照明和視角）- 移除室內外標籤
             ambiance_parts = []
             if lighting_info:
                 time_of_day = lighting_info.get("time_of_day", "unknown lighting")
                 is_indoor = lighting_info.get("is_indoor")
+                # 直接描述照明條件，不加入室內外標籤
+                readable_lighting = f"{time_of_day.replace('_', ' ')} lighting conditions"
+                # 根據室內外環境調整描述但不直接標明
                 if is_indoor is True:
+                    ambiance_statement = f"The scene features {readable_lighting} characteristic of an interior space."
                 elif is_indoor is False:
+                    ambiance_statement = f"The scene displays {readable_lighting} typical of an outdoor environment."
                 else:
+                    ambiance_statement = f"The scene presents {readable_lighting}."
                 ambiance_parts.append(ambiance_statement)
             if viewpoint and viewpoint != "eye_level":

response_processor.py CHANGED Viewed

@@ -60,7 +60,11 @@ class ResponseProcessor:
                 "Here is a rewritten scene description that adheres to the provided critical rules:",
                 "Here is the rewritten scene description:",
                 "Here's a rewritten scene description:",
-                "The rewritten scene description is as follows:"
             ]
             # 設置需要移除的後綴短語
@@ -187,23 +191,13 @@ class ResponseProcessor:
             raise ResponseProcessingError(error_msg) from e
     def clean_response(self, response: str, model_type: str = "general") -> str:
-        """
-        清理LLM回應
-        Args:
-            response: 原始LLM回應
-            model_type: 模型類型（用於特定清理規則）
-        Returns:
-            str: 清理後的回應
-        Raises:
-            ResponseProcessingError: 當回應處理失敗時
-        """
         if not response:
             raise ResponseProcessingError("Empty response provided for cleaning")
         try:
             self.logger.debug(f"Starting response cleaning (original length: {len(response)})")
             # 保存原始回應作為備份
@@ -215,6 +209,9 @@ class ResponseProcessor:
             else:
                 cleaned_response = self._clean_general_response(response)
             # 如果清理後內容過短，嘗試從原始回應中恢復
             if len(cleaned_response.strip()) < 40:
                 self.logger.warning("Cleaned response too short, attempting recovery")
@@ -447,23 +444,52 @@ class ResponseProcessor:
         return response
     def _remove_introduction_prefixes(self, response: str) -> str:
-        """移除介紹性前綴"""
-        # 處理 "Here is..." 類型的prefix
-        intro_prefixes = [
-            r'^Here\s+is\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?scene\s+description.*?:\s*',
-            r'^The\s+(?:rewritten\s+|enhanced\s+)?(?:scene\s+)?description\s+is.*?:\s*',
-            r'^Here\'s\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?description.*?:\s*'
         ]
-        for prefix_pattern in intro_prefixes:
-            response = re.sub(prefix_pattern, '', response, flags=re.IGNORECASE)
-        # 處理固定prefix
-        for prefix in self.prefixes_to_remove:
-            if response.lower().startswith(prefix.lower()):
-                response = response[len(prefix):].strip()
-        return response
     def _remove_format_markers(self, response: str) -> str:
         """移除格式標記和上下文標籤（保留括號內的地理與細節資訊）"""
@@ -668,7 +694,7 @@ class ResponseProcessor:
             # 數字到文字
             number_conversions = {
                 '2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
-                '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
                 '11': 'eleven', '12': 'twelve'
             }
@@ -677,15 +703,15 @@ class ResponseProcessor:
                 # 模式1: 數字 + 單一複數詞 (如 "7 chairs")
                 pattern1 = rf'\b{digit}\s+([a-zA-Z]+s)\b'
                 processed_response = re.sub(pattern1, rf'{word} \1', processed_response)
                 # 模式2: 數字 + 修飾詞 + 複數詞 (如 "7 more chairs")
                 pattern2 = rf'\b{digit}\s+(more|additional|other|identical)\s+([a-zA-Z]+s)\b'
                 processed_response = re.sub(pattern2, rf'{word} \1 \2', processed_response, flags=re.IGNORECASE)
                 # 模式3: 數字 + 形容詞 + 複數詞 (如 "2 dining tables")
                 pattern3 = rf'\b{digit}\s+([a-zA-Z]+)\s+([a-zA-Z]+s)\b'
                 processed_response = re.sub(pattern3, rf'{word} \1 \2', processed_response)
                 # 模式4: 介詞片語中的數字 (如 "around 2 tables")
                 pattern4 = rf'\b(around|approximately|about)\s+{digit}\s+([a-zA-Z]+s)\b'
                 processed_response = re.sub(pattern4, rf'\1 {word} \2', processed_response, flags=re.IGNORECASE)
@@ -978,6 +1004,25 @@ class ResponseProcessor:
     def _final_formatting(self, response: str) -> str:
         """最終格式化處理"""
         # 確保首字母大寫
         if response and response[0].islower():
             response = response[0].upper() + response[1:]
@@ -988,6 +1033,35 @@ class ResponseProcessor:
         return response.strip()
     def _recover_from_overcleaning(self, original_response: str) -> str:
         """從過度清理中恢復內容"""
         try:

                 "Here is a rewritten scene description that adheres to the provided critical rules:",
                 "Here is the rewritten scene description:",
                 "Here's a rewritten scene description:",
+                "The rewritten scene description is as follows:",
+                "indoor,",
+                "outdoor,",
+                "indoor ",
+                "outdoor "
             ]
             # 設置需要移除的後綴短語
             raise ResponseProcessingError(error_msg) from e
     def clean_response(self, response: str, model_type: str = "general") -> str:
         if not response:
             raise ResponseProcessingError("Empty response provided for cleaning")
         try:
+            # 調試：記錄清理前的原始回應
+            self.logger.info(f"DEBUG: Response before cleaning: {response}")
             self.logger.debug(f"Starting response cleaning (original length: {len(response)})")
             # 保存原始回應作為備份
             else:
                 cleaned_response = self._clean_general_response(response)
+            # 調試：記錄清理後的回應
+            self.logger.info(f"DEBUG: Response after cleaning: {cleaned_response}")
             # 如果清理後內容過短，嘗試從原始回應中恢復
             if len(cleaned_response.strip()) < 40:
                 self.logger.warning("Cleaned response too short, attempting recovery")
         return response
     def _remove_introduction_prefixes(self, response: str) -> str:
+        """
+        移除介紹性前綴，強化對多種模式的處理。
+        """
+        if not response:
+            return ""
+        cleaned_response = response.strip()
+        # 1. 將所有要移除的前綴模式合併成一個大的正則表達式
+        #    - r'^(?: ... )' 表示從字串開頭匹配非捕獲分組
+        #    - '|' 用於分隔不同的模式
+        #    - re.escape() 用於安全地處理 self.prefixes_to_remove 中的特殊字符
+        #    - `\\s*,?` 處理可選的逗號和空格
+        #    - `\\s*` 處理結尾的任意空格
+        all_prefix_patterns = [
+            r'Here\s+is\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?scene\s+description.*?:',
+            r'The\s+(?:rewritten\s+|enhanced\s+)?(?:scene\s+)?description\s+is.*?:',
+            r'Here\'s\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?description.*?:',
+            # 這個模式會匹配這些詞，無論後面是逗號還是空格
+            r'(?:indoor|outdoor|inside|outside)\s*,?'
         ]
+        # 將 self.prefixes_to_remove 中的字符串也轉換為正則表達式模式
+        # 確保 self.prefixes_to_remove 存在，否則提供一個空列表
+        prefixes_to_add = getattr(self, 'prefixes_to_remove', [])
+        for prefix in prefixes_to_add:
+            # 使用 re.escape 來確保前綴中的任何特殊字符被正確處理
+            all_prefix_patterns.append(re.escape(prefix))
+        cleaned_response = re.sub(r'^(?:indoor|outdoor|inside|outside)\s*,?\s*', '', cleaned_response, flags=re.IGNORECASE).strip()
+        # 將所有模式用 '|' 連接起來，形成一個大的組合模式
+        # 我們在模式的結尾加上 \\s* 來匹配並移除前綴後可能跟隨的空格
+        combined_pattern = r'^(?:' + '|'.join(all_prefix_patterns) + r')\s*'
+        # 2. 執行一次性的替換，並忽略大小寫
+        # 這一行程式碼會移除所有匹配到的前綴
+        cleaned_response = re.sub(combined_pattern, '', cleaned_response, flags=re.IGNORECASE).strip()
+        # 3. 確保首字母大寫
+        # 移除前綴後，新的句首可能變成小寫, 這邊得修正
+        if cleaned_response:
+            cleaned_response = cleaned_response[0].upper() + cleaned_response[1:]
+        return cleaned_response
     def _remove_format_markers(self, response: str) -> str:
         """移除格式標記和上下文標籤（保留括號內的地理與細節資訊）"""
             # 數字到文字
             number_conversions = {
                 '2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
+                '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
                 '11': 'eleven', '12': 'twelve'
             }
                 # 模式1: 數字 + 單一複數詞 (如 "7 chairs")
                 pattern1 = rf'\b{digit}\s+([a-zA-Z]+s)\b'
                 processed_response = re.sub(pattern1, rf'{word} \1', processed_response)
                 # 模式2: 數字 + 修飾詞 + 複數詞 (如 "7 more chairs")
                 pattern2 = rf'\b{digit}\s+(more|additional|other|identical)\s+([a-zA-Z]+s)\b'
                 processed_response = re.sub(pattern2, rf'{word} \1 \2', processed_response, flags=re.IGNORECASE)
                 # 模式3: 數字 + 形容詞 + 複數詞 (如 "2 dining tables")
                 pattern3 = rf'\b{digit}\s+([a-zA-Z]+)\s+([a-zA-Z]+s)\b'
                 processed_response = re.sub(pattern3, rf'{word} \1 \2', processed_response)
                 # 模式4: 介詞片語中的數字 (如 "around 2 tables")
                 pattern4 = rf'\b(around|approximately|about)\s+{digit}\s+([a-zA-Z]+s)\b'
                 processed_response = re.sub(pattern4, rf'\1 {word} \2', processed_response, flags=re.IGNORECASE)
     def _final_formatting(self, response: str) -> str:
         """最終格式化處理"""
+        # 專門處理 "indoor," 前綴問題
+        indoor_patterns = [
+            r'^indoor\s*,\s*',
+            r'^outdoor\s*,\s*',
+            r'^inside\s*,\s*',
+            r'^outside\s*,\s*',
+            r'^indoor\s+',
+            r'^outdoor\s+',
+        ]
+        for pattern in indoor_patterns:
+            response = re.sub(pattern, '', response, flags=re.IGNORECASE)
+        # 移除開頭的空白和標點符號
+        response = re.sub(r'^[\s,;:.-]+', '', response)
+        # 修復常見的語法問題
+        response = self._fix_grammatical_issues(response)
         # 確保首字母大寫
         if response and response[0].islower():
             response = response[0].upper() + response[1:]
         return response.strip()
+    def _fix_grammatical_issues(self, response: str) -> str:
+        """修復常見的語法問題"""
+        if not response:
+            return response
+        # 修復不完整的句子開頭
+        grammar_fixes = [
+            # 修復 "A dining table with... A dining table..." 重複問題
+            (r'\b(A|An)\s+([^.!?]*?)\s+\1\s+\2', r'\1 \2'),
+            # 修復 "This scene presents a scene" 重複
+            (r'\bThis scene presents a scene\b', 'This scene presents'),
+            # 修復不完整的句子 "A dining table with four chairs and a dining table"
+            (r'\b([A-Z][^.!?]*?)\s+and\s+a\s+\1\b', r'\1'),
+            # 修復空的介詞短語
+            (r'\bwith\s+with\b', 'with'),
+            (r'\band\s+and\b', 'and'),
+            # 確保句子完整性
+            (r'(\w+)\s*\.\s*(\w+)', r'\1. \2'),
+        ]
+        for pattern, replacement in grammar_fixes:
+            response = re.sub(pattern, replacement, response, flags=re.IGNORECASE)
+        return response
     def _recover_from_overcleaning(self, original_response: str) -> str:
         """從過度清理中恢復內容"""
         try: