Spaces:
Running
on
Zero
Running
on
Zero
Upload object_description_generator.py
Browse files- object_description_generator.py +235 -121
object_description_generator.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import logging
|
2 |
import traceback
|
3 |
-
import re
|
4 |
from typing import Dict, List, Tuple, Optional, Any
|
5 |
import numpy as np
|
6 |
|
@@ -389,62 +388,177 @@ class ObjectDescriptionGenerator:
|
|
389 |
|
390 |
def optimize_object_description(self, description: str) -> str:
|
391 |
"""
|
392 |
-
|
|
|
|
|
|
|
|
|
393 |
|
394 |
Args:
|
395 |
-
description:
|
396 |
|
397 |
Returns:
|
398 |
-
str:
|
399 |
"""
|
400 |
try:
|
401 |
import re
|
402 |
-
|
403 |
-
#
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
|
|
|
|
|
|
|
|
|
|
410 |
for obj_list in object_lists:
|
411 |
-
#
|
412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
413 |
item_counts = {}
|
414 |
-
|
415 |
-
for item in
|
|
|
416 |
item = item.strip()
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
|
|
|
|
|
|
|
|
423 |
if item_counts:
|
424 |
new_items = []
|
|
|
425 |
for item, count in item_counts.items():
|
426 |
if count > 1:
|
427 |
-
|
|
|
|
|
428 |
else:
|
|
|
429 |
new_items.append(item)
|
430 |
-
|
431 |
-
#
|
|
|
432 |
if len(new_items) == 1:
|
433 |
new_list = new_items[0]
|
434 |
elif len(new_items) == 2:
|
435 |
new_list = f"{new_items[0]} and {new_items[1]}"
|
436 |
else:
|
|
|
437 |
new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
|
438 |
-
|
439 |
-
#
|
|
|
440 |
description = description.replace(obj_list, new_list)
|
441 |
-
|
442 |
return description
|
443 |
-
|
444 |
except Exception as e:
|
445 |
self.logger.warning(f"Error optimizing object description: {str(e)}")
|
446 |
return description
|
447 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
448 |
def generate_dynamic_everyday_description(self,
|
449 |
detected_objects: List[Dict],
|
450 |
lighting_info: Optional[Dict] = None,
|
@@ -640,15 +754,15 @@ class ObjectDescriptionGenerator:
|
|
640 |
if object_statistics and class_name in object_statistics:
|
641 |
actual_count = object_statistics[class_name]["count"]
|
642 |
formatted_name_with_exact_count = self._format_object_count_description(
|
643 |
-
normalized_class_name,
|
644 |
actual_count,
|
645 |
-
scene_type=scene_type
|
646 |
)
|
647 |
else:
|
648 |
formatted_name_with_exact_count = self._format_object_count_description(
|
649 |
-
normalized_class_name,
|
650 |
count,
|
651 |
-
scene_type=scene_type
|
652 |
)
|
653 |
|
654 |
if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
|
@@ -747,67 +861,67 @@ class ObjectDescriptionGenerator:
|
|
747 |
def _remove_repetitive_descriptors(self, description: str) -> str:
|
748 |
"""
|
749 |
移除描述中的重複性和不適當的描述詞彙,特別是 "identical" 等詞彙
|
750 |
-
|
751 |
Args:
|
752 |
description: 原始描述文本
|
753 |
-
|
754 |
Returns:
|
755 |
str: 清理後的描述文本
|
756 |
"""
|
757 |
try:
|
758 |
import re
|
759 |
-
|
760 |
# 定義需要移除或替換的模式
|
761 |
cleanup_patterns = [
|
762 |
# 移除 "identical" 描述模式
|
763 |
(r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
764 |
(r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
765 |
(r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
|
766 |
-
|
767 |
# 改善 "comprehensive arrangement" 等過於技術性的表達
|
768 |
(r'\bcomprehensive arrangement of\b', 'arrangement of'),
|
769 |
(r'\bcomprehensive view featuring\b', 'scene featuring'),
|
770 |
(r'\bcomprehensive display of\b', 'display of'),
|
771 |
-
|
772 |
# 簡化過度描述性的短語
|
773 |
(r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
|
774 |
(r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
|
775 |
]
|
776 |
-
|
777 |
processed_description = description
|
778 |
for pattern, replacement in cleanup_patterns:
|
779 |
processed_description = re.sub(pattern, replacement, processed_description, flags=re.IGNORECASE)
|
780 |
-
|
781 |
# 進一步清理可能的多餘空格
|
782 |
processed_description = re.sub(r'\s+', ' ', processed_description).strip()
|
783 |
-
|
784 |
self.logger.debug(f"Cleaned description: removed repetitive descriptors")
|
785 |
return processed_description
|
786 |
-
|
787 |
except Exception as e:
|
788 |
self.logger.warning(f"Error removing repetitive descriptors: {str(e)}")
|
789 |
return description
|
790 |
|
791 |
-
def _format_object_count_description(self, class_name: str, count: int,
|
792 |
scene_type: Optional[str] = None,
|
793 |
detected_objects: Optional[List[Dict]] = None,
|
794 |
avg_confidence: float = 0.0) -> str:
|
795 |
"""
|
796 |
格式化物件數量描述的核心方法,整合空間排列、材質推斷和場景語境
|
797 |
-
|
798 |
這個方法是整個物件描述系統的核心,它將多個子功能整合在一起:
|
799 |
1. 數字到文字的轉換(避免阿拉伯數字)
|
800 |
2. 基於場景的材質推斷
|
801 |
3. 空間排列模式的描述
|
802 |
4. 語境化的物件描述
|
803 |
-
|
804 |
Args:
|
805 |
class_name: 標準化後的類別名稱
|
806 |
count: 物件數量
|
807 |
scene_type: 場景類型,用於語境化描述
|
808 |
detected_objects: 該類型的所有檢測物件,用於空間分析
|
809 |
avg_confidence: 平均檢測置信度,影響材質推斷的可信度
|
810 |
-
|
811 |
Returns:
|
812 |
str: 完整的格式化數量描述
|
813 |
"""
|
@@ -817,14 +931,14 @@ class ObjectDescriptionGenerator:
|
|
817 |
|
818 |
# 獲取基礎的複數形式
|
819 |
plural_form = self._get_plural_form(class_name)
|
820 |
-
|
821 |
# 單數情況的處理
|
822 |
if count == 1:
|
823 |
-
return self._format_single_object_description(class_name, scene_type,
|
824 |
detected_objects, avg_confidence)
|
825 |
-
|
826 |
# 複數情況的處理
|
827 |
-
return self._format_multiple_objects_description(class_name, count, plural_form,
|
828 |
scene_type, detected_objects, avg_confidence)
|
829 |
|
830 |
except Exception as e:
|
@@ -832,55 +946,55 @@ class ObjectDescriptionGenerator:
|
|
832 |
return f"{count} {class_name}s" if count > 1 else class_name
|
833 |
|
834 |
def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
|
835 |
-
detected_objects: Optional[List[Dict]],
|
836 |
avg_confidence: float) -> str:
|
837 |
"""
|
838 |
處理單個物件的描述生成
|
839 |
-
|
840 |
對於單個物件,我們重點在於通過材質推斷和位置描述來豐富描述內容,
|
841 |
避免簡單的 "a chair" 這樣的描述,而是生成 "a wooden dining chair" 這樣的表達
|
842 |
-
|
843 |
Args:
|
844 |
class_name: 物件類別名稱
|
845 |
scene_type: 場景類型
|
846 |
detected_objects: 檢測物件列表
|
847 |
avg_confidence: 平均置信度
|
848 |
-
|
849 |
Returns:
|
850 |
str: 單個物件的完整描述
|
851 |
"""
|
852 |
article = "an" if class_name[0].lower() in 'aeiou' else "a"
|
853 |
-
|
854 |
# 獲取材質描述符
|
855 |
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
856 |
-
|
857 |
# 獲取位置或特徵描述符
|
858 |
feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)
|
859 |
-
|
860 |
# 組合描述
|
861 |
descriptors = []
|
862 |
if material_descriptor:
|
863 |
descriptors.append(material_descriptor)
|
864 |
if feature_descriptor:
|
865 |
descriptors.append(feature_descriptor)
|
866 |
-
|
867 |
if descriptors:
|
868 |
return f"{article} {' '.join(descriptors)} {class_name}"
|
869 |
else:
|
870 |
return f"{article} {class_name}"
|
871 |
|
872 |
def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
|
873 |
-
scene_type: Optional[str], detected_objects: Optional[List[Dict]],
|
874 |
avg_confidence: float) -> str:
|
875 |
"""
|
876 |
處理多個物件的描述生成
|
877 |
-
|
878 |
對於多個物件,我們的重點是:
|
879 |
1. 將數字轉換為文字表達
|
880 |
2. 分析空間排列模式
|
881 |
3. 添加適當的材質或功能描述
|
882 |
4. 生成自然流暢的描述
|
883 |
-
|
884 |
Args:
|
885 |
class_name: 物件類別名稱
|
886 |
count: 物件數量
|
@@ -888,17 +1002,17 @@ class ObjectDescriptionGenerator:
|
|
888 |
scene_type: 場景類型
|
889 |
detected_objects: 檢測物件列表
|
890 |
avg_confidence: 平均置信度
|
891 |
-
|
892 |
Returns:
|
893 |
str: 多個物件的完整描述
|
894 |
"""
|
895 |
# 數字到文字的轉換映射
|
896 |
number_words = {
|
897 |
2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
|
898 |
-
7: "seven", 8: "eight", 9: "nine", 10: "ten",
|
899 |
11: "eleven", 12: "twelve"
|
900 |
}
|
901 |
-
|
902 |
# 確定基礎數量表達
|
903 |
if count in number_words:
|
904 |
count_expression = number_words[count]
|
@@ -906,48 +1020,48 @@ class ObjectDescriptionGenerator:
|
|
906 |
count_expression = "several"
|
907 |
else:
|
908 |
count_expression = "numerous"
|
909 |
-
|
910 |
# 獲取材質或功能描述符
|
911 |
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
912 |
-
|
913 |
# 獲取空間排列描述
|
914 |
-
spatial_descriptor = self._get_spatial_arrangement_descriptor(class_name, scene_type,
|
915 |
detected_objects, count)
|
916 |
-
|
917 |
# 組合最終描述
|
918 |
descriptors = []
|
919 |
if material_descriptor:
|
920 |
descriptors.append(material_descriptor)
|
921 |
-
|
922 |
# 構建基礎描述
|
923 |
base_description = f"{count_expression} {' '.join(descriptors)} {plural_form}".strip()
|
924 |
-
|
925 |
# 添加空間排列信息
|
926 |
if spatial_descriptor:
|
927 |
return f"{base_description} {spatial_descriptor}"
|
928 |
else:
|
929 |
return base_description
|
930 |
|
931 |
-
def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
|
932 |
avg_confidence: float) -> Optional[str]:
|
933 |
"""
|
934 |
基於場景語境和置信度進行材質推斷
|
935 |
-
|
936 |
這個方法實現了智能的材質推斷,它不依賴複雜的圖像分析,
|
937 |
而是基於常識和場景邏輯來推斷最可能的材質描述
|
938 |
-
|
939 |
Args:
|
940 |
class_name: 物件類別名稱
|
941 |
scene_type: 場景類型
|
942 |
avg_confidence: 檢測置信度,影響推斷的保守程度
|
943 |
-
|
944 |
Returns:
|
945 |
Optional[str]: 材質描述符,如果無法推斷則返回None
|
946 |
"""
|
947 |
# 只有在置信度足夠高時才進行材質推斷
|
948 |
if avg_confidence < 0.5:
|
949 |
return None
|
950 |
-
|
951 |
# 餐廳和用餐相關場景
|
952 |
if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
|
953 |
material_mapping = {
|
@@ -957,7 +1071,7 @@ class ObjectDescriptionGenerator:
|
|
957 |
"vase": "decorative"
|
958 |
}
|
959 |
return material_mapping.get(class_name)
|
960 |
-
|
961 |
# 辦公場景
|
962 |
elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
|
963 |
material_mapping = {
|
@@ -967,7 +1081,7 @@ class ObjectDescriptionGenerator:
|
|
967 |
"book": "reference"
|
968 |
}
|
969 |
return material_mapping.get(class_name)
|
970 |
-
|
971 |
# 客廳場景
|
972 |
elif scene_type and scene_type in ["living_room"]:
|
973 |
material_mapping = {
|
@@ -977,7 +1091,7 @@ class ObjectDescriptionGenerator:
|
|
977 |
"vase": "decorative"
|
978 |
}
|
979 |
return material_mapping.get(class_name)
|
980 |
-
|
981 |
# 室外場景
|
982 |
elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
|
983 |
material_mapping = {
|
@@ -986,7 +1100,7 @@ class ObjectDescriptionGenerator:
|
|
986 |
"bicycle": "stationed"
|
987 |
}
|
988 |
return material_mapping.get(class_name)
|
989 |
-
|
990 |
# 如果沒有特定的場景映射,返回通用描述符
|
991 |
generic_mapping = {
|
992 |
"chair": "comfortable",
|
@@ -994,30 +1108,30 @@ class ObjectDescriptionGenerator:
|
|
994 |
"car": "parked",
|
995 |
"person": "present"
|
996 |
}
|
997 |
-
|
998 |
return generic_mapping.get(class_name)
|
999 |
|
1000 |
def _get_spatial_arrangement_descriptor(self, class_name: str, scene_type: Optional[str],
|
1001 |
-
detected_objects: Optional[List[Dict]],
|
1002 |
count: int) -> Optional[str]:
|
1003 |
"""
|
1004 |
分析物件的空間排列模式並生成相應描述
|
1005 |
-
|
1006 |
這個方法通過分析物件的位置分布來判斷排列模式,
|
1007 |
然後根據物件類型和場景生成適當的空間描述
|
1008 |
-
|
1009 |
Args:
|
1010 |
class_name: 物件類別名稱
|
1011 |
scene_type: 場景類型
|
1012 |
detected_objects: 該類型的所有檢測物件
|
1013 |
count: 物件數量
|
1014 |
-
|
1015 |
Returns:
|
1016 |
Optional[str]: 空間排列描述,如果無法分析則返回None
|
1017 |
"""
|
1018 |
if not detected_objects or len(detected_objects) < 2:
|
1019 |
return None
|
1020 |
-
|
1021 |
try:
|
1022 |
# 提取物件的標準化位置
|
1023 |
positions = []
|
@@ -1025,17 +1139,17 @@ class ObjectDescriptionGenerator:
|
|
1025 |
center = obj.get("normalized_center", [0.5, 0.5])
|
1026 |
if isinstance(center, (list, tuple)) and len(center) >= 2:
|
1027 |
positions.append(center)
|
1028 |
-
|
1029 |
if len(positions) < 2:
|
1030 |
return None
|
1031 |
-
|
1032 |
# 分析排列模式
|
1033 |
arrangement_pattern = self._analyze_arrangement_pattern(positions)
|
1034 |
-
|
1035 |
# 根據物件類型和場景生成描述
|
1036 |
-
return self._generate_arrangement_description(class_name, scene_type,
|
1037 |
arrangement_pattern, count)
|
1038 |
-
|
1039 |
except Exception as e:
|
1040 |
self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
|
1041 |
return None
|
@@ -1043,43 +1157,43 @@ class ObjectDescriptionGenerator:
|
|
1043 |
def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
|
1044 |
"""
|
1045 |
分析位置點的排列模式
|
1046 |
-
|
1047 |
這個方法使用簡單的幾何分析來判斷物件的排列類型,
|
1048 |
幫助我們理解物件在空間中的組織方式
|
1049 |
-
|
1050 |
Args:
|
1051 |
positions: 標準化的位置座標列表
|
1052 |
-
|
1053 |
Returns:
|
1054 |
str: 排列模式類型(linear, clustered, scattered, circular等)
|
1055 |
"""
|
1056 |
import numpy as np
|
1057 |
-
|
1058 |
if len(positions) < 2:
|
1059 |
return "single"
|
1060 |
-
|
1061 |
# 轉換為numpy陣列便於計算
|
1062 |
pos_array = np.array(positions)
|
1063 |
-
|
1064 |
# 計算位置的分布特徵
|
1065 |
x_coords = pos_array[:, 0]
|
1066 |
y_coords = pos_array[:, 1]
|
1067 |
-
|
1068 |
# 分析x和y方向的變異程度
|
1069 |
x_variance = np.var(x_coords)
|
1070 |
y_variance = np.var(y_coords)
|
1071 |
-
|
1072 |
# 計算物件間的平均距離
|
1073 |
distances = []
|
1074 |
for i in range(len(positions)):
|
1075 |
for j in range(i + 1, len(positions)):
|
1076 |
-
dist = np.sqrt((positions[i][0] - positions[j][0])**2 +
|
1077 |
(positions[i][1] - positions[j][1])**2)
|
1078 |
distances.append(dist)
|
1079 |
-
|
1080 |
avg_distance = np.mean(distances) if distances else 0
|
1081 |
distance_variance = np.var(distances) if distances else 0
|
1082 |
-
|
1083 |
# 判斷排列模式
|
1084 |
if len(positions) >= 4 and self._is_circular_pattern(positions):
|
1085 |
return "circular"
|
@@ -1097,35 +1211,35 @@ class ObjectDescriptionGenerator:
|
|
1097 |
def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
|
1098 |
"""
|
1099 |
檢查位置是否形成圓形或環形排列
|
1100 |
-
|
1101 |
Args:
|
1102 |
positions: 位置座標列表
|
1103 |
-
|
1104 |
Returns:
|
1105 |
bool: 是否為圓形排列
|
1106 |
"""
|
1107 |
import numpy as np
|
1108 |
-
|
1109 |
if len(positions) < 4:
|
1110 |
return False
|
1111 |
-
|
1112 |
try:
|
1113 |
pos_array = np.array(positions)
|
1114 |
-
|
1115 |
# 計算中心點
|
1116 |
center_x = np.mean(pos_array[:, 0])
|
1117 |
center_y = np.mean(pos_array[:, 1])
|
1118 |
-
|
1119 |
# 計算每個點到中心的距離
|
1120 |
distances_to_center = []
|
1121 |
for pos in positions:
|
1122 |
dist = np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
|
1123 |
distances_to_center.append(dist)
|
1124 |
-
|
1125 |
# 如果所有距離都相近,可能是圓形排列
|
1126 |
distance_variance = np.var(distances_to_center)
|
1127 |
return distance_variance < 0.05 and np.mean(distances_to_center) > 0.2
|
1128 |
-
|
1129 |
except:
|
1130 |
return False
|
1131 |
|
@@ -1133,16 +1247,16 @@ class ObjectDescriptionGenerator:
|
|
1133 |
arrangement_pattern: str, count: int) -> Optional[str]:
|
1134 |
"""
|
1135 |
根據物件類型、場景和排列模式生成空間描述
|
1136 |
-
|
1137 |
這個方法將抽象的排列模式轉換為自然語言描述,
|
1138 |
並根據具體的物件類型和場景語境進行定制
|
1139 |
-
|
1140 |
Args:
|
1141 |
class_name: 物件類別名稱
|
1142 |
scene_type: 場景類型
|
1143 |
arrangement_pattern: 排列模式
|
1144 |
count: 物件數量
|
1145 |
-
|
1146 |
Returns:
|
1147 |
Optional[str]: 生成的空間排列描述
|
1148 |
"""
|
@@ -1178,7 +1292,7 @@ class ObjectDescriptionGenerator:
|
|
1178 |
"distributed": "positioned throughout the scene"
|
1179 |
}
|
1180 |
}
|
1181 |
-
|
1182 |
# 獲取對應的描述模板
|
1183 |
if class_name in arrangement_templates:
|
1184 |
template_dict = arrangement_templates[class_name]
|
@@ -1194,30 +1308,30 @@ class ObjectDescriptionGenerator:
|
|
1194 |
"distributed": "thoughtfully placed"
|
1195 |
}
|
1196 |
base_description = generic_templates.get(arrangement_pattern, "positioned in the scene")
|
1197 |
-
|
1198 |
return base_description
|
1199 |
|
1200 |
def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
|
1201 |
detected_objects: Optional[List[Dict]]) -> Optional[str]:
|
1202 |
"""
|
1203 |
為單個物件生成特徵描述符
|
1204 |
-
|
1205 |
當只有一個物件時,我們可以提供更具體的位置或功能描述
|
1206 |
-
|
1207 |
Args:
|
1208 |
class_name: 物件類別名稱
|
1209 |
scene_type: 場景類型
|
1210 |
detected_objects: 檢測物件(單個)
|
1211 |
-
|
1212 |
Returns:
|
1213 |
Optional[str]: 特徵描述符
|
1214 |
"""
|
1215 |
if not detected_objects or len(detected_objects) != 1:
|
1216 |
return None
|
1217 |
-
|
1218 |
obj = detected_objects[0]
|
1219 |
region = obj.get("region", "").lower()
|
1220 |
-
|
1221 |
# 基於位置的描述
|
1222 |
if "center" in region:
|
1223 |
if class_name == "dining table":
|
@@ -1226,14 +1340,14 @@ class ObjectDescriptionGenerator:
|
|
1226 |
return "centrally placed"
|
1227 |
elif "corner" in region or "left" in region or "right" in region:
|
1228 |
return "positioned"
|
1229 |
-
|
1230 |
# 基於場景的功能描述
|
1231 |
if scene_type and scene_type in ["dining_area", "restaurant"]:
|
1232 |
if class_name == "chair":
|
1233 |
return "dining"
|
1234 |
elif class_name == "vase":
|
1235 |
return "decorative"
|
1236 |
-
|
1237 |
return None
|
1238 |
|
1239 |
def _get_plural_form(self, word: str) -> str:
|
|
|
1 |
import logging
|
2 |
import traceback
|
|
|
3 |
from typing import Dict, List, Tuple, Optional, Any
|
4 |
import numpy as np
|
5 |
|
|
|
388 |
|
389 |
def optimize_object_description(self, description: str) -> str:
    """
    Condense repeated entries in "with X, Y, and Z" object lists.

    Redundant spatial qualifiers are removed first (via
    ``_remove_redundant_spatial_qualifiers``). Each "with ..." clause, taken
    up to the next period or the end of the text, is then split into items.
    Items that normalize to the same key are merged into
    "<count> <plural>"; items that occur once keep their original wording,
    article and capitalization included. Clauses without any repeated item
    are left exactly as written.

    Args:
        description: Scene description text that may contain repeated items.

    Returns:
        str: The condensed description. If any step raises, a warning is
            logged and the text as it stood before the failure is returned.
    """
    try:
        import re

        # 1. Drop qualifiers such as "bed in the room" -> "bed".
        description = self._remove_redundant_spatial_qualifiers(description)

        def _condense(match):
            """Rewrite a single "with ..." clause, merging repeated items."""
            obj_list = match.group(1)

            # 2. Split on commas, ", and" and bare "and" so that both
            #    "A, B, and C" and "A, B and C" yield three items.
            raw_items = re.split(r'\s*,\s*(?:and\s+)?|\s+and\s+', obj_list)

            # 3. Count items by normalized key, remembering the first
            #    original spelling of each key.
            item_counts = {}
            original_text = {}
            for raw in raw_items:
                raw = raw.strip()
                if not raw or raw in ("and", "with"):
                    continue
                key = self._normalize_item_for_counting(raw)
                item_counts[key] = item_counts.get(key, 0) + 1
                original_text.setdefault(key, raw)

            # 4. Nothing repeated: keep the clause untouched.
            if all(count == 1 for count in item_counts.values()):
                return match.group(0)

            # 5. Repeated items become "<count> <plural>"; single items keep
            #    their original text rather than the normalized key.
            new_items = [
                f"{count} {self._make_plural(key)}" if count > 1 else original_text[key]
                for key, count in item_counts.items()
            ]

            # 6. Re-join using the standard English list format.
            if len(new_items) == 1:
                new_list = new_items[0]
            elif len(new_items) == 2:
                new_list = f"{new_items[0]} and {new_items[1]}"
            else:
                new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"

            return "with " + new_list

        # 7. Rewrite each clause in place. Substituting per match avoids
        #    touching unrelated text that happens to equal the list.
        return re.sub(r'\bwith ([^.]+?)(?=\.|$)', _condense, description)

    except Exception as e:
        self.logger.warning(f"Error optimizing object description: {str(e)}")
        return description
|
489 |
|
490 |
+
def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
|
491 |
+
"""
|
492 |
+
移除描述中冗餘的空間限定詞
|
493 |
+
|
494 |
+
這個方法使用模式匹配來識別和移除不必要的空間描述,例如
|
495 |
+
"bed in the room" 中的 "in the room" 部分通常是多餘的,因為
|
496 |
+
床這個物件本身就是室內環境。
|
497 |
+
|
498 |
+
Args:
|
499 |
+
description: 包含可能多餘空間描述的文本
|
500 |
+
|
501 |
+
Returns:
|
502 |
+
str: 移除多餘空間限定詞後的文本
|
503 |
+
"""
|
504 |
+
import re
|
505 |
+
|
506 |
+
# 定義常見的多餘空間表達模式
|
507 |
+
# 這些模式捕獲「物件 + 不必要的空間限定」的情況
|
508 |
+
redundant_patterns = [
|
509 |
+
# 室內物件的多餘房間描述
|
510 |
+
(r'\b(bed|sofa|couch|chair|table|desk|dresser|nightstand)\s+in\s+the\s+(room|bedroom|living\s+room)', r'\1'),
|
511 |
+
# 廚房物件的多餘描述
|
512 |
+
(r'\b(refrigerator|stove|oven|sink|microwave)\s+in\s+the\s+kitchen', r'\1'),
|
513 |
+
# 浴室物件的多餘描述
|
514 |
+
(r'\b(toilet|shower|bathtub|sink)\s+in\s+the\s+(bathroom|restroom)', r'\1'),
|
515 |
+
# 一般性的多餘表達:「在場景中」、「在圖片中」等
|
516 |
+
(r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
|
517 |
+
]
|
518 |
+
|
519 |
+
for pattern, replacement in redundant_patterns:
|
520 |
+
description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
|
521 |
+
|
522 |
+
return description
|
523 |
+
|
524 |
+
|
525 |
+
def _normalize_item_for_counting(self, item: str) -> str:
|
526 |
+
"""
|
527 |
+
正規化物件項目以便準確計數
|
528 |
+
|
529 |
+
移除冠詞和其他可能影響計數準確性的前綴詞彙,
|
530 |
+
確保 "a car" 和 "car" 被視為同一物件類型。
|
531 |
+
|
532 |
+
Args:
|
533 |
+
item: 原始物件項目字串
|
534 |
+
|
535 |
+
Returns:
|
536 |
+
str: 正規化後的物件項目
|
537 |
+
"""
|
538 |
+
# 移除常見的英文冠詞
|
539 |
+
item = re.sub(r'^(a|an|the)\s+', '', item.lower())
|
540 |
+
return item.strip()
|
541 |
+
|
542 |
+
def _make_plural(self, item: str) -> str:
|
543 |
+
"""
|
544 |
+
將單數名詞轉換為複數形式
|
545 |
+
|
546 |
+
Args:
|
547 |
+
item: 單數形式的名詞
|
548 |
+
|
549 |
+
Returns:
|
550 |
+
str: 複數形式的名詞
|
551 |
+
"""
|
552 |
+
# 重用已經實現的複數化邏輯
|
553 |
+
if item.endswith("y") and len(item) > 1 and item[-2].lower() not in 'aeiou':
|
554 |
+
return item[:-1] + "ies"
|
555 |
+
elif item.endswith(("s", "sh", "ch", "x", "z")):
|
556 |
+
return item + "es"
|
557 |
+
elif not item.endswith("s"):
|
558 |
+
return item + "s"
|
559 |
+
else:
|
560 |
+
return item
|
561 |
+
|
562 |
def generate_dynamic_everyday_description(self,
|
563 |
detected_objects: List[Dict],
|
564 |
lighting_info: Optional[Dict] = None,
|
|
|
754 |
if object_statistics and class_name in object_statistics:
|
755 |
actual_count = object_statistics[class_name]["count"]
|
756 |
formatted_name_with_exact_count = self._format_object_count_description(
|
757 |
+
normalized_class_name,
|
758 |
actual_count,
|
759 |
+
scene_type=scene_type
|
760 |
)
|
761 |
else:
|
762 |
formatted_name_with_exact_count = self._format_object_count_description(
|
763 |
+
normalized_class_name,
|
764 |
count,
|
765 |
+
scene_type=scene_type
|
766 |
)
|
767 |
|
768 |
if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
|
|
|
861 |
def _remove_repetitive_descriptors(self, description: str) -> str:
|
862 |
"""
|
863 |
移除描述中的重複性和不適當的描述詞彙,特別是 "identical" 等詞彙
|
864 |
+
|
865 |
Args:
|
866 |
description: 原始描述文本
|
867 |
+
|
868 |
Returns:
|
869 |
str: 清理後的描述文本
|
870 |
"""
|
871 |
try:
|
872 |
import re
|
873 |
+
|
874 |
# 定義需要移除或替換的模式
|
875 |
cleanup_patterns = [
|
876 |
# 移除 "identical" 描述模式
|
877 |
(r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
878 |
(r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
879 |
(r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
|
880 |
+
|
881 |
# 改善 "comprehensive arrangement" 等過於技術性的表達
|
882 |
(r'\bcomprehensive arrangement of\b', 'arrangement of'),
|
883 |
(r'\bcomprehensive view featuring\b', 'scene featuring'),
|
884 |
(r'\bcomprehensive display of\b', 'display of'),
|
885 |
+
|
886 |
# 簡化過度描述性的短語
|
887 |
(r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
|
888 |
(r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
|
889 |
]
|
890 |
+
|
891 |
processed_description = description
|
892 |
for pattern, replacement in cleanup_patterns:
|
893 |
processed_description = re.sub(pattern, replacement, processed_description, flags=re.IGNORECASE)
|
894 |
+
|
895 |
# 進一步清理可能的多餘空格
|
896 |
processed_description = re.sub(r'\s+', ' ', processed_description).strip()
|
897 |
+
|
898 |
self.logger.debug(f"Cleaned description: removed repetitive descriptors")
|
899 |
return processed_description
|
900 |
+
|
901 |
except Exception as e:
|
902 |
self.logger.warning(f"Error removing repetitive descriptors: {str(e)}")
|
903 |
return description
|
904 |
|
905 |
+
def _format_object_count_description(self, class_name: str, count: int,
|
906 |
scene_type: Optional[str] = None,
|
907 |
detected_objects: Optional[List[Dict]] = None,
|
908 |
avg_confidence: float = 0.0) -> str:
|
909 |
"""
|
910 |
格式化物件數量描述的核心方法,整合空間排列、材質推斷和場景語境
|
911 |
+
|
912 |
這個方法是整個物件描述系統的核心,它將多個子功能整合在一起:
|
913 |
1. 數字到文字的轉換(避免阿拉伯數字)
|
914 |
2. 基於場景的材質推斷
|
915 |
3. 空間排列模式的描述
|
916 |
4. 語境化的物件描述
|
917 |
+
|
918 |
Args:
|
919 |
class_name: 標準化後的類別名稱
|
920 |
count: 物件數量
|
921 |
scene_type: 場景類型,用於語境化描述
|
922 |
detected_objects: 該類型的所有檢測物件,用於空間分析
|
923 |
avg_confidence: 平均檢測置信度,影響材質推斷的可信度
|
924 |
+
|
925 |
Returns:
|
926 |
str: 完整的格式化數量描述
|
927 |
"""
|
|
|
931 |
|
932 |
# 獲取基礎的複數形式
|
933 |
plural_form = self._get_plural_form(class_name)
|
934 |
+
|
935 |
# 單數情況的處理
|
936 |
if count == 1:
|
937 |
+
return self._format_single_object_description(class_name, scene_type,
|
938 |
detected_objects, avg_confidence)
|
939 |
+
|
940 |
# 複數情況的處理
|
941 |
+
return self._format_multiple_objects_description(class_name, count, plural_form,
|
942 |
scene_type, detected_objects, avg_confidence)
|
943 |
|
944 |
except Exception as e:
|
|
|
946 |
return f"{count} {class_name}s" if count > 1 else class_name
|
947 |
|
948 |
def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
|
949 |
+
detected_objects: Optional[List[Dict]],
|
950 |
avg_confidence: float) -> str:
|
951 |
"""
|
952 |
處理單個物件的描述生成
|
953 |
+
|
954 |
對於單個物件,我們重點在於通過材質推斷和位置描述來豐富描述內容,
|
955 |
避免簡單的 "a chair" 這樣的描述,而是生成 "a wooden dining chair" 這樣的表達
|
956 |
+
|
957 |
Args:
|
958 |
class_name: 物件類別名稱
|
959 |
scene_type: 場景類型
|
960 |
detected_objects: 檢測物件列表
|
961 |
avg_confidence: 平均置信度
|
962 |
+
|
963 |
Returns:
|
964 |
str: 單個物件的完整描述
|
965 |
"""
|
966 |
article = "an" if class_name[0].lower() in 'aeiou' else "a"
|
967 |
+
|
968 |
# 獲取材質描述符
|
969 |
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
970 |
+
|
971 |
# 獲取位置或特徵描述符
|
972 |
feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)
|
973 |
+
|
974 |
# 組合描述
|
975 |
descriptors = []
|
976 |
if material_descriptor:
|
977 |
descriptors.append(material_descriptor)
|
978 |
if feature_descriptor:
|
979 |
descriptors.append(feature_descriptor)
|
980 |
+
|
981 |
if descriptors:
|
982 |
return f"{article} {' '.join(descriptors)} {class_name}"
|
983 |
else:
|
984 |
return f"{article} {class_name}"
|
985 |
|
986 |
def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
|
987 |
+
scene_type: Optional[str], detected_objects: Optional[List[Dict]],
|
988 |
avg_confidence: float) -> str:
|
989 |
"""
|
990 |
處理多個物件的描述生成
|
991 |
+
|
992 |
對於多個物件,我們的重點是:
|
993 |
1. 將數字轉換為文字表達
|
994 |
2. 分析空間排列模式
|
995 |
3. 添加適當的材質或功能描述
|
996 |
4. 生成自然流暢的描述
|
997 |
+
|
998 |
Args:
|
999 |
class_name: 物件類別名稱
|
1000 |
count: 物件數量
|
|
|
1002 |
scene_type: 場景類型
|
1003 |
detected_objects: 檢測物件列表
|
1004 |
avg_confidence: 平均置信度
|
1005 |
+
|
1006 |
Returns:
|
1007 |
str: 多個物件的完整描述
|
1008 |
"""
|
1009 |
# 數字到文字的轉換映射
|
1010 |
number_words = {
|
1011 |
2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
|
1012 |
+
7: "seven", 8: "eight", 9: "nine", 10: "ten",
|
1013 |
11: "eleven", 12: "twelve"
|
1014 |
}
|
1015 |
+
|
1016 |
# 確定基礎數量表達
|
1017 |
if count in number_words:
|
1018 |
count_expression = number_words[count]
|
|
|
1020 |
count_expression = "several"
|
1021 |
else:
|
1022 |
count_expression = "numerous"
|
1023 |
+
|
1024 |
# 獲取材質或功能描述符
|
1025 |
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
1026 |
+
|
1027 |
# 獲取空間排列描述
|
1028 |
+
spatial_descriptor = self._get_spatial_arrangement_descriptor(class_name, scene_type,
|
1029 |
detected_objects, count)
|
1030 |
+
|
1031 |
# 組合最終描述
|
1032 |
descriptors = []
|
1033 |
if material_descriptor:
|
1034 |
descriptors.append(material_descriptor)
|
1035 |
+
|
1036 |
# 構建基礎描述
|
1037 |
base_description = f"{count_expression} {' '.join(descriptors)} {plural_form}".strip()
|
1038 |
+
|
1039 |
# 添加空間排列信息
|
1040 |
if spatial_descriptor:
|
1041 |
return f"{base_description} {spatial_descriptor}"
|
1042 |
else:
|
1043 |
return base_description
|
1044 |
|
1045 |
+
def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
|
1046 |
avg_confidence: float) -> Optional[str]:
|
1047 |
"""
|
1048 |
基於場景語境和置信度進行材質推斷
|
1049 |
+
|
1050 |
這個方法實現了智能的材質推斷,它不依賴複雜的圖像分析,
|
1051 |
而是基於常識和場景邏輯來推斷最可能的材質描述
|
1052 |
+
|
1053 |
Args:
|
1054 |
class_name: 物件類別名稱
|
1055 |
scene_type: 場景類型
|
1056 |
avg_confidence: 檢測置信度,影響推斷的保守程度
|
1057 |
+
|
1058 |
Returns:
|
1059 |
Optional[str]: 材質描述符,如果無法推斷則返回None
|
1060 |
"""
|
1061 |
# 只有在置信度足夠高時才進行材質推斷
|
1062 |
if avg_confidence < 0.5:
|
1063 |
return None
|
1064 |
+
|
1065 |
# 餐廳和用餐相關場景
|
1066 |
if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
|
1067 |
material_mapping = {
|
|
|
1071 |
"vase": "decorative"
|
1072 |
}
|
1073 |
return material_mapping.get(class_name)
|
1074 |
+
|
1075 |
# 辦公場景
|
1076 |
elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
|
1077 |
material_mapping = {
|
|
|
1081 |
"book": "reference"
|
1082 |
}
|
1083 |
return material_mapping.get(class_name)
|
1084 |
+
|
1085 |
# 客廳場景
|
1086 |
elif scene_type and scene_type in ["living_room"]:
|
1087 |
material_mapping = {
|
|
|
1091 |
"vase": "decorative"
|
1092 |
}
|
1093 |
return material_mapping.get(class_name)
|
1094 |
+
|
1095 |
# 室外場景
|
1096 |
elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
|
1097 |
material_mapping = {
|
|
|
1100 |
"bicycle": "stationed"
|
1101 |
}
|
1102 |
return material_mapping.get(class_name)
|
1103 |
+
|
1104 |
# 如果沒有特定的場景映射,返回通用描述符
|
1105 |
generic_mapping = {
|
1106 |
"chair": "comfortable",
|
|
|
1108 |
"car": "parked",
|
1109 |
"person": "present"
|
1110 |
}
|
1111 |
+
|
1112 |
return generic_mapping.get(class_name)
|
1113 |
|
1114 |
def _get_spatial_arrangement_descriptor(self, class_name: str, scene_type: Optional[str],
|
1115 |
+
detected_objects: Optional[List[Dict]],
|
1116 |
count: int) -> Optional[str]:
|
1117 |
"""
|
1118 |
分析物件的空間排列模式並生成相應描述
|
1119 |
+
|
1120 |
這個方法通過分析物件的位置分布來判斷排列模式,
|
1121 |
然後根據物件類型和場景生成適當的空間描述
|
1122 |
+
|
1123 |
Args:
|
1124 |
class_name: 物件類別名稱
|
1125 |
scene_type: 場景類型
|
1126 |
detected_objects: 該類型的所有檢測物件
|
1127 |
count: 物件數量
|
1128 |
+
|
1129 |
Returns:
|
1130 |
Optional[str]: 空間排列描述,如果無法分析則返回None
|
1131 |
"""
|
1132 |
if not detected_objects or len(detected_objects) < 2:
|
1133 |
return None
|
1134 |
+
|
1135 |
try:
|
1136 |
# 提取物件的標準化位置
|
1137 |
positions = []
|
|
|
1139 |
center = obj.get("normalized_center", [0.5, 0.5])
|
1140 |
if isinstance(center, (list, tuple)) and len(center) >= 2:
|
1141 |
positions.append(center)
|
1142 |
+
|
1143 |
if len(positions) < 2:
|
1144 |
return None
|
1145 |
+
|
1146 |
# 分析排列模式
|
1147 |
arrangement_pattern = self._analyze_arrangement_pattern(positions)
|
1148 |
+
|
1149 |
# 根據物件類型和場景生成描述
|
1150 |
+
return self._generate_arrangement_description(class_name, scene_type,
|
1151 |
arrangement_pattern, count)
|
1152 |
+
|
1153 |
except Exception as e:
|
1154 |
self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
|
1155 |
return None
|
|
|
1157 |
def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
|
1158 |
"""
|
1159 |
分析位置點的排列模式
|
1160 |
+
|
1161 |
這個方法使用簡單的幾何分析來判斷物件的排列類型,
|
1162 |
幫助我們理解物件在空間中的組織方式
|
1163 |
+
|
1164 |
Args:
|
1165 |
positions: 標準化的位置座標列表
|
1166 |
+
|
1167 |
Returns:
|
1168 |
str: 排列模式類型(linear, clustered, scattered, circular等)
|
1169 |
"""
|
1170 |
import numpy as np
|
1171 |
+
|
1172 |
if len(positions) < 2:
|
1173 |
return "single"
|
1174 |
+
|
1175 |
# 轉換為numpy陣列便於計算
|
1176 |
pos_array = np.array(positions)
|
1177 |
+
|
1178 |
# 計算位置的分布特徵
|
1179 |
x_coords = pos_array[:, 0]
|
1180 |
y_coords = pos_array[:, 1]
|
1181 |
+
|
1182 |
# 分析x和y方向的變異程度
|
1183 |
x_variance = np.var(x_coords)
|
1184 |
y_variance = np.var(y_coords)
|
1185 |
+
|
1186 |
# 計算物件間的平均距離
|
1187 |
distances = []
|
1188 |
for i in range(len(positions)):
|
1189 |
for j in range(i + 1, len(positions)):
|
1190 |
+
dist = np.sqrt((positions[i][0] - positions[j][0])**2 +
|
1191 |
(positions[i][1] - positions[j][1])**2)
|
1192 |
distances.append(dist)
|
1193 |
+
|
1194 |
avg_distance = np.mean(distances) if distances else 0
|
1195 |
distance_variance = np.var(distances) if distances else 0
|
1196 |
+
|
1197 |
# 判斷排列模式
|
1198 |
if len(positions) >= 4 and self._is_circular_pattern(positions):
|
1199 |
return "circular"
|
|
|
1211 |
def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
|
1212 |
"""
|
1213 |
檢查位置是否形成圓形或環形排列
|
1214 |
+
|
1215 |
Args:
|
1216 |
positions: 位置座標列表
|
1217 |
+
|
1218 |
Returns:
|
1219 |
bool: 是否為圓形排列
|
1220 |
"""
|
1221 |
import numpy as np
|
1222 |
+
|
1223 |
if len(positions) < 4:
|
1224 |
return False
|
1225 |
+
|
1226 |
try:
|
1227 |
pos_array = np.array(positions)
|
1228 |
+
|
1229 |
# 計算中心點
|
1230 |
center_x = np.mean(pos_array[:, 0])
|
1231 |
center_y = np.mean(pos_array[:, 1])
|
1232 |
+
|
1233 |
# 計算每個點到中心的距離
|
1234 |
distances_to_center = []
|
1235 |
for pos in positions:
|
1236 |
dist = np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
|
1237 |
distances_to_center.append(dist)
|
1238 |
+
|
1239 |
# 如果所有距離都相近,可能是圓形排列
|
1240 |
distance_variance = np.var(distances_to_center)
|
1241 |
return distance_variance < 0.05 and np.mean(distances_to_center) > 0.2
|
1242 |
+
|
1243 |
except:
|
1244 |
return False
|
1245 |
|
|
|
1247 |
arrangement_pattern: str, count: int) -> Optional[str]:
|
1248 |
"""
|
1249 |
根據物件類型、場景和排列模式生成空間描述
|
1250 |
+
|
1251 |
這個方法將抽象的排列模式轉換為自然語言描述,
|
1252 |
並根據具體的物件類型和場景語境進行定制
|
1253 |
+
|
1254 |
Args:
|
1255 |
class_name: 物件類別名稱
|
1256 |
scene_type: 場景類型
|
1257 |
arrangement_pattern: 排列模式
|
1258 |
count: 物件數量
|
1259 |
+
|
1260 |
Returns:
|
1261 |
Optional[str]: 生成的空間排列描述
|
1262 |
"""
|
|
|
1292 |
"distributed": "positioned throughout the scene"
|
1293 |
}
|
1294 |
}
|
1295 |
+
|
1296 |
# 獲取對應的描述模板
|
1297 |
if class_name in arrangement_templates:
|
1298 |
template_dict = arrangement_templates[class_name]
|
|
|
1308 |
"distributed": "thoughtfully placed"
|
1309 |
}
|
1310 |
base_description = generic_templates.get(arrangement_pattern, "positioned in the scene")
|
1311 |
+
|
1312 |
return base_description
|
1313 |
|
1314 |
def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
|
1315 |
detected_objects: Optional[List[Dict]]) -> Optional[str]:
|
1316 |
"""
|
1317 |
為單個物件生成特徵描述符
|
1318 |
+
|
1319 |
當只有一個物件時,我們可以提供更具體的位置或功能描述
|
1320 |
+
|
1321 |
Args:
|
1322 |
class_name: 物件類別名稱
|
1323 |
scene_type: 場景類型
|
1324 |
detected_objects: 檢測物件(單個)
|
1325 |
+
|
1326 |
Returns:
|
1327 |
Optional[str]: 特徵描述符
|
1328 |
"""
|
1329 |
if not detected_objects or len(detected_objects) != 1:
|
1330 |
return None
|
1331 |
+
|
1332 |
obj = detected_objects[0]
|
1333 |
region = obj.get("region", "").lower()
|
1334 |
+
|
1335 |
# 基於位置的描述
|
1336 |
if "center" in region:
|
1337 |
if class_name == "dining table":
|
|
|
1340 |
return "centrally placed"
|
1341 |
elif "corner" in region or "left" in region or "right" in region:
|
1342 |
return "positioned"
|
1343 |
+
|
1344 |
# 基於場景的功能描述
|
1345 |
if scene_type and scene_type in ["dining_area", "restaurant"]:
|
1346 |
if class_name == "chair":
|
1347 |
return "dining"
|
1348 |
elif class_name == "vase":
|
1349 |
return "decorative"
|
1350 |
+
|
1351 |
return None
|
1352 |
|
1353 |
def _get_plural_form(self, word: str) -> str:
|