Spaces:
Running
on
Zero
Running
on
Zero
Upload 14 files
Browse files- content_generator.py +569 -0
- functional_zone_detector.py +298 -0
- object_description_generator.py +76 -1191
- object_group_processor.py +397 -0
- pattern_analyzer.py +371 -0
- prominence_calculator.py +147 -0
- scene_zone_identifier.py +35 -1121
- spatial_location_handler.py +346 -0
- specialized_scene_processor.py +527 -0
- statistics_processor.py +343 -0
- template_manager.py +0 -0
- template_processor.py +429 -0
- template_repository.py +834 -0
- text_optimizer.py +616 -0
content_generator.py
ADDED
@@ -0,0 +1,569 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import random
|
3 |
+
import re
|
4 |
+
from typing import Dict, List, Optional, Union, Any
|
5 |
+
|
6 |
+
class ContentGenerator:
    """
    Content generator: base content generation and placeholder replacement.

    This class produces the dynamic text that is substituted into description
    templates: summaries of detected objects, scene-specific phrases, and a
    dictionary of default replacements. Every public method is best-effort and
    returns a usable phrase (or ``None`` where documented) instead of raising.
    """

    # Placeholders that are always answered with a summary of detected objects.
    _DYNAMIC_PLACEHOLDERS = (
        'primary_objects', 'detected_objects_summary', 'main_objects',
        'functional_area', 'functional_zones_description', 'scene_elements',
    )

    # Detector class ids used for scene-specific phrases.
    # NOTE(review): these look like COCO class ids (0 = person, 56 = chair,
    # 57 = sofa, ...) -- confirm against the detector's label map.
    _PERSON_CLASS_ID = 0
    _FURNITURE_CLASS_IDS = (56, 57, 58, 59, 60, 61)
    _ELECTRONICS_CLASS_IDS = (62, 63, 64, 65, 66, 67, 68, 69, 70)
    _SEATING_CLASS_IDS = (56, 57)  # chair, sofa

    # Plural forms that the suffix rules in _pluralize would get wrong.
    _IRREGULAR_PLURALS = {
        "person": "people",
        "knife": "knives",
        "mouse": "mice",
        "sheep": "sheep",
        "skis": "skis",
        "scissors": "scissors",
    }

    # Generic fallbacks consulted by get_placeholder_replacement.
    _FALLBACK_REPLACEMENTS = {
        # Traffic and urban
        "crossing_pattern": "pedestrian crosswalks",
        "pedestrian_behavior": "people moving carefully",
        "traffic_pattern": "vehicle movement",
        "urban_elements": "city infrastructure",
        "street_elements": "urban features",
        "intersection_features": "traffic management systems",
        "pedestrian_density": "groups of people",
        "pedestrian_flow": "pedestrian movement",
        "traffic_description": "vehicle traffic",
        "people_and_vehicles": "pedestrians and cars",

        # Scene setting
        "scene_setting": "this urban environment",
        "location_context": "the area",
        "spatial_context": "the scene",
        "environmental_context": "this location",

        # Common furniture and equipment
        "furniture": "various furniture pieces",
        "seating": "seating arrangements",
        "electronics": "electronic devices",
        "appliances": "household appliances",

        # Activities and behaviour
        "activities": "various activities",
        "interactions": "people interacting",
        "movement": "movement patterns",

        # Lighting and atmosphere
        "lighting_conditions": "ambient lighting",
        "atmosphere": "the overall atmosphere",
        "ambiance": "environmental ambiance",

        # Spatial description
        "spatial_arrangement": "spatial organization",
        "layout": "the layout",
        "composition": "visual composition",

        # Objects and elements
        "objects": "various objects",
        "elements": "scene elements",
        "features": "notable features",
        "details": "observable details",
    }

    # Per-scene defaults consulted by get_scene_based_default.
    _SCENE_DEFAULTS = {
        "urban_intersection": {
            "crossing_pattern": "marked crosswalks",
            "pedestrian_behavior": "pedestrians crossing carefully",
            "traffic_pattern": "controlled traffic flow",
        },
        "city_street": {
            "traffic_description": "urban vehicle traffic",
            "street_elements": "city infrastructure",
            "people_and_vehicles": "pedestrians and vehicles",
        },
        "living_room": {
            "furniture": "comfortable living room furniture",
            "seating": "sofas and chairs",
            "electronics": "entertainment equipment",
        },
        "kitchen": {
            "appliances": "kitchen appliances",
            "cooking_equipment": "cooking tools and equipment",
        },
        "office_workspace": {
            "office_equipment": "work furniture and devices",
            "desk_setup": "desk and office chair",
        },
    }

    # Last-resort values used by get_emergency_replacement.
    _EMERGENCY_REPLACEMENTS = {
        "crossing_pattern": "pedestrian walkways",
        "pedestrian_behavior": "people moving through the area",
        "traffic_pattern": "vehicle movement",
        "scene_setting": "this location",
        "urban_elements": "city features",
        "street_elements": "urban components",
    }

    # (placeholder suffix, noun appended in its place) for the final heuristic.
    _SUFFIX_REWRITES = (
        ("_pattern", "arrangement"),
        ("_behavior", "activity"),
        ("_description", "elements"),
    )

    def __init__(self):
        """Initialise the generator and preload the default replacements."""
        self.logger = logging.getLogger(self.__class__.__name__)

        # Preload the default replacement content.
        self.default_replacements = self._generate_default_replacements()

        self.logger.debug("ContentGenerator initialized successfully")

    def _generate_default_replacements(self) -> Dict[str, str]:
        """
        Build the default template replacement dictionary.

        The original literal repeated five keys; Python keeps the last value
        for a repeated key, so each key is listed once here with the value
        that was actually in effect.

        Returns:
            Dict[str, str]: placeholder name -> default replacement phrase.
        """
        return {
            # Scene introduction
            "scene_introduction": "this scene",
            "location_prefix": "this location",
            "setting_description": "this setting",
            "area_description": "this area",
            "environment_description": "this environment",
            "spatial_introduction": "this space",

            # Indoor
            "furniture": "various furniture pieces",
            "seating": "comfortable seating",
            "electronics": "entertainment devices",
            "bed_type": "a bed",
            "bed_location": "room",
            "bed_description": "sleeping arrangements",
            "extras": "personal items",
            "table_setup": "a dining table and chairs",
            "table_description": "a dining surface",
            "dining_items": "dining furniture and tableware",
            "appliances": "kitchen appliances",
            "kitchen_items": "cooking utensils and dishware",
            "cooking_equipment": "cooking equipment",
            "office_equipment": "work-related furniture and devices",
            "desk_setup": "a desk and chair",
            "computer_equipment": "electronic devices",

            # Outdoor / urban
            "traffic_description": "active urban traffic",
            "people_and_vehicles": "pedestrians and vehicles",
            "street_elements": "urban infrastructure elements",
            "park_features": "benches and greenery",
            "outdoor_elements": "natural features",
            "park_description": "outdoor amenities",
            "store_elements": "merchandise displays",
            "shopping_activity": "customers browse and shop",
            "store_items": "products for sale",

            # Upscale dining
            "design_elements": "elegant decor",
            "lighting": "stylish lighting fixtures",

            # Asian commercial street
            "storefront_features": "compact shops",
            "pedestrian_flow": "steady pedestrian movement",
            "asian_elements": "distinctive cultural elements",
            "cultural_elements": "traditional design features",
            "signage": "colorful signs",
            "street_activities": "busy urban activity",

            # Financial district
            "buildings": "tall buildings",
            "traffic_elements": "vehicles",
            "skyscrapers": "high-rise buildings",
            "road_features": "wide streets",
            "architectural_elements": "cultural buildings",
            "city_landmarks": "prominent structures",

            # Intersection
            "crossing_pattern": "clearly marked pedestrian crossings",
            "pedestrian_behavior": "careful pedestrian movement",
            "pedestrian_density": "multiple groups of pedestrians",
            "traffic_pattern": "well-regulated traffic flow",

            # Transit
            "transit_vehicles": "public transportation vehicles",
            "passenger_activity": "commuter movement",
            "transportation_modes": "various transit options",
            "passenger_needs": "waiting areas",
            "transit_infrastructure": "transit facilities",
            "passenger_movement": "commuter flow",

            # Shopping district
            "retail_elements": "shops and displays",
            "store_types": "various retail establishments",
            "walkway_features": "pedestrian pathways",
            "commercial_signage": "store signs",
            "consumer_behavior": "shopping activities",

            # Aerial view
            "commercial_layout": "organized retail areas",
            "pedestrian_pattern": "people movement patterns",
            "gathering_features": "public gathering spaces",
            "movement_pattern": "crowd flow patterns",
            "urban_elements": "city infrastructure",
            "public_activity": "social interaction",

            # Culture-specific elements
            "stall_elements": "vendor booths",
            "lighting_features": "decorative lights",
            "food_elements": "food offerings",
            "vendor_stalls": "market stalls",
            "nighttime_activity": "evening commerce",
            "cultural_lighting": "traditional lighting",
            "night_market_sounds": "lively market sounds",
            "evening_crowd_behavior": "nighttime social activity",
            "religious_structures": "sacred buildings",
            "decorative_features": "ornamental designs",
            "cultural_practices": "traditional activities",
            "temple_architecture": "religious structures",
            "sensory_elements": "atmospheric elements",
            "visitor_activities": "cultural experiences",
            "ritual_activities": "ceremonial practices",
            "cultural_symbols": "meaningful symbols",
            "architectural_style": "historical buildings",
            "historic_elements": "traditional architecture",
            "urban_design": "city planning elements",
            "social_behaviors": "public interactions",
            "european_features": "European architectural details",
            "tourist_activities": "visitor activities",
            "local_customs": "regional practices",

            # Time-specific elements
            "lighting_effects": "artificial lighting",
            "shadow_patterns": "light and shadow",
            "urban_features": "city elements",
            "illuminated_elements": "lit structures",
            "evening_activities": "nighttime activities",
            "light_sources": "lighting points",
            "lit_areas": "illuminated spaces",
            "shadowed_zones": "darker areas",
            "illuminated_signage": "bright signs",
            "colorful_lighting": "multicolored lights",
            "neon_elements": "neon signs",
            "night_crowd_behavior": "evening social patterns",
            "light_displays": "lighting installations",
            "building_features": "architectural elements",
            "nightlife_activities": "evening entertainment",
            "lighting_modifier": "bright",

            # Mixed indoor/outdoor environments
            "transitional_elements": "connecting features",
            "indoor_features": "interior elements",
            "outdoor_setting": "exterior spaces",
            "interior_amenities": "inside comforts",
            "exterior_features": "outside elements",
            "inside_elements": "interior design",
            "outside_spaces": "outdoor areas",
            "dual_environment_benefits": "combined settings",
            "passenger_activities": "waiting behaviors",
            "transportation_types": "transit vehicles",
            "sheltered_elements": "covered areas",
            "exposed_areas": "open sections",
            "waiting_behaviors": "passenger activities",
            "indoor_facilities": "inside services",
            "platform_features": "transit platform elements",
            "transit_routines": "transportation procedures",

            # Specialised venues
            "seating_arrangement": "spectator seating",
            "playing_surface": "athletic field",
            "sporting_activities": "sports events",
            "spectator_facilities": "viewer accommodations",
            "competition_space": "sports arena",
            "sports_events": "athletic competitions",
            "viewing_areas": "audience sections",
            "field_elements": "field markings and equipment",
            "game_activities": "competitive play",
            "construction_equipment": "building machinery",
            "building_materials": "construction supplies",
            "construction_activities": "building work",
            "work_elements": "construction tools",
            "structural_components": "building structures",
            "site_equipment": "construction gear",
            "raw_materials": "building supplies",
            "construction_process": "building phases",
            "medical_elements": "healthcare equipment",
            "clinical_activities": "medical procedures",
            "facility_design": "healthcare layout",
            "healthcare_features": "medical facilities",
            "patient_interactions": "care activities",
            "equipment_types": "medical devices",
            "care_procedures": "health services",
            "treatment_spaces": "clinical areas",
            "educational_furniture": "learning furniture",
            "learning_activities": "educational practices",
            "instructional_design": "teaching layout",
            "classroom_elements": "school equipment",
            "teaching_methods": "educational approaches",
            "student_engagement": "learning participation",
            "learning_spaces": "educational areas",
            "educational_tools": "teaching resources",
            "knowledge_transfer": "learning exchanges",
        }

    @staticmethod
    def _join_natural(items: List[str]) -> str:
        """Join phrases as 'a', 'a and b' or 'a, b, and c'."""
        if len(items) == 1:
            return items[0]
        if len(items) == 2:
            return f"{items[0]} and {items[1]}"
        return ", ".join(items[:-1]) + f", and {items[-1]}"

    @classmethod
    def _pluralize(cls, name: str) -> str:
        """Return the English plural of *name*, inflecting only its last word."""
        head, sep, last = name.rpartition(" ")
        lowered = last.lower()
        irregular = cls._IRREGULAR_PLURALS.get(lowered)
        if irregular is not None:
            plural = irregular
        elif lowered.endswith(("s", "x", "z", "ch", "sh")):
            plural = last + "es"
        elif len(lowered) > 1 and lowered.endswith("y") and lowered[-2] not in "aeiou":
            plural = last[:-1] + "ies"
        else:
            plural = last + "s"
        return f"{head}{sep}{plural}"

    def generate_objects_summary(self, detected_objects: List[Dict]) -> str:
        """
        Build a natural-language summary of the detected objects.

        Classes are ranked by ``count * 0.6 + average_confidence * 0.4`` and the
        five highest-ranked classes are listed, e.g. ``"2 people and a car"``.

        Args:
            detected_objects: detection dicts; ``class_name`` and ``confidence``
                are read, with ``"unknown"`` and ``0.5`` used when missing.

        Returns:
            str: the summary, or ``"various elements"`` when there is nothing
            to describe or an error occurs.
        """
        try:
            if not detected_objects:
                return "various elements"

            # Aggregate count and summed confidence per class name.
            per_class: Dict[str, Dict[str, float]] = {}
            for obj in detected_objects:
                raw_name = obj.get("class_name")
                # A blank or None name would break the article logic below and
                # used to discard the whole summary; treat it as "unknown".
                class_name = (str(raw_name).strip() if raw_name is not None else "") or "unknown"
                confidence = obj.get("confidence")
                if confidence is None:
                    confidence = 0.5

                entry = per_class.setdefault(class_name, {"count": 0, "total_confidence": 0})
                entry["count"] += 1
                entry["total_confidence"] += confidence

            # Importance score combines quantity and average confidence.
            ranked = []
            for class_name, stats in per_class.items():
                count = stats["count"]
                avg_confidence = stats["total_confidence"] / count
                ranked.append((class_name, count, (count * 0.6) + (avg_confidence * 0.4)))

            # Stable sort: equally important classes keep first-seen order.
            ranked.sort(key=lambda item: item[2], reverse=True)

            descriptions = []
            for class_name, count, _ in ranked[:5]:
                clean_name = class_name.replace('_', ' ')
                if count == 1:
                    article = "an" if clean_name[0].lower() in 'aeiou' else "a"
                    descriptions.append(f"{article} {clean_name}")
                else:
                    descriptions.append(f"{count} {self._pluralize(clean_name)}")

            return self._join_natural(descriptions)

        except Exception as e:
            self.logger.warning("Error generating objects summary: %s", e)
            return "various elements"

    def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
                                    all_replacements: Dict, detected_objects: List[Dict],
                                    scene_type: str) -> str:
        """
        Resolve the replacement text for one placeholder; never returns empty.

        Sources are tried in this order: object summary (for dynamic
        placeholders), ``all_replacements``, a random pick of 1-3 entries from
        ``fillers``, scene-specific content, the generic fallback table,
        scene-type defaults, and finally a phrase derived from the placeholder
        name itself.

        Args:
            placeholder: placeholder name.
            fillers: placeholder -> list of candidate phrases.
            all_replacements: placeholder -> predefined replacement string.
            detected_objects: detection dicts.
            scene_type: scene type identifier.

        Returns:
            str: replacement text.
        """
        try:
            # Dynamic placeholders are answered from the detections first.
            if placeholder in self._DYNAMIC_PLACEHOLDERS:
                dynamic_content = self.generate_objects_summary(detected_objects)
                if dynamic_content and dynamic_content.strip():
                    return dynamic_content.strip()

            # Predefined replacement content (only non-blank strings count).
            replacement = (all_replacements or {}).get(placeholder)
            if isinstance(replacement, str) and replacement.strip():
                return replacement.strip()

            # Template fillers: pick between one and three distinct options.
            options = (fillers or {}).get(placeholder)
            if options and isinstance(options, list):
                # Convert with str() before stripping so a non-string option
                # no longer aborts the whole lookup.
                valid_options = [str(opt).strip() for opt in options if opt and str(opt).strip()]
                if valid_options:
                    num_items = min(len(valid_options), random.randint(1, 3))
                    return self._join_natural(random.sample(valid_options, num_items))

            # Content derived from the detections for this placeholder.
            scene_specific_replacement = self.generate_scene_specific_content(
                placeholder, detected_objects, scene_type
            )
            if scene_specific_replacement and scene_specific_replacement.strip():
                return scene_specific_replacement.strip()

            # Generic fallback dictionary.
            if placeholder in self._FALLBACK_REPLACEMENTS:
                return self._FALLBACK_REPLACEMENTS[placeholder]

            # Defaults that depend on the scene type.
            scene_based_default = self.get_scene_based_default(placeholder, scene_type)
            if scene_based_default:
                return scene_based_default

            # Last resort: turn the placeholder name into a readable phrase.
            for suffix, noun in self._SUFFIX_REWRITES:
                if placeholder.endswith(suffix):
                    stem = placeholder[:-len(suffix)].replace('_', ' ')
                    return f"{stem} {noun}"

            cleaned_placeholder = placeholder.replace('_', ' ')
            if placeholder.endswith(('_elements', '_features')):
                return cleaned_placeholder
            # A name without underscores reads poorly on its own.
            return cleaned_placeholder if cleaned_placeholder != placeholder else "various elements"

        except Exception as e:
            self.logger.warning("Error getting replacement for placeholder '%s': %s", placeholder, e)
            # Still return something meaningful on failure.
            return placeholder.replace('_', ' ') if placeholder else "scene elements"

    def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
        """
        Look up a default phrase that depends on the scene type.

        Args:
            placeholder: placeholder name.
            scene_type: scene type identifier.

        Returns:
            Optional[str]: the scene-specific default, or ``None`` when the
            scene type or placeholder is not covered.
        """
        try:
            return self._SCENE_DEFAULTS.get(scene_type, {}).get(placeholder)
        except Exception as e:
            self.logger.warning(
                "Error getting scene-based default for '%s' in '%s': %s", placeholder, scene_type, e
            )
            return None

    def _describe_detected(self, detected_objects: List[Dict], class_ids, default_name: str,
                           limit: int, empty_text: str) -> str:
        """List the names of the first *limit* detections whose class id matches."""
        matches = [obj for obj in (detected_objects or []) if obj.get("class_id") in class_ids]
        if not matches:
            return empty_text
        names = [obj.get("class_name") or default_name for obj in matches[:limit]]
        # dict.fromkeys de-duplicates while keeping detection order, so the
        # text is stable across runs (a set's iteration order is not).
        return ", ".join(dict.fromkeys(names))

    def generate_scene_specific_content(self, placeholder: str, detected_objects: List[Dict],
                                        scene_type: str) -> Optional[str]:
        """
        Generate placeholder content from the detections.

        Handles ``furniture``, ``electronics``, ``people_count`` and
        ``seating``; ``scene_type`` is accepted for interface compatibility
        but is not used.

        Args:
            placeholder: placeholder name.
            detected_objects: detection dicts (``class_id`` / ``class_name``).
            scene_type: scene type identifier.

        Returns:
            Optional[str]: generated content, or ``None`` when the placeholder
            has no dedicated logic or an error occurs.
        """
        try:
            if placeholder == "furniture":
                return self._describe_detected(
                    detected_objects, self._FURNITURE_CLASS_IDS, "furniture", 3,
                    "various furniture items"
                )

            if placeholder == "electronics":
                return self._describe_detected(
                    detected_objects, self._ELECTRONICS_CLASS_IDS, "electronic device", 3,
                    "electronic devices"
                )

            if placeholder == "people_count":
                people_count = sum(
                    1 for obj in (detected_objects or [])
                    if obj.get("class_id") == self._PERSON_CLASS_ID
                )
                if people_count == 0:
                    return "no people"
                if people_count == 1:
                    return "one person"
                if people_count < 5:
                    return f"{people_count} people"
                return "several people"

            if placeholder == "seating":
                return self._describe_detected(
                    detected_objects, self._SEATING_CLASS_IDS, "seating", 2,
                    "seating arrangements"
                )

            # No dedicated logic for this placeholder.
            return None

        except Exception as e:
            self.logger.warning("Error generating scene-specific content for '%s': %s", placeholder, e)
            return None

    def get_emergency_replacement(self, placeholder: str) -> str:
        """
        Return a safe last-resort replacement that keeps sentences grammatical.

        Args:
            placeholder: placeholder name.

        Returns:
            str: a known emergency phrase, the placeholder words themselves
            when it has more than one word, or ``"various <word>"``.
        """
        if placeholder in self._EMERGENCY_REPLACEMENTS:
            return self._EMERGENCY_REPLACEMENTS[placeholder]

        # Derive a reasonable phrase from the placeholder name.
        cleaned = placeholder.replace('_', ' ')
        if not cleaned.strip():
            # Avoid emitting "various " with a dangling space.
            return "various elements"
        if len(cleaned.split()) > 1:
            return cleaned
        return f"various {cleaned}"
|
functional_zone_detector.py
ADDED
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import logging
|
3 |
+
import traceback
|
4 |
+
from typing import Dict, List, Any, Optional
|
5 |
+
|
6 |
+
logger = logging.getLogger(__name__)
|
7 |
+
|
8 |
+
class FunctionalZoneDetector:
|
9 |
+
"""
|
10 |
+
負責基於物件關聯性的功能區域識別
|
11 |
+
處理物件組合分析和描述性區域命名
|
12 |
+
"""
|
13 |
+
|
14 |
+
def __init__(self):
    """Initialise the functional zone detector and log that it is ready."""
    try:
        logger.info("FunctionalZoneDetector initialized successfully")
    except Exception as exc:
        # Record the failure and its traceback, then let the caller see it.
        for message in (
            f"Failed to initialize FunctionalZoneDetector: {str(exc)}",
            traceback.format_exc(),
        ):
            logger.error(message)
        raise
|
22 |
+
|
23 |
+
def identify_primary_functional_area(self, detected_objects: List[Dict]) -> Dict:
    """
    Identify the primary functional area from the strongest object combination.

    Candidate areas are probed in priority order (dining, then seating, then
    workspace) through ``detect_functional_combination``; the first truthy
    result is returned.

    Args:
        detected_objects: list of detected objects.

    Returns:
        The primary functional area dict, or None when no candidate matches
        or an error occurs.
    """
    try:
        # (primary class ids, supporting class ids, min supporting, description)
        candidate_specs = (
            # Dining area: dining table + chair, wine glass, cup, fork, knife
            ([60], [56, 40, 41, 42, 43], 2,
             "Dining area with table and seating arrangement"),
            # Relaxation area: sofa or bed + tv, potted plant, chair
            ([57, 59], [62, 58, 56], 1,
             "Seating and relaxation area"),
            # Workspace: laptop or keyboard + dining table, chair, mouse
            ([63, 66], [60, 56, 64], 2,
             "Workspace area with electronics and furniture"),
        )

        for primary_ids, supporting_ids, required_support, description in candidate_specs:
            zone = self.detect_functional_combination(
                detected_objects,
                primary_objects=primary_ids,
                supporting_objects=supporting_ids,
                min_supporting=required_support,
                description_template=description
            )
            if zone:
                return zone

        return None

    except Exception as exc:
        logger.error(f"Error identifying primary functional area: {str(exc)}")
        logger.error(traceback.format_exc())
        return None
|
74 |
+
|
75 |
+
def identify_secondary_functional_area(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
    """
    Identify a secondary functional area that avoids already-used regions.

    The ``region`` of every zone in ``existing_zones`` is excluded, then the
    decorative area and the kitchen appliance area are probed in that order
    through ``detect_functional_combination``; the first truthy result wins.

    Args:
        detected_objects: list of detected objects.
        existing_zones: functional zones that already exist.

    Returns:
        The secondary functional area dict, or None when no candidate matches
        or an error occurs.
    """
    try:
        # Regions already taken by existing zones.
        used_regions = {zone.get("region") for zone in existing_zones.values()}

        # (primary class ids, supporting class ids, min primary, description)
        candidate_specs = (
            # Decorative area: at least 3 potted plants, optionally vases
            ([58], [75], 3,
             "Decorative area with plants and ornamental items"),
            # Kitchen area: refrigerator, microwave, oven; optionally a sink
            ([72, 68, 69], [71], 2,
             "Kitchen appliance and storage area"),
        )

        for primary_ids, supporting_ids, required_primary, description in candidate_specs:
            zone = self.detect_functional_combination(
                detected_objects,
                primary_objects=primary_ids,
                supporting_objects=supporting_ids,
                min_supporting=0,
                min_primary=required_primary,
                description_template=description,
                exclude_regions=used_regions
            )
            if zone:
                return zone

        return None

    except Exception as exc:
        logger.error(f"Error identifying secondary functional area: {str(exc)}")
        logger.error(traceback.format_exc())
        return None
|
122 |
+
|
123 |
+
def detect_functional_combination(self, detected_objects: List[Dict], primary_objects: List[int],
                                  supporting_objects: List[int], min_supporting: int,
                                  description_template: str, min_primary: int = 1,
                                  exclude_regions: set = None) -> Dict:
    """
    Generic detector for a functional area defined by an object combination.

    Objects with confidence >= 0.4 are grouped by their "region". A region
    qualifies when it holds at least ``min_primary`` primary objects and
    ``min_supporting`` supporting objects; the qualifying region with the
    highest score (primary objects weigh double) is returned. On a tie the
    region encountered first wins.

    Args:
        detected_objects: List of detected-object dicts.
        primary_objects: class_id values that count as primary objects.
        supporting_objects: class_id values that count as supporting objects.
            An id that is also listed as primary is treated as primary only.
        min_supporting: Minimum number of supporting objects per region.
        description_template: Description text stored in the result.
        min_primary: Minimum number of primary objects per region.
        exclude_regions: Regions that must not be considered.

    Returns:
        Dict with "region", "objects" (class names, primary objects first)
        and "description", or None if no region qualifies or an error occurs.
    """
    try:
        if exclude_regions is None:
            exclude_regions = set()

        confident = [obj for obj in detected_objects if obj.get("confidence", 0) >= 0.4]

        # Collect primary objects.
        primary_objs = [obj for obj in confident if obj.get("class_id") in primary_objects]

        # Collect supporting objects. Ids that are also primary are skipped so
        # that no detection is counted (and listed) twice.
        supporting_objs = [obj for obj in confident
                           if obj.get("class_id") in supporting_objects
                           and obj.get("class_id") not in primary_objects]

        # Cheap global short-circuit; the per-region check below is stricter.
        if len(primary_objs) < min_primary or len(supporting_objs) < min_supporting:
            return None

        # Organise the objects by region, primary objects first so that region
        # ordering (used for tie-breaking) follows the primary detections.
        region_combinations = {}
        for role, members in (("primary", primary_objs), ("supporting", supporting_objs)):
            for obj in members:
                region = obj.get("region")

                # Objects without a region cannot form a zone. Grouping them
                # under None could make that pseudo-region win the scoring and
                # hide a valid region.
                if region is None or region in exclude_regions:
                    continue

                bucket = region_combinations.setdefault(region, {"primary": [], "supporting": []})
                bucket[role].append(obj)

        # Find the best-scoring region that satisfies the minimums.
        best_region = None
        best_score = 0

        for region, objs in region_combinations.items():
            primary_count = len(objs["primary"])
            supporting_count = len(objs["supporting"])

            if primary_count < min_primary or supporting_count < min_supporting:
                continue

            # Primary objects carry a higher weight.
            score = primary_count * 2 + supporting_count

            if score > best_score:
                best_score = score
                best_region = region

        if best_region is None:
            return None

        best_combination = region_combinations[best_region]
        # A detection without a class name must not discard the whole zone.
        all_objects = [obj.get("class_name", "unknown")
                       for obj in best_combination["primary"] + best_combination["supporting"]]

        return {
            "region": best_region,
            "objects": all_objects,
            "description": description_template
        }

    except Exception as e:
        logger.error(f"Error detecting functional combination: {str(e)}")
        logger.error(traceback.format_exc())
        return None
|
216 |
+
|
217 |
+
def generate_descriptive_zone_key_from_data(self, zone_data: Dict, priority_level: str) -> str:
    """
    Build a descriptive zone key from a zone's objects, description and region.

    The functional type is taken from the first keyword rule matched by any
    object name; if no object matches, the zone description is consulted. For
    secondary zones with a known region the spatial context is prepended
    (e.g. "upper left decorative area").

    Args:
        zone_data: Zone dict with optional "objects", "region" and
            "description" entries. Missing, None or non-string values are
            tolerated.
        priority_level: Priority of the zone ("primary" / "secondary").

    Returns:
        str: Descriptive zone key; "activity area" if an error occurs.
    """
    try:
        # Normalise once; None entries and None values are treated as absent.
        object_names = [str(obj).lower() for obj in (zone_data.get("objects") or []) if obj]
        region = zone_data.get("region") or ""
        description = str(zone_data.get("description") or "").lower()

        # Keyword rules in priority order: (keywords, resulting zone name).
        object_rules = (
            (("dining", "table"), "dining area"),
            (("chair", "sofa"), "seating area"),
            (("bed",), "sleeping area"),
            (("laptop", "keyboard"), "workspace area"),
            (("plant", "vase"), "decorative area"),
            (("refrigerator", "microwave"), "kitchen area"),
        )
        # Fallback rules applied to the description text. The kitchen rule
        # covers kitchen zones whose objects are only ovens and/or sinks.
        description_rules = (
            (("dining",), "dining area"),
            (("seating", "relaxation"), "seating area"),
            (("work",), "workspace area"),
            (("decorative",), "decorative area"),
            (("kitchen",), "kitchen area"),
        )

        base_name = None
        for keywords, zone_name in object_rules:
            if any(keyword in name for name in object_names for keyword in keywords):
                base_name = zone_name
                break

        if base_name is None:
            base_name = "functional area"
            for keywords, zone_name in description_rules:
                if any(keyword in description for keyword in keywords):
                    base_name = zone_name
                    break

        # Secondary zones get a position prefix so they can be told apart.
        if priority_level == "secondary" and region:
            spatial_context = self.get_spatial_context_description(region)
            if spatial_context:
                return f"{spatial_context} {base_name}"

        return base_name

    except Exception as e:
        logger.warning(f"Error generating descriptive zone key: {str(e)}")
        return "activity area"
|
270 |
+
|
271 |
+
def get_spatial_context_description(self, region: str) -> str:
    """
    Translate a grid region identifier into a short spatial phrase.

    The identifier is matched case-insensitively, and spaces or hyphens are
    accepted in place of underscores ("Top Left", "top-left" and "top_left"
    are equivalent).

    Args:
        region: Region identifier such as "top_left" or "middle_center".

    Returns:
        str: Spatial phrase (e.g. "upper left"), or an empty string when the
        region is not a string or is not a known grid cell.
    """
    if not isinstance(region, str):
        return ""

    spatial_mapping = {
        "top_left": "upper left",
        "top_center": "upper",
        "top_right": "upper right",
        "middle_left": "left side",
        "middle_center": "central",
        "middle_right": "right side",
        "bottom_left": "lower left",
        "bottom_center": "lower",
        "bottom_right": "lower right"
    }

    # Canonical form: lower case, words joined by single underscores.
    region_key = "_".join(region.replace("-", " ").replace("_", " ").lower().split())

    return spatial_mapping.get(region_key, "")
|
object_description_generator.py
CHANGED
@@ -4,6 +4,11 @@ import traceback
|
|
4 |
from typing import Dict, List, Tuple, Optional, Any
|
5 |
import numpy as np
|
6 |
|
|
|
|
|
|
|
|
|
|
|
7 |
class ObjectDescriptionError(Exception):
|
8 |
"""物件描述生成過程中的自定義異常"""
|
9 |
pass
|
@@ -12,9 +17,12 @@ class ObjectDescriptionError(Exception):
|
|
12 |
class ObjectDescriptionGenerator:
|
13 |
"""
|
14 |
物件描述生成器 - 負責將檢測到的物件轉換為自然語言描述
|
|
|
15 |
|
16 |
該類別處理物件相關的所有描述生成邏輯,包括重要物件的辨識、
|
17 |
空間位置描述、物件列表格式化以及描述文本的優化。
|
|
|
|
|
18 |
"""
|
19 |
|
20 |
def __init__(self,
|
@@ -31,6 +39,7 @@ class ObjectDescriptionGenerator:
|
|
31 |
max_categories_to_return: 返回的物件類別最大數量
|
32 |
max_total_objects: 返回的物件總數上限
|
33 |
confidence_threshold_for_description: 用於描述的置信度閾值
|
|
|
34 |
"""
|
35 |
self.logger = logging.getLogger(self.__class__.__name__)
|
36 |
|
@@ -40,6 +49,23 @@ class ObjectDescriptionGenerator:
|
|
40 |
self.confidence_threshold_for_description = confidence_threshold_for_description
|
41 |
self.region_analyzer = region_analyzer
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
self.logger.info("ObjectDescriptionGenerator initialized with prominence_score=%.2f, "
|
44 |
"max_categories=%d, max_objects=%d, confidence_threshold=%.2f",
|
45 |
min_prominence_score, max_categories_to_return,
|
@@ -59,49 +85,11 @@ class ObjectDescriptionGenerator:
|
|
59 |
Returns:
|
60 |
List[Dict]: 按重要性排序的物件列表
|
61 |
"""
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
for obj in detected_objects:
|
69 |
-
# 計算重要性評分
|
70 |
-
prominence_score = self._calculate_prominence_score(obj)
|
71 |
-
|
72 |
-
# 只保留超過閾值的物件
|
73 |
-
if prominence_score >= min_prominence_score:
|
74 |
-
obj_copy = obj.copy()
|
75 |
-
obj_copy['prominence_score'] = prominence_score
|
76 |
-
prominent_objects.append(obj_copy)
|
77 |
-
|
78 |
-
# 按重要性評分排序(從高到低)
|
79 |
-
prominent_objects.sort(key=lambda x: x.get('prominence_score', 0), reverse=True)
|
80 |
-
|
81 |
-
# 如果指定了最大類別數量限制,進行過濾
|
82 |
-
if max_categories_to_return is not None and max_categories_to_return > 0:
|
83 |
-
categories_seen = set()
|
84 |
-
filtered_objects = []
|
85 |
-
|
86 |
-
for obj in prominent_objects:
|
87 |
-
class_name = obj.get("class_name", "unknown")
|
88 |
-
|
89 |
-
# 如果是新類別且未達到限制
|
90 |
-
if class_name not in categories_seen:
|
91 |
-
if len(categories_seen) < max_categories_to_return:
|
92 |
-
categories_seen.add(class_name)
|
93 |
-
filtered_objects.append(obj)
|
94 |
-
else:
|
95 |
-
# 已見過的類別,直接添加
|
96 |
-
filtered_objects.append(obj)
|
97 |
-
|
98 |
-
return filtered_objects
|
99 |
-
|
100 |
-
return prominent_objects
|
101 |
-
|
102 |
-
except Exception as e:
|
103 |
-
self.logger.error(f"Error calculating prominent objects: {str(e)}")
|
104 |
-
return []
|
105 |
|
106 |
def set_region_analyzer(self, region_analyzer: Any) -> None:
|
107 |
"""
|
@@ -112,107 +100,11 @@ class ObjectDescriptionGenerator:
|
|
112 |
"""
|
113 |
try:
|
114 |
self.region_analyzer = region_analyzer
|
|
|
115 |
self.logger.info("RegionAnalyzer instance set for ObjectDescriptionGenerator")
|
116 |
except Exception as e:
|
117 |
self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
|
118 |
|
119 |
-
def _get_standardized_spatial_description(self, obj: Dict) -> str:
|
120 |
-
"""
|
121 |
-
使用RegionAnalyzer生成標準化空間描述的內部方法
|
122 |
-
|
123 |
-
Args:
|
124 |
-
obj: 物件字典
|
125 |
-
|
126 |
-
Returns:
|
127 |
-
str: 標準化空間描述,失敗時返回空字串
|
128 |
-
"""
|
129 |
-
try:
|
130 |
-
if hasattr(self, 'region_analyzer') and self.region_analyzer:
|
131 |
-
region = obj.get("region", "")
|
132 |
-
object_type = obj.get("class_name", "")
|
133 |
-
|
134 |
-
if hasattr(self.region_analyzer, 'get_contextual_spatial_description'):
|
135 |
-
return self.region_analyzer.get_contextual_spatial_description(region, object_type)
|
136 |
-
elif hasattr(self.region_analyzer, 'get_spatial_description_phrase'):
|
137 |
-
return self.region_analyzer.get_spatial_description_phrase(region)
|
138 |
-
|
139 |
-
return ""
|
140 |
-
|
141 |
-
except Exception as e:
|
142 |
-
self.logger.warning(f"Error getting standardized spatial description: {str(e)}")
|
143 |
-
if object_type:
|
144 |
-
return f"visible in the scene"
|
145 |
-
return "present in the view"
|
146 |
-
|
147 |
-
def _calculate_prominence_score(self, obj: Dict) -> float:
    """
    Compute a prominence score for a detected object.

    The score is a weighted sum of detection confidence (40%), size (30%),
    closeness to the image centre (20%) and class importance (10%).

    Args:
        obj: Detected-object dict; "confidence", "normalized_area",
            "normalized_center" and "class_name" are read, each with a
            default when missing.

    Returns:
        float: Prominence score in the range 0.0-1.0; 0.5 when the score
        cannot be computed.
    """
    try:
        # Base confidence component (weight: 40%).
        confidence = obj.get("confidence", 0.5)
        confidence_score = confidence * 0.4

        # Size component (weight: 30%). Negative areas are clamped to zero
        # because the logarithm below is undefined for them.
        normalized_area = max(obj.get("normalized_area", 0.1), 0.0)
        # Logarithmic scaling keeps very large objects from dominating.
        size_score = min(np.log(normalized_area * 10 + 1) / np.log(11), 1.0) * 0.3

        # Position component (weight: 20%): objects near the centre score higher.
        center_x, center_y = obj.get("normalized_center", [0.5, 0.5])
        distance_from_center = np.sqrt((center_x - 0.5)**2 + (center_y - 0.5)**2)
        position_score = (1 - min(distance_from_center * 2, 1.0)) * 0.2

        # Class importance component (weight: 10%).
        class_importance = self._get_class_importance(obj.get("class_name", "unknown"))
        class_score = class_importance * 0.1

        total_score = confidence_score + size_score + position_score + class_score

        # NaN/inf would slip through the min/max clamp as a bogus 1.0, so
        # fall back to the same neutral score used on errors.
        if not np.isfinite(total_score):
            return 0.5

        # Clamp to the valid range and return a plain Python float.
        return float(max(0.0, min(1.0, total_score)))

    except Exception as e:
        self.logger.warning(f"Error calculating prominence score for object: {str(e)}")
        return 0.5  # neutral score used as a fallback
|
185 |
-
|
186 |
-
def _get_class_importance(self, class_name: str) -> float:
|
187 |
-
"""
|
188 |
-
根據物件類別返回重要性係數
|
189 |
-
|
190 |
-
Args:
|
191 |
-
class_name: 物件類別名稱
|
192 |
-
|
193 |
-
Returns:
|
194 |
-
float: 類別重要性係數 (0.0-1.0)
|
195 |
-
"""
|
196 |
-
# 高重要性物件(人、車輛、建築)
|
197 |
-
high_importance = ["person", "car", "truck", "bus", "motorcycle", "bicycle", "building"]
|
198 |
-
|
199 |
-
# 中等重要性物件(家具、電器)
|
200 |
-
medium_importance = ["chair", "couch", "tv", "laptop", "refrigerator", "dining table", "bed"]
|
201 |
-
|
202 |
-
# 低重要性物件(小物品、配件)
|
203 |
-
low_importance = ["handbag", "backpack", "umbrella", "cell phone", "remote", "mouse"]
|
204 |
-
|
205 |
-
class_name_lower = class_name.lower()
|
206 |
-
|
207 |
-
if any(item in class_name_lower for item in high_importance):
|
208 |
-
return 1.0
|
209 |
-
elif any(item in class_name_lower for item in medium_importance):
|
210 |
-
return 0.7
|
211 |
-
elif any(item in class_name_lower for item in low_importance):
|
212 |
-
return 0.4
|
213 |
-
else:
|
214 |
-
return 0.6 # 預設中等重要性
|
215 |
-
|
216 |
def format_object_list_for_description(self,
|
217 |
objects: List[Dict],
|
218 |
use_indefinite_article_for_one: bool = False,
|
@@ -230,65 +122,12 @@ class ObjectDescriptionGenerator:
|
|
230 |
Returns:
|
231 |
str: 格式化的物件描述字符串
|
232 |
"""
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
name = obj.get("class_name", "unknown object")
|
240 |
-
if name == "unknown object" or not name:
|
241 |
-
continue
|
242 |
-
counts[name] = counts.get(name, 0) + 1
|
243 |
-
|
244 |
-
if not counts:
|
245 |
-
return "no specific objects clearly identified"
|
246 |
-
|
247 |
-
descriptions = []
|
248 |
-
# 按計數降序然後按名稱升序排序,限制物件類型數量
|
249 |
-
sorted_counts = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:max_types_to_list]
|
250 |
-
|
251 |
-
for name, count in sorted_counts:
|
252 |
-
if count == 1:
|
253 |
-
if use_indefinite_article_for_one:
|
254 |
-
if name[0].lower() in 'aeiou':
|
255 |
-
descriptions.append(f"an {name}")
|
256 |
-
else:
|
257 |
-
descriptions.append(f"a {name}")
|
258 |
-
else:
|
259 |
-
descriptions.append(f"one {name}")
|
260 |
-
else:
|
261 |
-
# 處理複數形式
|
262 |
-
plural_name = name
|
263 |
-
if name.endswith("y") and not name.lower().endswith(("ay", "ey", "iy", "oy", "uy")):
|
264 |
-
plural_name = name[:-1] + "ies"
|
265 |
-
elif name.endswith(("s", "sh", "ch", "x", "z")):
|
266 |
-
plural_name = name + "es"
|
267 |
-
elif not name.endswith("s"):
|
268 |
-
plural_name = name + "s"
|
269 |
-
|
270 |
-
if count_threshold_for_generalization != -1 and count > count_threshold_for_generalization:
|
271 |
-
if count <= count_threshold_for_generalization + 3:
|
272 |
-
descriptions.append(f"several {plural_name}")
|
273 |
-
else:
|
274 |
-
descriptions.append(f"many {plural_name}")
|
275 |
-
else:
|
276 |
-
descriptions.append(f"{count} {plural_name}")
|
277 |
-
|
278 |
-
if not descriptions:
|
279 |
-
return "no specific objects clearly identified"
|
280 |
-
|
281 |
-
if len(descriptions) == 1:
|
282 |
-
return descriptions[0]
|
283 |
-
elif len(descriptions) == 2:
|
284 |
-
return f"{descriptions[0]} and {descriptions[1]}"
|
285 |
-
else:
|
286 |
-
# 使用牛津逗號格式
|
287 |
-
return ", ".join(descriptions[:-1]) + f", and {descriptions[-1]}"
|
288 |
-
|
289 |
-
except Exception as e:
|
290 |
-
self.logger.warning(f"Error formatting object list: {str(e)}")
|
291 |
-
return "various objects"
|
292 |
|
293 |
def get_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
|
294 |
image_height: Optional[int] = None,
|
@@ -305,95 +144,16 @@ class ObjectDescriptionGenerator:
|
|
305 |
Returns:
|
306 |
str: 空間描述字符串,空值region時返回空字串
|
307 |
"""
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
if object_type and any(vehicle in object_type.lower() for vehicle in ["car", "truck", "bus"]):
|
315 |
-
return "positioned in the scene"
|
316 |
-
elif object_type and "person" in object_type.lower():
|
317 |
-
return "present in the area"
|
318 |
-
else:
|
319 |
-
return "located in the scene"
|
320 |
-
|
321 |
-
# 如果提供了RegionAnalyzer實例,使用其標準化方法
|
322 |
-
if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'):
|
323 |
-
object_type = obj.get("class_name", "")
|
324 |
-
if hasattr(region_analyzer, 'get_contextual_spatial_description'):
|
325 |
-
spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type)
|
326 |
-
else:
|
327 |
-
spatial_desc = region_analyzer.get_spatial_description_phrase(region)
|
328 |
-
|
329 |
-
if spatial_desc:
|
330 |
-
return spatial_desc
|
331 |
-
|
332 |
-
# 備用邏輯:使用改進的內建映射
|
333 |
-
clean_region = region.replace('_', ' ').strip().lower()
|
334 |
-
|
335 |
-
region_map = {
|
336 |
-
"top left": "in the upper left area",
|
337 |
-
"top center": "in the upper area",
|
338 |
-
"top right": "in the upper right area",
|
339 |
-
"middle left": "on the left side",
|
340 |
-
"middle center": "in the center",
|
341 |
-
"center": "in the center",
|
342 |
-
"middle right": "on the right side",
|
343 |
-
"bottom left": "in the lower left area",
|
344 |
-
"bottom center": "in the lower area",
|
345 |
-
"bottom right": "in the lower right area"
|
346 |
-
}
|
347 |
-
|
348 |
-
# 直接映射匹配
|
349 |
-
if clean_region in region_map:
|
350 |
-
return region_map[clean_region]
|
351 |
-
|
352 |
-
# 模糊匹配處理
|
353 |
-
if "top" in clean_region and "left" in clean_region:
|
354 |
-
return "in the upper left area"
|
355 |
-
elif "top" in clean_region and "right" in clean_region:
|
356 |
-
return "in the upper right area"
|
357 |
-
elif "bottom" in clean_region and "left" in clean_region:
|
358 |
-
return "in the lower left area"
|
359 |
-
elif "bottom" in clean_region and "right" in clean_region:
|
360 |
-
return "in the lower right area"
|
361 |
-
elif "top" in clean_region:
|
362 |
-
return "in the upper area"
|
363 |
-
elif "bottom" in clean_region:
|
364 |
-
return "in the lower area"
|
365 |
-
elif "left" in clean_region:
|
366 |
-
return "on the left side"
|
367 |
-
elif "right" in clean_region:
|
368 |
-
return "on the right side"
|
369 |
-
elif "center" in clean_region or "middle" in clean_region:
|
370 |
-
return "in the center"
|
371 |
-
|
372 |
-
# 如果region無法識別,使用normalized_center作為最後備用
|
373 |
-
norm_center = obj.get("normalized_center")
|
374 |
-
if norm_center and image_width and image_height:
|
375 |
-
x_norm, y_norm = norm_center
|
376 |
-
h_pos = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else "center"
|
377 |
-
v_pos = "upper" if y_norm < 0.4 else "lower" if y_norm > 0.6 else "center"
|
378 |
-
|
379 |
-
if h_pos == "center" and v_pos == "center":
|
380 |
-
return "in the center"
|
381 |
-
return f"in the {v_pos} {h_pos} area"
|
382 |
-
|
383 |
-
# 如果所有方法都失敗,返回空字串
|
384 |
-
return ""
|
385 |
-
|
386 |
-
except Exception as e:
|
387 |
-
self.logger.warning(f"Error generating spatial description: {str(e)}")
|
388 |
-
return ""
|
389 |
|
390 |
def optimize_object_description(self, description: str) -> str:
|
391 |
"""
|
392 |
-
|
393 |
-
|
394 |
-
這個函數是後處理階段的關鍵組件,負責清理和精簡自然語言生成系統
|
395 |
-
產出的描述文字。它專門處理常見的重複問題,如相同物件的重複
|
396 |
-
列舉和冗餘的空間描述,讓最終的描述更簡潔自然。
|
397 |
|
398 |
Args:
|
399 |
description: 原始的場景描述文本,可能包含重複或冗餘的表達
|
@@ -401,164 +161,7 @@ class ObjectDescriptionGenerator:
|
|
401 |
Returns:
|
402 |
str: 經過優化清理的描述文本,如果處理失敗則返回原始文本
|
403 |
"""
|
404 |
-
|
405 |
-
import re
|
406 |
-
|
407 |
-
# 1. 處理多餘的空間限定表達
|
408 |
-
# 使用通用模式來識別和移除不必要的空間描述
|
409 |
-
# 例如:"bed in the room" -> "bed",因為床本身就表示是室內環境
|
410 |
-
description = self._remove_redundant_spatial_qualifiers(description)
|
411 |
-
|
412 |
-
# 2. 辨識並處理物件列表的重複問題
|
413 |
-
# 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表
|
414 |
-
# 使用正則表達式捕獲 "with" 關鍵字後的物件序列
|
415 |
-
# 注意:正則表達式需要修正以避免貪婪匹配的問題
|
416 |
-
object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
|
417 |
-
|
418 |
-
# 遍歷每個找到的物件列表進行重複檢測和優化
|
419 |
-
for obj_list in object_lists:
|
420 |
-
# 3. 解析單個物件列表中的項目
|
421 |
-
# 使用更精確的正則表達式來分割物件項目
|
422 |
-
# 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
|
423 |
-
# 需要特別注意處理最後一個 "and" 的情況
|
424 |
-
|
425 |
-
# 先處理逗號格式 "A, B, and C"
|
426 |
-
if ", and " in obj_list:
|
427 |
-
# 分割 ", and " 前後的部分
|
428 |
-
before_last_and = obj_list.rsplit(", and ", 1)[0]
|
429 |
-
last_item = obj_list.rsplit(", and ", 1)[1]
|
430 |
-
|
431 |
-
# 處理前面的項目(用逗號分割)
|
432 |
-
front_items = [item.strip() for item in before_last_and.split(",")]
|
433 |
-
# 添加最後一個項目
|
434 |
-
all_items = front_items + [last_item.strip()]
|
435 |
-
elif " and " in obj_list:
|
436 |
-
# 處理簡單的 "A and B" 格式
|
437 |
-
all_items = [item.strip() for item in obj_list.split(" and ")]
|
438 |
-
else:
|
439 |
-
# 處理純逗號分隔的列表
|
440 |
-
all_items = [item.strip() for item in obj_list.split(",")]
|
441 |
-
|
442 |
-
# 4. 統計物件出現頻率
|
443 |
-
# 建立字典來記錄每個物件的出現次數
|
444 |
-
item_counts = {}
|
445 |
-
|
446 |
-
for item in all_items:
|
447 |
-
# 清理項目文字並過濾無效內容
|
448 |
-
item = item.strip()
|
449 |
-
# 過濾掉連接詞和空白項目
|
450 |
-
if item and item not in ["and", "with", ""]:
|
451 |
-
# 移除可能的冠詞前綴以便正確計數
|
452 |
-
# 例如 "a car" 和 "car" 應該被視為同一項目
|
453 |
-
clean_item = self._normalize_item_for_counting(item)
|
454 |
-
if clean_item not in item_counts:
|
455 |
-
item_counts[clean_item] = 0
|
456 |
-
item_counts[clean_item] += 1
|
457 |
-
|
458 |
-
# 5. 生成優化後的物件列表
|
459 |
-
if item_counts:
|
460 |
-
new_items = []
|
461 |
-
|
462 |
-
for item, count in item_counts.items():
|
463 |
-
if count > 1:
|
464 |
-
# 對於重複項目,使用數字加複數形式
|
465 |
-
plural_item = self._make_plural(item)
|
466 |
-
new_items.append(f"{count} {plural_item}")
|
467 |
-
else:
|
468 |
-
# 單個項目保持原樣
|
469 |
-
new_items.append(item)
|
470 |
-
|
471 |
-
# 6. 重新格式化物件列表
|
472 |
-
# 使用標準的英文列表連接格式
|
473 |
-
if len(new_items) == 1:
|
474 |
-
new_list = new_items[0]
|
475 |
-
elif len(new_items) == 2:
|
476 |
-
new_list = f"{new_items[0]} and {new_items[1]}"
|
477 |
-
else:
|
478 |
-
# 使用逗號格式確保清晰度
|
479 |
-
new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
|
480 |
-
|
481 |
-
# 7. 在原文中替換優化後的列表
|
482 |
-
# 將原始的多餘列表替換為優化後的簡潔版本
|
483 |
-
description = description.replace(obj_list, new_list)
|
484 |
-
|
485 |
-
return description
|
486 |
-
|
487 |
-
except Exception as e:
|
488 |
-
self.logger.warning(f"Error optimizing object description: {str(e)}")
|
489 |
-
return description
|
490 |
-
|
491 |
-
def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
|
492 |
-
"""
|
493 |
-
移除描述中冗餘的空間限定詞
|
494 |
-
|
495 |
-
這個方法使用模式匹配來識別和移除不必要的空間描述,例如
|
496 |
-
"bed in the room" 中的 "in the room" 部分通常是多餘的,因為
|
497 |
-
床這個物件本身就是室內環境。
|
498 |
-
|
499 |
-
Args:
|
500 |
-
description: 包含可能多餘空間描述的文本
|
501 |
-
|
502 |
-
Returns:
|
503 |
-
str: 移除多餘空間限定詞後的文本
|
504 |
-
"""
|
505 |
-
import re
|
506 |
-
|
507 |
-
# 定義常見的多餘空間表達模式
|
508 |
-
# 這些模式捕獲「物件 + 不必要的空間限定」的情況
|
509 |
-
redundant_patterns = [
|
510 |
-
# 室內物件的多餘房間描述
|
511 |
-
(r'\b(bed|sofa|couch|chair|table|desk|dresser|nightstand)\s+in\s+the\s+(room|bedroom|living\s+room)', r'\1'),
|
512 |
-
# 廚房物件的多餘描述
|
513 |
-
(r'\b(refrigerator|stove|oven|sink|microwave)\s+in\s+the\s+kitchen', r'\1'),
|
514 |
-
# 浴室物件的多餘描述
|
515 |
-
(r'\b(toilet|shower|bathtub|sink)\s+in\s+the\s+(bathroom|restroom)', r'\1'),
|
516 |
-
# 一般性的多餘表達:「在場景中」、「在圖片中」等
|
517 |
-
(r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
|
518 |
-
]
|
519 |
-
|
520 |
-
for pattern, replacement in redundant_patterns:
|
521 |
-
description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
|
522 |
-
|
523 |
-
return description
|
524 |
-
|
525 |
-
|
526 |
-
def _normalize_item_for_counting(self, item: str) -> str:
|
527 |
-
"""
|
528 |
-
正規化物件項目以便準確計數
|
529 |
-
|
530 |
-
移除冠詞和其他可能影響計數準確性的前綴詞彙,
|
531 |
-
確保 "a car" 和 "car" 被視為同一物件類型。
|
532 |
-
|
533 |
-
Args:
|
534 |
-
item: 原始物件項目字串
|
535 |
-
|
536 |
-
Returns:
|
537 |
-
str: 正規化後的物件項目
|
538 |
-
"""
|
539 |
-
# 移除常見的英文冠詞
|
540 |
-
item = re.sub(r'^(a|an|the)\s+', '', item.lower())
|
541 |
-
return item.strip()
|
542 |
-
|
543 |
-
def _make_plural(self, item: str) -> str:
|
544 |
-
"""
|
545 |
-
將單數名詞轉換為複數形式
|
546 |
-
|
547 |
-
Args:
|
548 |
-
item: 單數形式的名詞
|
549 |
-
|
550 |
-
Returns:
|
551 |
-
str: 複數形式的名詞
|
552 |
-
"""
|
553 |
-
# 重用已經實現的複數化邏輯
|
554 |
-
if item.endswith("y") and len(item) > 1 and item[-2].lower() not in 'aeiou':
|
555 |
-
return item[:-1] + "ies"
|
556 |
-
elif item.endswith(("s", "sh", "ch", "x", "z")):
|
557 |
-
return item + "es"
|
558 |
-
elif not item.endswith("s"):
|
559 |
-
return item + "s"
|
560 |
-
else:
|
561 |
-
return item
|
562 |
|
563 |
def generate_dynamic_everyday_description(self,
|
564 |
detected_objects: List[Dict],
|
@@ -586,6 +189,7 @@ class ObjectDescriptionGenerator:
|
|
586 |
try:
|
587 |
description_segments = []
|
588 |
image_width, image_height = image_dimensions if image_dimensions else (None, None)
|
|
|
589 |
|
590 |
self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
|
591 |
f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
|
@@ -625,8 +229,6 @@ class ObjectDescriptionGenerator:
|
|
625 |
else:
|
626 |
description_segments.append("Within this setting, no specific objects were clearly identified.")
|
627 |
else:
|
628 |
-
objects_by_class: Dict[str, List[Dict]] = {}
|
629 |
-
|
630 |
# 使用置信度過濾
|
631 |
confident_objects = [obj for obj in detected_objects
|
632 |
if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
|
@@ -642,172 +244,29 @@ class ObjectDescriptionGenerator:
|
|
642 |
else:
|
643 |
description_segments.append(no_confident_obj_msg.lower().capitalize())
|
644 |
else:
|
645 |
-
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
avg_confidence = stats.get("avg_confidence", 0)
|
650 |
-
|
651 |
-
# 動態調整置信度閾值
|
652 |
-
dynamic_threshold = self.confidence_threshold_for_description
|
653 |
-
if class_name in ["potted plant", "vase", "clock", "book"]:
|
654 |
-
dynamic_threshold = max(0.15, self.confidence_threshold_for_description * 0.6)
|
655 |
-
elif count >= 3:
|
656 |
-
dynamic_threshold = max(0.2, self.confidence_threshold_for_description * 0.8)
|
657 |
-
|
658 |
-
if count > 0 and avg_confidence >= dynamic_threshold:
|
659 |
-
matching_objects = [obj for obj in confident_objects if obj.get("class_name") == class_name]
|
660 |
-
if not matching_objects:
|
661 |
-
matching_objects = [obj for obj in detected_objects
|
662 |
-
if obj.get("class_name") == class_name and obj.get("confidence", 0) >= dynamic_threshold]
|
663 |
-
|
664 |
-
if matching_objects:
|
665 |
-
actual_count = min(stats["count"], len(matching_objects))
|
666 |
-
objects_by_class[class_name] = matching_objects[:actual_count]
|
667 |
-
else:
|
668 |
-
# 備用邏輯,同樣使用動態閾值
|
669 |
-
for obj in confident_objects:
|
670 |
-
name = obj.get("class_name", "unknown object")
|
671 |
-
if name == "unknown object" or not name:
|
672 |
-
continue
|
673 |
-
if name not in objects_by_class:
|
674 |
-
objects_by_class[name] = []
|
675 |
-
objects_by_class[name].append(obj)
|
676 |
-
|
677 |
-
print(f"DEBUG: Before spatial deduplication:")
|
678 |
-
for class_name in ["car", "traffic light", "person", "handbag"]:
|
679 |
-
if class_name in objects_by_class:
|
680 |
-
print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects before dedup")
|
681 |
|
682 |
if not objects_by_class:
|
683 |
description_segments.append("No common objects were confidently identified for detailed description.")
|
684 |
else:
|
685 |
-
#
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
699 |
-
|
700 |
-
priority = 2
|
701 |
-
elif count >= 3:
|
702 |
-
priority = max(1, priority - 1)
|
703 |
-
elif normalized_class_name in ["potted plant", "vase", "clock", "book"] and count >= 2:
|
704 |
-
priority = 2
|
705 |
-
|
706 |
-
avg_area = sum(o.get("normalized_area", 0.0) for o in obj_group_list) / len(obj_group_list) if obj_group_list else 0
|
707 |
-
quantity_bonus = min(count / 5.0, 1.0)
|
708 |
-
|
709 |
-
return (priority, -len(obj_group_list), -avg_area, -quantity_bonus)
|
710 |
-
|
711 |
-
# remove duplicate
|
712 |
-
deduplicated_objects_by_class = {}
|
713 |
-
processed_positions = []
|
714 |
-
|
715 |
-
for class_name, group_of_objects in objects_by_class.items():
|
716 |
-
unique_objects = []
|
717 |
-
|
718 |
-
for obj in group_of_objects:
|
719 |
-
obj_position = obj.get("normalized_center", [0.5, 0.5])
|
720 |
-
is_duplicate = False
|
721 |
-
|
722 |
-
for processed_pos in processed_positions:
|
723 |
-
position_distance = abs(obj_position[0] - processed_pos[0]) + abs(obj_position[1] - processed_pos[1])
|
724 |
-
if position_distance < 0.15:
|
725 |
-
is_duplicate = True
|
726 |
-
break
|
727 |
-
|
728 |
-
if not is_duplicate:
|
729 |
-
unique_objects.append(obj)
|
730 |
-
processed_positions.append(obj_position)
|
731 |
-
|
732 |
-
if unique_objects:
|
733 |
-
deduplicated_objects_by_class[class_name] = unique_objects
|
734 |
-
|
735 |
-
objects_by_class = deduplicated_objects_by_class
|
736 |
-
print(f"DEBUG: After spatial deduplication:")
|
737 |
-
for class_name in ["car", "traffic light", "person", "handbag"]:
|
738 |
-
if class_name in objects_by_class:
|
739 |
-
print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects after dedup")
|
740 |
-
|
741 |
-
sorted_object_groups = sorted(objects_by_class.items(), key=sort_key_object_groups)
|
742 |
-
|
743 |
-
object_clauses = []
|
744 |
-
|
745 |
-
for class_name, group_of_objects in sorted_object_groups:
|
746 |
-
count = len(group_of_objects)
|
747 |
-
if class_name in ["car", "traffic light", "person", "handbag"]:
|
748 |
-
print(f"DEBUG: Final count for {class_name}: {count}")
|
749 |
-
if count == 0:
|
750 |
-
continue
|
751 |
-
|
752 |
-
# 標準化class name
|
753 |
-
normalized_class_name = self._normalize_object_class_name(class_name)
|
754 |
-
|
755 |
-
# 使用統計信息確保準確的數量描述
|
756 |
-
if object_statistics and class_name in object_statistics:
|
757 |
-
actual_count = object_statistics[class_name]["count"]
|
758 |
-
formatted_name_with_exact_count = self._format_object_count_description(
|
759 |
-
normalized_class_name,
|
760 |
-
actual_count,
|
761 |
-
scene_type=scene_type
|
762 |
-
)
|
763 |
-
else:
|
764 |
-
formatted_name_with_exact_count = self._format_object_count_description(
|
765 |
-
normalized_class_name,
|
766 |
-
count,
|
767 |
-
scene_type=scene_type
|
768 |
-
)
|
769 |
-
|
770 |
-
if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
|
771 |
-
continue
|
772 |
-
|
773 |
-
# 確定群組的集體位置
|
774 |
-
location_description_suffix = ""
|
775 |
-
if count == 1:
|
776 |
-
spatial_desc = self.get_spatial_description(group_of_objects[0], image_width, image_height, self.region_analyzer)
|
777 |
-
if spatial_desc:
|
778 |
-
location_description_suffix = f"is {spatial_desc}"
|
779 |
-
else:
|
780 |
-
distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
|
781 |
-
valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
|
782 |
-
if not valid_regions:
|
783 |
-
location_description_suffix = "is positioned in the scene"
|
784 |
-
elif len(valid_regions) == 1:
|
785 |
-
spatial_desc = self.get_spatial_description_phrase(valid_regions[0])
|
786 |
-
location_description_suffix = f"is primarily {spatial_desc}" if spatial_desc else "is positioned in the scene"
|
787 |
-
elif len(valid_regions) == 2:
|
788 |
-
clean_region1 = valid_regions[0].replace('_', ' ')
|
789 |
-
clean_region2 = valid_regions[1].replace('_', ' ')
|
790 |
-
location_description_suffix = f"is mainly across the {clean_region1} and {clean_region2} areas"
|
791 |
-
else:
|
792 |
-
location_description_suffix = "is distributed in various parts of the scene"
|
793 |
-
else:
|
794 |
-
distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
|
795 |
-
valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
|
796 |
-
if not valid_regions:
|
797 |
-
location_description_suffix = "are visible in the scene"
|
798 |
-
elif len(valid_regions) == 1:
|
799 |
-
clean_region = valid_regions[0].replace('_', ' ')
|
800 |
-
location_description_suffix = f"are primarily in the {clean_region} area"
|
801 |
-
elif len(valid_regions) == 2:
|
802 |
-
clean_region1 = valid_regions[0].replace('_', ' ')
|
803 |
-
clean_region2 = valid_regions[1].replace('_', ' ')
|
804 |
-
location_description_suffix = f"are mainly across the {clean_region1} and {clean_region2} areas"
|
805 |
-
else:
|
806 |
-
location_description_suffix = "are distributed in various parts of the scene"
|
807 |
-
|
808 |
-
# 首字母大寫
|
809 |
-
formatted_name_capitalized = formatted_name_with_exact_count[0].upper() + formatted_name_with_exact_count[1:]
|
810 |
-
object_clauses.append(f"{formatted_name_capitalized} {location_description_suffix}")
|
811 |
|
812 |
if object_clauses:
|
813 |
if not description_segments:
|
@@ -845,7 +304,7 @@ class ObjectDescriptionGenerator:
|
|
845 |
raw_description += "."
|
846 |
|
847 |
# 移除重複性和不適當的描述詞彙
|
848 |
-
raw_description = self.
|
849 |
|
850 |
if not raw_description or len(raw_description.strip()) < 20:
|
851 |
if 'confident_objects' in locals() and confident_objects:
|
@@ -860,586 +319,6 @@ class ObjectDescriptionGenerator:
|
|
860 |
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
|
861 |
raise ObjectDescriptionError(error_msg) from e
|
862 |
|
863 |
-
def _remove_repetitive_descriptors(self, description: str) -> str:
|
864 |
-
"""
|
865 |
-
移除描述中的重複性和不適當的描述詞彙,特別是 "identical" 等詞彙
|
866 |
-
|
867 |
-
Args:
|
868 |
-
description: 原始描述文本
|
869 |
-
|
870 |
-
Returns:
|
871 |
-
str: 清理後的描述文本
|
872 |
-
"""
|
873 |
-
try:
|
874 |
-
import re
|
875 |
-
|
876 |
-
# 定義需要移除或替換的模式
|
877 |
-
cleanup_patterns = [
|
878 |
-
# 移除 "identical" 描述模式
|
879 |
-
(r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
880 |
-
(r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
881 |
-
(r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
|
882 |
-
|
883 |
-
# 改善 "comprehensive arrangement" 等過於技術性的表達
|
884 |
-
(r'\bcomprehensive arrangement of\b', 'arrangement of'),
|
885 |
-
(r'\bcomprehensive view featuring\b', 'scene featuring'),
|
886 |
-
(r'\bcomprehensive display of\b', 'display of'),
|
887 |
-
|
888 |
-
# 簡化過度描述性的短語
|
889 |
-
(r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
|
890 |
-
(r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
|
891 |
-
]
|
892 |
-
|
893 |
-
processed_description = description
|
894 |
-
for pattern, replacement in cleanup_patterns:
|
895 |
-
processed_description = re.sub(pattern, replacement, processed_description, flags=re.IGNORECASE)
|
896 |
-
|
897 |
-
# 進一步清理可能的多餘空格
|
898 |
-
processed_description = re.sub(r'\s+', ' ', processed_description).strip()
|
899 |
-
|
900 |
-
self.logger.debug(f"Cleaned description: removed repetitive descriptors")
|
901 |
-
return processed_description
|
902 |
-
|
903 |
-
except Exception as e:
|
904 |
-
self.logger.warning(f"Error removing repetitive descriptors: {str(e)}")
|
905 |
-
return description
|
906 |
-
|
907 |
-
def _format_object_count_description(self, class_name: str, count: int,
|
908 |
-
scene_type: Optional[str] = None,
|
909 |
-
detected_objects: Optional[List[Dict]] = None,
|
910 |
-
avg_confidence: float = 0.0) -> str:
|
911 |
-
"""
|
912 |
-
格式化物件數量描述的核心方法,整合空間排列、材質推斷和場景語境
|
913 |
-
|
914 |
-
這個方法是整個物件描述系統的核心,它將多個子功能整合在一起:
|
915 |
-
1. 數字到文字的轉換(避免阿拉伯數字)
|
916 |
-
2. 基於場景的材質推斷
|
917 |
-
3. 空間排列模式的描述
|
918 |
-
4. 語境化的物件描述
|
919 |
-
|
920 |
-
Args:
|
921 |
-
class_name: 標準化後的類別名稱
|
922 |
-
count: 物件數量
|
923 |
-
scene_type: 場景類型,用於語境化描述
|
924 |
-
detected_objects: 該類型的所有檢測物件,用於空間分析
|
925 |
-
avg_confidence: 平均檢測置信度,影響材質推斷的可信度
|
926 |
-
|
927 |
-
Returns:
|
928 |
-
str: 完整的格式化數量描述
|
929 |
-
"""
|
930 |
-
try:
|
931 |
-
if count <= 0:
|
932 |
-
return ""
|
933 |
-
|
934 |
-
# 獲取基礎的複數形式
|
935 |
-
plural_form = self._get_plural_form(class_name)
|
936 |
-
|
937 |
-
# 單數情況的處理
|
938 |
-
if count == 1:
|
939 |
-
return self._format_single_object_description(class_name, scene_type,
|
940 |
-
detected_objects, avg_confidence)
|
941 |
-
|
942 |
-
# 複數情況的處理
|
943 |
-
return self._format_multiple_objects_description(class_name, count, plural_form,
|
944 |
-
scene_type, detected_objects, avg_confidence)
|
945 |
-
|
946 |
-
except Exception as e:
|
947 |
-
self.logger.warning(f"Error formatting object count for '{class_name}': {str(e)}")
|
948 |
-
return f"{count} {class_name}s" if count > 1 else class_name
|
949 |
-
|
950 |
-
def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
|
951 |
-
detected_objects: Optional[List[Dict]],
|
952 |
-
avg_confidence: float) -> str:
|
953 |
-
"""
|
954 |
-
處理單個物件的描述生成
|
955 |
-
|
956 |
-
對於單個物件,我們重點在於通過材質推斷和位置描述來豐富描述內容,
|
957 |
-
避免簡單的 "a chair" 這樣的描述,而是生成 "a wooden dining chair" 這樣的表達
|
958 |
-
|
959 |
-
Args:
|
960 |
-
class_name: 物件類別名稱
|
961 |
-
scene_type: 場景類型
|
962 |
-
detected_objects: 檢測物件列表
|
963 |
-
avg_confidence: 平均置信度
|
964 |
-
|
965 |
-
Returns:
|
966 |
-
str: 單個物件的完整描述
|
967 |
-
"""
|
968 |
-
article = "an" if class_name[0].lower() in 'aeiou' else "a"
|
969 |
-
|
970 |
-
# 獲取材質描述符
|
971 |
-
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
972 |
-
|
973 |
-
# 獲取位置或特徵描述符
|
974 |
-
feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)
|
975 |
-
|
976 |
-
# 組合描述
|
977 |
-
descriptors = []
|
978 |
-
if material_descriptor:
|
979 |
-
descriptors.append(material_descriptor)
|
980 |
-
if feature_descriptor:
|
981 |
-
descriptors.append(feature_descriptor)
|
982 |
-
|
983 |
-
if descriptors:
|
984 |
-
return f"{article} {' '.join(descriptors)} {class_name}"
|
985 |
-
else:
|
986 |
-
return f"{article} {class_name}"
|
987 |
-
|
988 |
-
def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
|
989 |
-
scene_type: Optional[str], detected_objects: Optional[List[Dict]],
|
990 |
-
avg_confidence: float) -> str:
|
991 |
-
"""
|
992 |
-
處理多個物件的描述生成
|
993 |
-
|
994 |
-
對於多個物件,我們的重點是:
|
995 |
-
1. 將數字轉換為文字表達
|
996 |
-
2. 分析空間排列模式
|
997 |
-
3. 添加適當的材質或功能描述
|
998 |
-
4. 生成自然流暢的描述
|
999 |
-
|
1000 |
-
Args:
|
1001 |
-
class_name: 物件類別名稱
|
1002 |
-
count: 物件數量
|
1003 |
-
plural_form: 複數形式
|
1004 |
-
scene_type: 場景類型
|
1005 |
-
detected_objects: 檢測物件列表
|
1006 |
-
avg_confidence: 平均置信度
|
1007 |
-
|
1008 |
-
Returns:
|
1009 |
-
str: 多個物件的完整描述
|
1010 |
-
"""
|
1011 |
-
# 數字到文字的轉換映射
|
1012 |
-
number_words = {
|
1013 |
-
2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
|
1014 |
-
7: "seven", 8: "eight", 9: "nine", 10: "ten",
|
1015 |
-
11: "eleven", 12: "twelve"
|
1016 |
-
}
|
1017 |
-
|
1018 |
-
# 確定基礎數量表達
|
1019 |
-
if count in number_words:
|
1020 |
-
count_expression = number_words[count]
|
1021 |
-
elif count <= 20:
|
1022 |
-
count_expression = "several"
|
1023 |
-
else:
|
1024 |
-
count_expression = "numerous"
|
1025 |
-
|
1026 |
-
# 獲取材質或功能描述符
|
1027 |
-
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
1028 |
-
|
1029 |
-
# 獲取空間排列描述
|
1030 |
-
spatial_descriptor = self._get_spatial_arrangement_descriptor(class_name, scene_type,
|
1031 |
-
detected_objects, count)
|
1032 |
-
|
1033 |
-
# 組合最終描述
|
1034 |
-
descriptors = []
|
1035 |
-
if material_descriptor:
|
1036 |
-
descriptors.append(material_descriptor)
|
1037 |
-
|
1038 |
-
# 構建基礎描述
|
1039 |
-
base_description = f"{count_expression} {' '.join(descriptors)} {plural_form}".strip()
|
1040 |
-
|
1041 |
-
# 添加空間排列信息
|
1042 |
-
if spatial_descriptor:
|
1043 |
-
return f"{base_description} {spatial_descriptor}"
|
1044 |
-
else:
|
1045 |
-
return base_description
|
1046 |
-
|
1047 |
-
def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
|
1048 |
-
avg_confidence: float) -> Optional[str]:
|
1049 |
-
"""
|
1050 |
-
基於場景語境和置信度進行材質推斷
|
1051 |
-
|
1052 |
-
這個方法實現了智能的材質推斷,它不依賴複雜的圖像分析,
|
1053 |
-
而是基於常識和場景邏輯來推斷最可能的材質描述
|
1054 |
-
|
1055 |
-
Args:
|
1056 |
-
class_name: 物件類別名稱
|
1057 |
-
scene_type: 場景類型
|
1058 |
-
avg_confidence: 檢測置信度,影響推斷的保守程度
|
1059 |
-
|
1060 |
-
Returns:
|
1061 |
-
Optional[str]: 材質描述符,如果無法推斷則返回None
|
1062 |
-
"""
|
1063 |
-
# 只有在置信度足夠高時才進行材質推斷
|
1064 |
-
if avg_confidence < 0.5:
|
1065 |
-
return None
|
1066 |
-
|
1067 |
-
# 餐廳和用餐相關場景
|
1068 |
-
if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
|
1069 |
-
material_mapping = {
|
1070 |
-
"chair": "wooden" if avg_confidence > 0.7 else None,
|
1071 |
-
"dining table": "wooden",
|
1072 |
-
"couch": "upholstered",
|
1073 |
-
"vase": "decorative"
|
1074 |
-
}
|
1075 |
-
return material_mapping.get(class_name)
|
1076 |
-
|
1077 |
-
# 辦公場景
|
1078 |
-
elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
|
1079 |
-
material_mapping = {
|
1080 |
-
"chair": "office",
|
1081 |
-
"dining table": "conference", # 在辦公環境中,餐桌通常是會議桌
|
1082 |
-
"laptop": "modern",
|
1083 |
-
"book": "reference"
|
1084 |
-
}
|
1085 |
-
return material_mapping.get(class_name)
|
1086 |
-
|
1087 |
-
# 客廳場景
|
1088 |
-
elif scene_type and scene_type in ["living_room"]:
|
1089 |
-
material_mapping = {
|
1090 |
-
"couch": "comfortable",
|
1091 |
-
"chair": "accent",
|
1092 |
-
"tv": "large",
|
1093 |
-
"vase": "decorative"
|
1094 |
-
}
|
1095 |
-
return material_mapping.get(class_name)
|
1096 |
-
|
1097 |
-
# 室外場景
|
1098 |
-
elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
|
1099 |
-
material_mapping = {
|
1100 |
-
"car": "parked",
|
1101 |
-
"person": "walking",
|
1102 |
-
"bicycle": "stationed"
|
1103 |
-
}
|
1104 |
-
return material_mapping.get(class_name)
|
1105 |
-
|
1106 |
-
# 如果沒有特定的場景映射,返回通用描述符
|
1107 |
-
generic_mapping = {
|
1108 |
-
"chair": "comfortable",
|
1109 |
-
"dining table": "sturdy",
|
1110 |
-
"car": "parked",
|
1111 |
-
"person": "present"
|
1112 |
-
}
|
1113 |
-
|
1114 |
-
return generic_mapping.get(class_name)
|
1115 |
-
|
1116 |
-
def _get_spatial_arrangement_descriptor(self, class_name: str, scene_type: Optional[str],
|
1117 |
-
detected_objects: Optional[List[Dict]],
|
1118 |
-
count: int) -> Optional[str]:
|
1119 |
-
"""
|
1120 |
-
分析物件的空間排列模式並生成相應描述
|
1121 |
-
|
1122 |
-
這個方法通過分析物件的位置分布來判斷排列模式,
|
1123 |
-
然後根據物件類型和場景生成適當的空間描述
|
1124 |
-
|
1125 |
-
Args:
|
1126 |
-
class_name: 物件類別名稱
|
1127 |
-
scene_type: 場景類型
|
1128 |
-
detected_objects: 該類型的所有檢測物件
|
1129 |
-
count: 物件數量
|
1130 |
-
|
1131 |
-
Returns:
|
1132 |
-
Optional[str]: 空間排列描述,如果無法分析則返回None
|
1133 |
-
"""
|
1134 |
-
if not detected_objects or len(detected_objects) < 2:
|
1135 |
-
return None
|
1136 |
-
|
1137 |
-
try:
|
1138 |
-
# 提取物件的標準化位置
|
1139 |
-
positions = []
|
1140 |
-
for obj in detected_objects:
|
1141 |
-
center = obj.get("normalized_center", [0.5, 0.5])
|
1142 |
-
if isinstance(center, (list, tuple)) and len(center) >= 2:
|
1143 |
-
positions.append(center)
|
1144 |
-
|
1145 |
-
if len(positions) < 2:
|
1146 |
-
return None
|
1147 |
-
|
1148 |
-
# 分析排列模式
|
1149 |
-
arrangement_pattern = self._analyze_arrangement_pattern(positions)
|
1150 |
-
|
1151 |
-
# 根據物件類型和場景生成描述
|
1152 |
-
return self._generate_arrangement_description(class_name, scene_type,
|
1153 |
-
arrangement_pattern, count)
|
1154 |
-
|
1155 |
-
except Exception as e:
|
1156 |
-
self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
|
1157 |
-
return None
|
1158 |
-
|
1159 |
-
def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
|
1160 |
-
"""
|
1161 |
-
分析位置點的排列模式
|
1162 |
-
|
1163 |
-
這個方法使用簡單的幾何分析來判斷物件的排列類型,
|
1164 |
-
幫助我們理解物件在空間中的組織方式
|
1165 |
-
|
1166 |
-
Args:
|
1167 |
-
positions: 標準化的位置座標列表
|
1168 |
-
|
1169 |
-
Returns:
|
1170 |
-
str: 排列模式類型(linear, clustered, scattered, circular等)
|
1171 |
-
"""
|
1172 |
-
import numpy as np
|
1173 |
-
|
1174 |
-
if len(positions) < 2:
|
1175 |
-
return "single"
|
1176 |
-
|
1177 |
-
# 轉換為numpy陣列便於計算
|
1178 |
-
pos_array = np.array(positions)
|
1179 |
-
|
1180 |
-
# 計算位置的分布特徵
|
1181 |
-
x_coords = pos_array[:, 0]
|
1182 |
-
y_coords = pos_array[:, 1]
|
1183 |
-
|
1184 |
-
# 分析x和y方向的變異程度
|
1185 |
-
x_variance = np.var(x_coords)
|
1186 |
-
y_variance = np.var(y_coords)
|
1187 |
-
|
1188 |
-
# 計算物件間的平均距離
|
1189 |
-
distances = []
|
1190 |
-
for i in range(len(positions)):
|
1191 |
-
for j in range(i + 1, len(positions)):
|
1192 |
-
dist = np.sqrt((positions[i][0] - positions[j][0])**2 +
|
1193 |
-
(positions[i][1] - positions[j][1])**2)
|
1194 |
-
distances.append(dist)
|
1195 |
-
|
1196 |
-
avg_distance = np.mean(distances) if distances else 0
|
1197 |
-
distance_variance = np.var(distances) if distances else 0
|
1198 |
-
|
1199 |
-
# 判斷排列模式
|
1200 |
-
if len(positions) >= 4 and self._is_circular_pattern(positions):
|
1201 |
-
return "circular"
|
1202 |
-
elif x_variance < 0.05 or y_variance < 0.05: # 一個方向變異很小
|
1203 |
-
return "linear"
|
1204 |
-
elif avg_distance < 0.3 and distance_variance < 0.02: # 物件聚集且距離相近
|
1205 |
-
return "clustered"
|
1206 |
-
elif avg_distance > 0.6: # 物件分散
|
1207 |
-
return "scattered"
|
1208 |
-
elif distance_variance < 0.03: # 距離一致,可能是規則排列
|
1209 |
-
return "regular"
|
1210 |
-
else:
|
1211 |
-
return "distributed"
|
1212 |
-
|
1213 |
-
def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
|
1214 |
-
"""
|
1215 |
-
檢查位置是否形成圓形或環形排列
|
1216 |
-
|
1217 |
-
Args:
|
1218 |
-
positions: 位置座標列表
|
1219 |
-
|
1220 |
-
Returns:
|
1221 |
-
bool: 是否為圓形排列
|
1222 |
-
"""
|
1223 |
-
import numpy as np
|
1224 |
-
|
1225 |
-
if len(positions) < 4:
|
1226 |
-
return False
|
1227 |
-
|
1228 |
-
try:
|
1229 |
-
pos_array = np.array(positions)
|
1230 |
-
|
1231 |
-
# 計算中心點
|
1232 |
-
center_x = np.mean(pos_array[:, 0])
|
1233 |
-
center_y = np.mean(pos_array[:, 1])
|
1234 |
-
|
1235 |
-
# 計算每個點到中心的距離
|
1236 |
-
distances_to_center = []
|
1237 |
-
for pos in positions:
|
1238 |
-
dist = np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
|
1239 |
-
distances_to_center.append(dist)
|
1240 |
-
|
1241 |
-
# 如果所有距離都相近,可能是圓形排列
|
1242 |
-
distance_variance = np.var(distances_to_center)
|
1243 |
-
return distance_variance < 0.05 and np.mean(distances_to_center) > 0.2
|
1244 |
-
|
1245 |
-
except:
|
1246 |
-
return False
|
1247 |
-
|
1248 |
-
def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
|
1249 |
-
arrangement_pattern: str, count: int) -> Optional[str]:
|
1250 |
-
"""
|
1251 |
-
根據物件類型、場景和排列模式生成空間描述
|
1252 |
-
|
1253 |
-
這個方法將抽象的排列模式轉換為自然語言描述,
|
1254 |
-
並根據具體的物件類型和場景語境進行定制
|
1255 |
-
|
1256 |
-
Args:
|
1257 |
-
class_name: 物件類別名稱
|
1258 |
-
scene_type: 場景類型
|
1259 |
-
arrangement_pattern: 排列模式
|
1260 |
-
count: 物件數量
|
1261 |
-
|
1262 |
-
Returns:
|
1263 |
-
Optional[str]: 生成的空間排列描述
|
1264 |
-
"""
|
1265 |
-
# 基於物件類型的描述模板
|
1266 |
-
arrangement_templates = {
|
1267 |
-
"chair": {
|
1268 |
-
"linear": "arranged in a row",
|
1269 |
-
"clustered": "grouped together for conversation",
|
1270 |
-
"circular": "arranged around the table",
|
1271 |
-
"scattered": "positioned throughout the space",
|
1272 |
-
"regular": "evenly spaced",
|
1273 |
-
"distributed": "thoughtfully positioned"
|
1274 |
-
},
|
1275 |
-
"dining table": {
|
1276 |
-
"linear": "aligned to create a unified dining space",
|
1277 |
-
"clustered": "grouped to form intimate dining areas",
|
1278 |
-
"scattered": "distributed to optimize space flow",
|
1279 |
-
"regular": "systematically positioned",
|
1280 |
-
"distributed": "strategically placed"
|
1281 |
-
},
|
1282 |
-
"car": {
|
1283 |
-
"linear": "parked in sequence",
|
1284 |
-
"clustered": "grouped in the parking area",
|
1285 |
-
"scattered": "distributed throughout the lot",
|
1286 |
-
"regular": "neatly parked",
|
1287 |
-
"distributed": "positioned across the area"
|
1288 |
-
},
|
1289 |
-
"person": {
|
1290 |
-
"linear": "moving in a line",
|
1291 |
-
"clustered": "gathered together",
|
1292 |
-
"circular": "forming a circle",
|
1293 |
-
"scattered": "spread across the area",
|
1294 |
-
"distributed": "positioned throughout the scene"
|
1295 |
-
}
|
1296 |
-
}
|
1297 |
-
|
1298 |
-
# 獲取對應的描述模板
|
1299 |
-
if class_name in arrangement_templates:
|
1300 |
-
template_dict = arrangement_templates[class_name]
|
1301 |
-
base_description = template_dict.get(arrangement_pattern, "positioned in the scene")
|
1302 |
-
else:
|
1303 |
-
# 通用的排列描述
|
1304 |
-
generic_templates = {
|
1305 |
-
"linear": "arranged in a line",
|
1306 |
-
"clustered": "grouped together",
|
1307 |
-
"circular": "arranged in a circular pattern",
|
1308 |
-
"scattered": "distributed across the space",
|
1309 |
-
"regular": "evenly positioned",
|
1310 |
-
"distributed": "thoughtfully placed"
|
1311 |
-
}
|
1312 |
-
base_description = generic_templates.get(arrangement_pattern, "positioned in the scene")
|
1313 |
-
|
1314 |
-
return base_description
|
1315 |
-
|
1316 |
-
def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
|
1317 |
-
detected_objects: Optional[List[Dict]]) -> Optional[str]:
|
1318 |
-
"""
|
1319 |
-
為單個物件生成特徵描述符
|
1320 |
-
|
1321 |
-
當只有一個物件時,我們可以提供更具體的位置或功能描述
|
1322 |
-
|
1323 |
-
Args:
|
1324 |
-
class_name: 物件類別名稱
|
1325 |
-
scene_type: 場景類型
|
1326 |
-
detected_objects: 檢測物件(單個)
|
1327 |
-
|
1328 |
-
Returns:
|
1329 |
-
Optional[str]: 特徵描述符
|
1330 |
-
"""
|
1331 |
-
if not detected_objects or len(detected_objects) != 1:
|
1332 |
-
return None
|
1333 |
-
|
1334 |
-
obj = detected_objects[0]
|
1335 |
-
region = obj.get("region", "").lower()
|
1336 |
-
|
1337 |
-
# 基於位置的描述
|
1338 |
-
if "center" in region:
|
1339 |
-
if class_name == "dining table":
|
1340 |
-
return "central"
|
1341 |
-
elif class_name == "chair":
|
1342 |
-
return "centrally placed"
|
1343 |
-
elif "corner" in region or "left" in region or "right" in region:
|
1344 |
-
return "positioned"
|
1345 |
-
|
1346 |
-
# 基於場景的功能描述
|
1347 |
-
if scene_type and scene_type in ["dining_area", "restaurant"]:
|
1348 |
-
if class_name == "chair":
|
1349 |
-
return "dining"
|
1350 |
-
elif class_name == "vase":
|
1351 |
-
return "decorative"
|
1352 |
-
|
1353 |
-
return None
|
1354 |
-
|
1355 |
-
def _get_plural_form(self, word: str) -> str:
|
1356 |
-
"""
|
1357 |
-
獲取詞彙的複數形式
|
1358 |
-
|
1359 |
-
Args:
|
1360 |
-
word: 單數詞彙
|
1361 |
-
|
1362 |
-
Returns:
|
1363 |
-
str: 複數形式
|
1364 |
-
"""
|
1365 |
-
try:
|
1366 |
-
# 特殊複數形式
|
1367 |
-
irregular_plurals = {
|
1368 |
-
'person': 'people',
|
1369 |
-
'child': 'children',
|
1370 |
-
'foot': 'feet',
|
1371 |
-
'tooth': 'teeth',
|
1372 |
-
'mouse': 'mice',
|
1373 |
-
'man': 'men',
|
1374 |
-
'woman': 'women'
|
1375 |
-
}
|
1376 |
-
|
1377 |
-
if word.lower() in irregular_plurals:
|
1378 |
-
return irregular_plurals[word.lower()]
|
1379 |
-
|
1380 |
-
# 規則複數形式
|
1381 |
-
if word.endswith(('s', 'sh', 'ch', 'x', 'z')):
|
1382 |
-
return word + 'es'
|
1383 |
-
elif word.endswith('y') and word[-2] not in 'aeiou':
|
1384 |
-
return word[:-1] + 'ies'
|
1385 |
-
elif word.endswith('f'):
|
1386 |
-
return word[:-1] + 'ves'
|
1387 |
-
elif word.endswith('fe'):
|
1388 |
-
return word[:-2] + 'ves'
|
1389 |
-
else:
|
1390 |
-
return word + 's'
|
1391 |
-
|
1392 |
-
except Exception as e:
|
1393 |
-
self.logger.warning(f"Error getting plural form for '{word}': {str(e)}")
|
1394 |
-
return word + 's'
|
1395 |
-
|
1396 |
-
def _normalize_object_class_name(self, class_name: str) -> str:
|
1397 |
-
"""
|
1398 |
-
標準化物件類別名稱,確保輸出自然語言格式
|
1399 |
-
|
1400 |
-
Args:
|
1401 |
-
class_name: 原始類別名稱
|
1402 |
-
|
1403 |
-
Returns:
|
1404 |
-
str: 標準化後的類別名稱
|
1405 |
-
"""
|
1406 |
-
try:
|
1407 |
-
if not class_name or not isinstance(class_name, str):
|
1408 |
-
return "object"
|
1409 |
-
|
1410 |
-
# 移除可能的技術性前綴或後綴
|
1411 |
-
import re
|
1412 |
-
normalized = re.sub(r'^(class_|id_|type_)', '', class_name.lower())
|
1413 |
-
normalized = re.sub(r'(_class|_id|_type)$', '', normalized)
|
1414 |
-
|
1415 |
-
# 將下劃線和連字符替換為空格
|
1416 |
-
normalized = normalized.replace('_', ' ').replace('-', ' ')
|
1417 |
-
|
1418 |
-
# 移除多餘空格
|
1419 |
-
normalized = ' '.join(normalized.split())
|
1420 |
-
|
1421 |
-
# 特殊類別名稱的標準化映射
|
1422 |
-
class_name_mapping = {
|
1423 |
-
'traffic light': 'traffic light',
|
1424 |
-
'stop sign': 'stop sign',
|
1425 |
-
'fire hydrant': 'fire hydrant',
|
1426 |
-
'dining table': 'dining table',
|
1427 |
-
'potted plant': 'potted plant',
|
1428 |
-
'tv monitor': 'television',
|
1429 |
-
'cell phone': 'mobile phone',
|
1430 |
-
'wine glass': 'wine glass',
|
1431 |
-
'hot dog': 'hot dog',
|
1432 |
-
'teddy bear': 'teddy bear',
|
1433 |
-
'hair drier': 'hair dryer',
|
1434 |
-
'toothbrush': 'toothbrush'
|
1435 |
-
}
|
1436 |
-
|
1437 |
-
return class_name_mapping.get(normalized, normalized)
|
1438 |
-
|
1439 |
-
except Exception as e:
|
1440 |
-
self.logger.warning(f"Error normalizing class name '{class_name}': {str(e)}")
|
1441 |
-
return class_name if isinstance(class_name, str) else "object"
|
1442 |
-
|
1443 |
def generate_basic_details(self, scene_type: str, detected_objects: List[Dict]) -> str:
|
1444 |
"""
|
1445 |
當模板不可用時生成基本詳細信息
|
@@ -1588,7 +467,7 @@ class ObjectDescriptionGenerator:
|
|
1588 |
furniture_names = []
|
1589 |
for obj in furniture_objects[:3]:
|
1590 |
raw_name = obj.get("class_name", "furniture")
|
1591 |
-
normalized_name = self.
|
1592 |
furniture_names.append(normalized_name)
|
1593 |
|
1594 |
unique_names = list(set(furniture_names))
|
@@ -1786,7 +665,6 @@ class ObjectDescriptionGenerator:
|
|
1786 |
return "functional area"
|
1787 |
|
1788 |
# 移除數字後綴(如 crossing_zone_1 -> crossing_zone)
|
1789 |
-
import re
|
1790 |
base_name = re.sub(r'_\d+$', '', zone_name)
|
1791 |
|
1792 |
# 將下劃線替換為空格
|
@@ -1851,9 +729,16 @@ class ObjectDescriptionGenerator:
|
|
1851 |
old_value = getattr(self, key)
|
1852 |
setattr(self, key, value)
|
1853 |
self.logger.info(f"Updated {key}: {old_value} -> {value}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1854 |
else:
|
1855 |
self.logger.warning(f"Unknown configuration parameter: {key}")
|
1856 |
|
1857 |
except Exception as e:
|
1858 |
self.logger.error(f"Error updating configuration: {str(e)}")
|
1859 |
-
raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e
|
|
|
4 |
from typing import Dict, List, Tuple, Optional, Any
|
5 |
import numpy as np
|
6 |
|
7 |
+
from prominence_calculator import ProminenceCalculator
|
8 |
+
from spatial_location_handler import SpatialLocationHandler
|
9 |
+
from text_optimizer import TextOptimizer
|
10 |
+
from object_group_processor import ObjectGroupProcessor
|
11 |
+
|
12 |
class ObjectDescriptionError(Exception):
    """Custom exception raised when object description generation fails."""
|
|
|
17 |
class ObjectDescriptionGenerator:
|
18 |
"""
|
19 |
物件描述生成器 - 負責將檢測到的物件轉換為自然語言描述
|
20 |
+
匯總於EnhancedSceneDescriber
|
21 |
|
22 |
該類別處理物件相關的所有描述生成邏輯,包括重要物件的辨識、
|
23 |
空間位置描述、物件列表格式化以及描述文本的優化。
|
24 |
+
|
25 |
+
作為 Facade 模式的實現,協調四個專門的子組件來完成複雜的描述生成任務。
|
26 |
"""
|
27 |
|
28 |
def __init__(self,
|
|
|
39 |
max_categories_to_return: 返回的物件類別最大數量
|
40 |
max_total_objects: 返回的物件總數上限
|
41 |
confidence_threshold_for_description: 用於描述的置信度閾值
|
42 |
+
region_analyzer: 可選的RegionAnalyzer實例
|
43 |
"""
|
44 |
self.logger = logging.getLogger(self.__class__.__name__)
|
45 |
|
|
|
49 |
self.confidence_threshold_for_description = confidence_threshold_for_description
|
50 |
self.region_analyzer = region_analyzer
|
51 |
|
52 |
+
# 初始化子組件
|
53 |
+
self.prominence_calculator = ProminenceCalculator(
|
54 |
+
min_prominence_score=self.min_prominence_score
|
55 |
+
)
|
56 |
+
|
57 |
+
self.spatial_handler = SpatialLocationHandler(
|
58 |
+
region_analyzer=self.region_analyzer
|
59 |
+
)
|
60 |
+
|
61 |
+
self.text_optimizer = TextOptimizer()
|
62 |
+
|
63 |
+
self.object_group_processor = ObjectGroupProcessor(
|
64 |
+
confidence_threshold_for_description=self.confidence_threshold_for_description,
|
65 |
+
spatial_handler=self.spatial_handler,
|
66 |
+
text_optimizer=self.text_optimizer
|
67 |
+
)
|
68 |
+
|
69 |
self.logger.info("ObjectDescriptionGenerator initialized with prominence_score=%.2f, "
|
70 |
"max_categories=%d, max_objects=%d, confidence_threshold=%.2f",
|
71 |
min_prominence_score, max_categories_to_return,
|
|
|
85 |
Returns:
|
86 |
List[Dict]: 按重要性排序的物件列表
|
87 |
"""
|
88 |
+
return self.prominence_calculator.filter_prominent_objects(
|
89 |
+
detected_objects=detected_objects,
|
90 |
+
min_prominence_score=min_prominence_score,
|
91 |
+
max_categories_to_return=max_categories_to_return
|
92 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
def set_region_analyzer(self, region_analyzer: Any) -> None:
|
95 |
"""
|
|
|
100 |
"""
|
101 |
try:
|
102 |
self.region_analyzer = region_analyzer
|
103 |
+
self.spatial_handler.set_region_analyzer(region_analyzer)
|
104 |
self.logger.info("RegionAnalyzer instance set for ObjectDescriptionGenerator")
|
105 |
except Exception as e:
|
106 |
self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
def format_object_list_for_description(self,
|
109 |
objects: List[Dict],
|
110 |
use_indefinite_article_for_one: bool = False,
|
|
|
122 |
Returns:
|
123 |
str: 格式化的物件描述字符串
|
124 |
"""
|
125 |
+
return self.text_optimizer.format_object_list_for_description(
|
126 |
+
objects=objects,
|
127 |
+
use_indefinite_article_for_one=use_indefinite_article_for_one,
|
128 |
+
count_threshold_for_generalization=count_threshold_for_generalization,
|
129 |
+
max_types_to_list=max_types_to_list
|
130 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
def get_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
|
133 |
image_height: Optional[int] = None,
|
|
|
144 |
Returns:
|
145 |
str: 空間描述字符串,空值region時返回空字串
|
146 |
"""
|
147 |
+
return self.spatial_handler.generate_spatial_description(
|
148 |
+
obj=obj,
|
149 |
+
image_width=image_width,
|
150 |
+
image_height=image_height,
|
151 |
+
region_analyzer=region_analyzer
|
152 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
def optimize_object_description(self, description: str) -> str:
|
155 |
"""
|
156 |
+
優化物件描述文本,消除多餘重複並改善表達流暢度
|
|
|
|
|
|
|
|
|
157 |
|
158 |
Args:
|
159 |
description: 原始的場景描述文本,可能包含重複或冗餘的表達
|
|
|
161 |
Returns:
|
162 |
str: 經過優化清理的描述文本,如果處理失敗則返回原始文本
|
163 |
"""
|
164 |
+
return self.text_optimizer.optimize_object_description(description)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
def generate_dynamic_everyday_description(self,
|
167 |
detected_objects: List[Dict],
|
|
|
189 |
try:
|
190 |
description_segments = []
|
191 |
image_width, image_height = image_dimensions if image_dimensions else (None, None)
|
192 |
+
scene_type = places365_info.get("scene", "") if places365_info else ""
|
193 |
|
194 |
self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
|
195 |
f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
|
|
|
229 |
else:
|
230 |
description_segments.append("Within this setting, no specific objects were clearly identified.")
|
231 |
else:
|
|
|
|
|
232 |
# 使用置信度過濾
|
233 |
confident_objects = [obj for obj in detected_objects
|
234 |
if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
|
|
|
244 |
else:
|
245 |
description_segments.append(no_confident_obj_msg.lower().capitalize())
|
246 |
else:
|
247 |
+
# 使用 ObjectGroupProcessor 處理物件分組和排序
|
248 |
+
objects_by_class = self.object_group_processor.group_objects_by_class(
|
249 |
+
confident_objects, object_statistics
|
250 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
if not objects_by_class:
|
253 |
description_segments.append("No common objects were confidently identified for detailed description.")
|
254 |
else:
|
255 |
+
# 移除重複物件
|
256 |
+
deduplicated_objects_by_class = self.object_group_processor.remove_duplicate_objects(
|
257 |
+
objects_by_class
|
258 |
+
)
|
259 |
+
|
260 |
+
# 排序物件組
|
261 |
+
sorted_object_groups = self.object_group_processor.sort_object_groups(
|
262 |
+
deduplicated_objects_by_class
|
263 |
+
)
|
264 |
+
|
265 |
+
# 生成物件描述子句
|
266 |
+
object_clauses = self.object_group_processor.generate_object_clauses(
|
267 |
+
sorted_object_groups, object_statistics, scene_type,
|
268 |
+
image_width, image_height, self.region_analyzer
|
269 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
|
271 |
if object_clauses:
|
272 |
if not description_segments:
|
|
|
304 |
raw_description += "."
|
305 |
|
306 |
# 移除重複性和不適當的描述詞彙
|
307 |
+
raw_description = self.text_optimizer.remove_repetitive_descriptors(raw_description)
|
308 |
|
309 |
if not raw_description or len(raw_description.strip()) < 20:
|
310 |
if 'confident_objects' in locals() and confident_objects:
|
|
|
319 |
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
|
320 |
raise ObjectDescriptionError(error_msg) from e
|
321 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
def generate_basic_details(self, scene_type: str, detected_objects: List[Dict]) -> str:
|
323 |
"""
|
324 |
當模板不可用時生成基本詳細信息
|
|
|
467 |
furniture_names = []
|
468 |
for obj in furniture_objects[:3]:
|
469 |
raw_name = obj.get("class_name", "furniture")
|
470 |
+
normalized_name = self.text_optimizer.normalize_object_class_name(raw_name)
|
471 |
furniture_names.append(normalized_name)
|
472 |
|
473 |
unique_names = list(set(furniture_names))
|
|
|
665 |
return "functional area"
|
666 |
|
667 |
# 移除數字後綴(如 crossing_zone_1 -> crossing_zone)
|
|
|
668 |
base_name = re.sub(r'_\d+$', '', zone_name)
|
669 |
|
670 |
# 將下劃線替換為空格
|
|
|
729 |
old_value = getattr(self, key)
|
730 |
setattr(self, key, value)
|
731 |
self.logger.info(f"Updated {key}: {old_value} -> {value}")
|
732 |
+
|
733 |
+
# 同步更新子組件的配置
|
734 |
+
if key == "min_prominence_score" and hasattr(self, 'prominence_calculator'):
|
735 |
+
self.prominence_calculator.min_prominence_score = value
|
736 |
+
elif key == "confidence_threshold_for_description" and hasattr(self, 'object_group_processor'):
|
737 |
+
self.object_group_processor.confidence_threshold_for_description = value
|
738 |
+
|
739 |
else:
|
740 |
self.logger.warning(f"Unknown configuration parameter: {key}")
|
741 |
|
742 |
except Exception as e:
|
743 |
self.logger.error(f"Error updating configuration: {str(e)}")
|
744 |
+
raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e
|
object_group_processor.py
ADDED
@@ -0,0 +1,397 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import Dict, List, Tuple, Optional, Any
|
3 |
+
|
4 |
+
class ObjectGroupProcessor:
    """
    Group, de-duplicate, order and verbalize detected objects.

    The processor groups detections by class name, drops detections whose
    centers nearly coincide with one that was already kept, orders the
    resulting groups by descriptive priority and renders each group as an
    English clause such as "Two cars are primarily in the bottom left area".
    """

    # Classes that are accepted with a more lenient confidence threshold.
    _LENIENT_THRESHOLD_CLASSES = ("potted plant", "vase", "clock", "book")

    # Classes whose per-stage counts are traced at DEBUG level.
    _TRACED_CLASSES = ("car", "traffic light", "person", "handbag")

    # Plurals that the suffix rules of the fallback formatter cannot derive.
    _IRREGULAR_PLURALS = {
        "person": "people",
        "knife": "knives",
        "mouse": "mice",
        "sheep": "sheep",
        "bus": "buses",
    }

    # Spelled-out quantities used by the fallback formatter.
    _NUMBER_WORDS = {
        2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
        7: "seven", 8: "eight", 9: "nine", 10: "ten",
        11: "eleven", 12: "twelve",
    }

    # Region name (underscores replaced by spaces, lower-cased) -> phrase.
    _REGION_PHRASES = {
        "top left": "in the upper left area",
        "top center": "in the upper area",
        "top right": "in the upper right area",
        "middle left": "on the left side",
        "middle center": "in the center",
        "center": "in the center",
        "middle right": "on the right side",
        "bottom left": "in the lower left area",
        "bottom center": "in the lower area",
        "bottom right": "in the lower right area",
    }

    def __init__(self, confidence_threshold_for_description: float = 0.25,
                 spatial_handler: Optional[Any] = None,
                 text_optimizer: Optional[Any] = None):
        """
        Initialize the object group processor.

        Args:
            confidence_threshold_for_description: Base confidence threshold
                used when deciding whether a class is described.
            spatial_handler: Optional spatial location handler instance.
            text_optimizer: Optional text optimizer instance.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.confidence_threshold_for_description = confidence_threshold_for_description
        self.spatial_handler = spatial_handler
        self.text_optimizer = text_optimizer

    def group_objects_by_class(self, confident_objects: List[Dict],
                               object_statistics: Optional[Dict]) -> Dict[str, List[Dict]]:
        """
        Group objects by class name.

        When ``object_statistics`` is given, a class is kept only if its
        ``count`` is positive and its ``avg_confidence`` reaches a per-class
        threshold; the group is then capped at ``count`` objects. Without
        statistics every object with a usable class name is grouped as is.

        Args:
            confident_objects: Objects that already passed confidence filtering.
            object_statistics: Optional mapping of class name to a dict with
                ``count`` and ``avg_confidence``.

        Returns:
            Dict[str, List[Dict]]: Objects grouped by class name.
        """
        objects_by_class: Dict[str, List[Dict]] = {}
        base_threshold = self.confidence_threshold_for_description

        if not object_statistics:
            # Fallback: plain grouping, skipping objects without a class name.
            for obj in confident_objects:
                name = obj.get("class_name", "unknown object")
                if not name or name == "unknown object":
                    continue
                objects_by_class.setdefault(name, []).append(obj)
            return objects_by_class

        for class_name, stats in object_statistics.items():
            count = stats.get("count", 0)
            avg_confidence = stats.get("avg_confidence", 0)

            # Relax the threshold for the lenient classes and for classes
            # that were detected at least three times.
            dynamic_threshold = base_threshold
            if class_name in self._LENIENT_THRESHOLD_CLASSES:
                dynamic_threshold = max(0.15, base_threshold * 0.6)
            elif count >= 3:
                dynamic_threshold = max(0.2, base_threshold * 0.8)

            if count <= 0 or avg_confidence < dynamic_threshold:
                continue

            matching_objects = [obj for obj in confident_objects
                                if obj.get("class_name") == class_name]
            if not matching_objects:
                continue

            # Never report more objects than the statistics counted.
            objects_by_class[class_name] = matching_objects[:min(count, len(matching_objects))]

            if class_name in self._TRACED_CLASSES:
                self.logger.debug("%s: %d objects before spatial deduplication",
                                  class_name, len(objects_by_class[class_name]))

        return objects_by_class

    def remove_duplicate_objects(self, objects_by_class: Dict[str, List[Dict]]) -> Dict[str, List[Dict]]:
        """
        Remove objects whose center nearly coincides with an already kept one.

        Two objects count as duplicates when the Manhattan distance between
        their ``normalized_center`` values is below 0.15. The list of kept
        positions is shared across all classes, so an object is also dropped
        when it sits next to a kept object of a *different* class.
        NOTE(review): confirm the cross-class behaviour is intended.

        Args:
            objects_by_class: Objects grouped by class name.

        Returns:
            Dict[str, List[Dict]]: Groups with duplicates removed; classes
            left without any object are omitted.
        """
        deduplicated_objects_by_class: Dict[str, List[Dict]] = {}
        kept_positions: List[Any] = []

        for class_name, group_of_objects in objects_by_class.items():
            unique_objects = []

            for obj in group_of_objects:
                # Objects without a center are treated as sitting mid-image.
                position = obj.get("normalized_center", [0.5, 0.5])
                is_duplicate = any(
                    abs(position[0] - kept[0]) + abs(position[1] - kept[1]) < 0.15
                    for kept in kept_positions
                )
                if not is_duplicate:
                    unique_objects.append(obj)
                    kept_positions.append(position)

            if unique_objects:
                deduplicated_objects_by_class[class_name] = unique_objects

        for class_name in self._TRACED_CLASSES:
            if class_name in deduplicated_objects_by_class:
                self.logger.debug("%s: %d objects after spatial deduplication",
                                  class_name, len(deduplicated_objects_by_class[class_name]))

        return deduplicated_objects_by_class

    def sort_object_groups(self, objects_by_class: Dict[str, List[Dict]]) -> List[Tuple[str, List[Dict]]]:
        """
        Order object groups for description.

        Groups are ordered by priority (people, then furniture, then vehicles
        and frequent classes, then everything else), then by descending
        group size, then by descending average ``normalized_area``.

        Args:
            objects_by_class: Objects grouped by class name.

        Returns:
            List[Tuple[str, List[Dict]]]: ``(class_name, objects)`` pairs in
            description order.
        """
        def sort_key_object_groups(item_tuple: Tuple[str, List[Dict]]):
            class_name_key, obj_group_list = item_tuple
            count = len(obj_group_list)
            normalized_class_name = self._normalize_object_class_name(class_name_key)

            if normalized_class_name == "person":
                priority = 0
            elif normalized_class_name in ("dining table", "chair", "sofa", "bed"):
                priority = 1
            elif normalized_class_name in ("car", "bus", "truck", "traffic light"):
                priority = 2
            elif count >= 3:
                priority = 2
            elif normalized_class_name in self._LENIENT_THRESHOLD_CLASSES and count >= 2:
                priority = 2
            else:
                priority = 3

            if obj_group_list:
                avg_area = sum(o.get("normalized_area", 0.0) for o in obj_group_list) / count
            else:
                avg_area = 0

            return (priority, -count, -avg_area)

        return sorted(objects_by_class.items(), key=sort_key_object_groups)

    def generate_object_clauses(self, sorted_object_groups: List[Tuple[str, List[Dict]]],
                                object_statistics: Optional[Dict],
                                scene_type: str,
                                image_width: Optional[int],
                                image_height: Optional[int],
                                region_analyzer: Optional[Any] = None) -> List[str]:
        """
        Generate one descriptive clause per object group.

        The quantity comes from ``object_statistics`` when the class is listed
        there, otherwise from the group size. The same quantity selects the
        singular or plural location phrase, so noun and verb always agree.

        Args:
            sorted_object_groups: Ordered ``(class_name, objects)`` pairs.
            object_statistics: Optional per-class statistics with ``count``.
            scene_type: Scene type passed on to the count formatter.
            image_width: Image width.
            image_height: Image height.
            region_analyzer: Optional region analyzer instance.

        Returns:
            List[str]: Clauses without trailing punctuation.
        """
        object_clauses = []

        for class_name, group_of_objects in sorted_object_groups:
            group_size = len(group_of_objects)

            if class_name in self._TRACED_CLASSES:
                self.logger.debug("Final count for %s: %d", class_name, group_size)

            if group_size == 0:
                continue

            normalized_class_name = self._normalize_object_class_name(class_name)

            # Prefer the statistics count over the (possibly deduplicated)
            # group size for the quantity that is spoken.
            if object_statistics and class_name in object_statistics:
                described_count = object_statistics[class_name]["count"]
            else:
                described_count = group_size

            formatted_name = self._format_object_count_description(
                normalized_class_name,
                described_count,
                scene_type=scene_type
            )
            if not formatted_name or formatted_name == "no specific objects clearly identified":
                continue

            # Use the spoken quantity here as well; using the group size
            # produced clauses like "Three cars is ...".
            location_description_suffix = self._generate_location_description(
                group_of_objects, described_count, image_width, image_height, region_analyzer
            )

            formatted_name_capitalized = formatted_name[0].upper() + formatted_name[1:]
            object_clauses.append(f"{formatted_name_capitalized} {location_description_suffix}")

        return object_clauses

    def format_object_clauses(self, object_clauses: List[str]) -> str:
        """
        Join object clauses into a single description.

        The first clause becomes its own sentence; the remaining clauses
        follow after "The scene features:". The input list is not modified.

        Args:
            object_clauses: Clauses to join.

        Returns:
            str: The formatted description.
        """
        if not object_clauses:
            return "No common objects were confidently identified for detailed description."

        # Unpack instead of pop(0) so the caller's list stays intact.
        first_clause, *remaining_clauses = object_clauses
        result = first_clause + "."

        if remaining_clauses:
            joined_object_clauses = ". ".join(remaining_clauses)
            if joined_object_clauses and not joined_object_clauses.endswith("."):
                joined_object_clauses += "."
            result += " The scene features: " + joined_object_clauses

        return result

    def _generate_location_description(self, group_of_objects: List[Dict], count: int,
                                       image_width: Optional[int], image_height: Optional[int],
                                       region_analyzer: Optional[Any] = None) -> str:
        """
        Generate the location part of a clause, including its verb.

        Args:
            group_of_objects: Objects of one class.
            count: Quantity being described; 1 selects the singular form.
            image_width: Image width.
            image_height: Image height.
            region_analyzer: Optional region analyzer instance.

        Returns:
            str: A phrase starting with "is" or "are".
        """
        singular = count == 1

        if singular:
            if self.spatial_handler:
                spatial_desc = self.spatial_handler.generate_spatial_description(
                    group_of_objects[0], image_width, image_height, region_analyzer
                )
            else:
                spatial_desc = self._get_spatial_description_phrase(group_of_objects[0].get("region", ""))

            if spatial_desc:
                return f"is {spatial_desc}"

        # No direct spatial phrase available: describe by distinct regions.
        valid_regions = sorted({
            obj.get("region", "") for obj in group_of_objects
            if obj.get("region") and obj.get("region") != "unknown" and obj.get("region").strip()
        })
        verb = "is" if singular else "are"

        if not valid_regions:
            return "is positioned in the scene" if singular else "are visible in the scene"

        if len(valid_regions) == 1:
            if singular:
                spatial_desc = self._get_spatial_description_phrase(valid_regions[0])
                return f"is primarily {spatial_desc}" if spatial_desc else "is positioned in the scene"
            clean_region = valid_regions[0].replace('_', ' ')
            return f"are primarily in the {clean_region} area"

        if len(valid_regions) == 2:
            clean_region1 = valid_regions[0].replace('_', ' ')
            clean_region2 = valid_regions[1].replace('_', ' ')
            return f"{verb} mainly across the {clean_region1} and {clean_region2} areas"

        return f"{verb} distributed in various parts of the scene"

    def _get_spatial_description_phrase(self, region: str) -> str:
        """
        Fallback mapping from a region name to a spatial phrase.

        Args:
            region: Region string such as "top_left".

        Returns:
            str: The phrase, or "" for empty, "unknown" or unmapped regions.
        """
        if not region or region == "unknown":
            return ""

        clean_region = region.replace('_', ' ').strip().lower()
        return self._REGION_PHRASES.get(clean_region, "")

    def _normalize_object_class_name(self, class_name: str) -> str:
        """
        Normalize an object class name.

        Delegates to the text optimizer when one is configured; otherwise
        replaces underscores by spaces, strips and lower-cases the name.

        Args:
            class_name: Raw class name.

        Returns:
            str: Normalized class name ("object" for empty or non-string
            input in the fallback path).
        """
        if self.text_optimizer:
            return self.text_optimizer.normalize_object_class_name(class_name)

        if not class_name or not isinstance(class_name, str):
            return "object"

        return class_name.replace('_', ' ').strip().lower()

    def _pluralize(self, noun: str) -> str:
        """Return the plural of ``noun`` using a small table and suffix rules."""
        irregular = self._IRREGULAR_PLURALS.get(noun)
        if irregular:
            return irregular
        if noun.endswith(("ss", "x", "z", "ch", "sh")):
            return noun + "es"
        if noun.endswith("s"):
            # Treated as already plural (e.g. "skis", "scissors").
            return noun
        if len(noun) > 1 and noun.endswith("y") and noun[-2] not in "aeiou":
            return noun[:-1] + "ies"
        return noun + "s"

    def _format_object_count_description(self, class_name: str, count: int,
                                         scene_type: Optional[str] = None,
                                         detected_objects: Optional[List[Dict]] = None,
                                         avg_confidence: float = 0.0) -> str:
        """
        Format a quantity description such as "a chair" or "three people".

        Delegates to the text optimizer when one is configured. The fallback
        spells out counts up to twelve, then uses "several" (up to 20) and
        "numerous".

        Args:
            class_name: Normalized class name.
            count: Number of objects.
            scene_type: Scene type, forwarded to the text optimizer.
            detected_objects: Detections of this class, forwarded to the
                text optimizer.
            avg_confidence: Average confidence, forwarded to the text optimizer.

        Returns:
            str: The quantity description; "" in the fallback path when
            ``count`` is not positive or ``class_name`` is empty.
        """
        if self.text_optimizer:
            return self.text_optimizer.format_object_count_description(
                class_name, count, scene_type, detected_objects, avg_confidence
            )

        # Guard the empty name too: class_name[0] below would raise otherwise.
        if count <= 0 or not class_name:
            return ""

        if count == 1:
            article = "an" if class_name[0].lower() in 'aeiou' else "a"
            return f"{article} {class_name}"

        plural_form = self._pluralize(class_name)

        if count in self._NUMBER_WORDS:
            return f"{self._NUMBER_WORDS[count]} {plural_form}"
        if count <= 20:
            return f"several {plural_form}"
        return f"numerous {plural_form}"
|
pattern_analyzer.py
ADDED
@@ -0,0 +1,371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import logging
|
3 |
+
import traceback
|
4 |
+
import numpy as np
|
5 |
+
from typing import Dict, List, Any, Optional
|
6 |
+
|
7 |
+
logger = logging.getLogger(__name__)


class PatternAnalyzer:
    """
    Pattern analysis for traffic flow, pedestrian crossings and vehicle layout.

    Every public method takes lists of detected-object dicts and returns a
    dict of zones; each zone has the keys 'region', 'objects' and
    'description'. Any exception inside a method is logged and turned into
    an empty result.
    """

    def __init__(self):
        """Initialize the pattern analyzer (no state is kept)."""
        logger.info("PatternAnalyzer initialized successfully")

    @staticmethod
    def _unique_in_order(items: List[Any]) -> List[Any]:
        """Return ``items`` without duplicates, keeping first-seen order."""
        return list(dict.fromkeys(items))

    def analyze_crossing_patterns(self, pedestrians: List[Dict], traffic_lights: List[Dict]) -> Dict:
        """
        Analyze pedestrian crossing patterns to identify crossing zones.

        Pedestrians are grouped by region and the two regions with the most
        pedestrians become crossing zones. Traffic lights located in the same
        region are added to that zone's objects.

        Args:
            pedestrians: Pedestrian objects; each must contain 'region'.
            traffic_lights: Traffic light objects; each must contain 'region'.

        Returns:
            Dict keyed by zone name; each value contains 'region', 'objects'
            and 'description'. Empty when there are no pedestrians or on error.
        """
        try:
            crossing_zones = {}

            # Without pedestrians no crossing zone is identified.
            if not pedestrians:
                return crossing_zones

            # (1) Group pedestrians by region.
            pedestrian_regions = {}
            for p in pedestrians:
                pedestrian_regions.setdefault(p["region"], []).append(p)

            # (2) Attach the traffic lights found in each of those regions.
            combined_regions = {
                region: {
                    "pedestrians": peds,
                    "traffic_lights": [t for t in traffic_lights if t["region"] == region],
                }
                for region, peds in pedestrian_regions.items()
            }

            # (3) Rank regions by pedestrian count, busiest first.
            sorted_regions = sorted(
                combined_regions.items(),
                key=lambda x: len(x[1]["pedestrians"]),
                reverse=True
            )

            # (4) Build a crossing zone for the two busiest regions.
            for idx, (region, group) in enumerate(sorted_regions[:2]):
                peds = group["pedestrians"]
                tls = group["traffic_lights"]
                has_nearby_signals = len(tls) > 0

                # Name by compass direction, or main/secondary when central.
                direction = self._get_directional_description_local(region)
                if direction and direction != "central":
                    zone_name = f"{direction} crossing area"
                else:
                    zone_name = "main crossing area" if idx == 0 else "secondary crossing area"

                description = f"Pedestrian crossing area with {len(peds)} "
                description += "person" if len(peds) == 1 else "people"
                if direction:
                    description += f" in {direction} direction"
                if has_nearby_signals:
                    description += " near traffic signals"

                # Pedestrians first, then the traffic lights of the region.
                obj_list = ["pedestrian"] * len(peds)
                if has_nearby_signals:
                    obj_list += ["traffic light"] * len(tls)

                crossing_zones[zone_name] = {
                    "region": region,
                    "objects": obj_list,
                    "description": description
                }

            return crossing_zones

        except Exception as e:
            logger.error(f"Error in analyze_crossing_patterns: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def analyze_traffic_zones(self, vehicles: List[Dict]) -> Dict:
        """
        Analyze vehicle distribution to identify direction-aware traffic zones.

        The region with the most vehicles becomes 'vehicle_zone'; when the
        vehicles span more than one region, the runner-up becomes
        'secondary_vehicle_zone'. Vehicle types in the descriptions are listed
        in first-seen order so the text is identical on every run.

        Args:
            vehicles: Vehicle objects; each must contain 'region' and
                'class_name'.

        Returns:
            Dict of identified traffic zones; empty on error or no vehicles.
        """
        try:
            traffic_zones = {}

            if not vehicles:
                return traffic_zones

            # Group vehicles by region.
            vehicle_regions = {}
            for v in vehicles:
                vehicle_regions.setdefault(v["region"], []).append(v)

            # Stable sort: regions with equal counts keep insertion order.
            sorted_regions = sorted(vehicle_regions.items(), key=lambda x: len(x[1]), reverse=True)

            region, vehicles_in_region = sorted_regions[0]
            vehicle_types = [v["class_name"] for v in vehicles_in_region]
            unique_types = self._unique_in_order(vehicle_types)
            direction = self._get_directional_description_local(region)

            traffic_zones["vehicle_zone"] = {
                "region": region,
                "objects": vehicle_types,
                "description": f"Vehicle traffic area with {', '.join(unique_types[:3])}" +
                               (f" in {direction} area" if direction else "")
            }

            # Vehicles spread over several regions: add the second largest.
            if len(sorted_regions) > 1:
                second_region, second_vehicles = sorted_regions[1]
                direction = self._get_directional_description_local(second_region)
                vehicle_types = [v["class_name"] for v in second_vehicles]
                unique_types = self._unique_in_order(vehicle_types)

                traffic_zones["secondary_vehicle_zone"] = {
                    "region": second_region,
                    "objects": vehicle_types,
                    "description": f"Secondary traffic area with {', '.join(unique_types[:2])}" +
                                   (f" in {direction} direction" if direction else "")
                }

            return traffic_zones

        except Exception as e:
            logger.error(f"Error analyzing traffic zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def analyze_aerial_traffic_patterns(self, vehicle_objs: List[Dict]) -> Dict:
        """
        Analyze vehicle traffic patterns seen from an aerial viewpoint.

        Compares the spread of the vehicles' x and y coordinates: a much
        smaller x spread is reported as north-south flow, a much smaller y
        spread as east-west flow, anything else as intersection traffic.

        Args:
            vehicle_objs: Vehicle objects; each must contain
                'normalized_center' and 'class_name'.

        Returns:
            Dict of traffic pattern zones; empty for fewer than two vehicles
            or on error.
        """
        try:
            zones = {}

            if not vehicle_objs:
                return zones

            positions = np.array([obj["normalized_center"] for obj in vehicle_objs])
            if len(positions) < 2:
                return zones

            x_std = np.std(positions[:, 0])
            y_std = np.std(positions[:, 1])
            sample_objects = [obj["class_name"] for obj in vehicle_objs[:5]]

            if x_std < y_std * 0.5:
                # Vehicles aligned vertically in the image.
                zones["vertical_traffic_flow"] = {
                    "region": "central_vertical",
                    "objects": sample_objects,
                    "description": "North-south traffic flow visible from aerial view"
                }
            elif y_std < x_std * 0.5:
                # Vehicles aligned horizontally in the image.
                zones["horizontal_traffic_flow"] = {
                    "region": "central_horizontal",
                    "objects": sample_objects,
                    "description": "East-west traffic flow visible from aerial view"
                }
            else:
                # No dominant axis.
                zones["intersection_traffic"] = {
                    "region": "central",
                    "objects": sample_objects,
                    "description": "Multi-directional traffic at intersection visible from aerial view"
                }

            return zones

        except Exception as e:
            logger.error(f"Error analyzing aerial traffic patterns: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def identify_park_recreational_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify recreational activity zones in a park.

        Objects with class ids 32, 33, 34, 35 or 38 count as recreational
        items. The region holding most of them becomes 'recreational_zone',
        listing every distinct item name in first-seen order.

        Args:
            detected_objects: Detected objects; each must contain 'class_id',
                and recreational ones also 'region' and 'class_name'.

        Returns:
            Dict of recreational zones; empty when none are found or on error.
        """
        try:
            zones = {}

            rec_items = []
            rec_regions = {}

            for obj in detected_objects:
                if obj["class_id"] in [32, 33, 34, 35, 38]:  # sports ball, kite, baseball bat, glove, tennis racket
                    rec_regions.setdefault(obj["region"], []).append(obj)
                    rec_items.append(obj["class_name"])

            if rec_items:
                main_region, _ = max(rec_regions.items(), key=lambda x: len(x[1]))

                if main_region is not None:
                    unique_items = self._unique_in_order(rec_items)
                    zones["recreational_zone"] = {
                        "region": main_region,
                        "objects": unique_items,
                        "description": f"Recreational area with {', '.join(unique_items)}"
                    }

            return zones

        except Exception as e:
            logger.error(f"Error identifying park recreational zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def identify_parking_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify parking zones in a parking lot.

        Requires at least three objects with class id 2 (cars). Cars whose
        vertical centers lie within 0.1 of a cluster's first member join that
        cluster; a cluster of two or more is reported as 'parking_row',
        otherwise a generic 'parking_area' is reported.

        Args:
            detected_objects: Detected objects; each must contain 'class_id',
                and cars also 'normalized_center'.

        Returns:
            Dict of parking zones; empty for fewer than three cars or on error.
        """
        try:
            zones = {}

            car_objs = [obj for obj in detected_objects if obj["class_id"] == 2]  # cars

            if len(car_objs) >= 3:
                y_coords = [obj["normalized_center"][1] for obj in car_objs]

                # Cluster key: y coordinate of the cluster's first member.
                y_clusters = {}
                for i, y in enumerate(y_coords):
                    for cluster_y in y_clusters:
                        if abs(y - cluster_y) < 0.1:  # within 10% of image height
                            y_clusters[cluster_y].append(i)
                            break
                    else:
                        y_clusters[y] = [i]

                if max(len(indices) for indices in y_clusters.values()) >= 2:
                    zones["parking_row"] = {
                        "region": "central",
                        "objects": ["car"] * len(car_objs),
                        "description": "Organized parking area with vehicles arranged in rows"
                    }
                else:
                    zones["parking_area"] = {
                        "region": "wide",
                        "objects": ["car"] * len(car_objs),
                        "description": f"Parking area with {len(car_objs)} vehicles"
                    }

            return zones

        except Exception as e:
            logger.error(f"Error identifying parking zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def _get_directional_description_local(self, region: str) -> str:
        """
        Convert a region name into a compass-style direction.

        "top" maps to north, "bottom" to south, "left" to west and "right" to
        east; corner regions give the combined direction.

        Args:
            region: Region name.

        Returns:
            Direction string; "central" when no keyword matches or when the
            region cannot be processed.
        """
        try:
            region_lower = region.lower()

            if "top" in region_lower:
                vertical = "north"
            elif "bottom" in region_lower:
                vertical = "south"
            else:
                vertical = ""

            if "left" in region_lower:
                horizontal = "west"
            elif "right" in region_lower:
                horizontal = "east"
            else:
                horizontal = ""

            return (vertical + horizontal) or "central"

        except Exception as e:
            logger.error(f"Error getting directional description for region '{region}': {str(e)}")
            return "central"
|
prominence_calculator.py
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import numpy as np
|
3 |
+
from typing import Dict, List, Optional, Any
|
4 |
+
|
5 |
+
class ProminenceCalculator:
    """
    Prominence calculator for detected objects.

    Computes a prominence score for each detection, maps class names to an
    importance coefficient, and filters/sorts detections by prominence.
    """

    # Class-name groups used by get_class_importance. Names are matched as
    # whole words/phrases, so "car" matches "car" and "police car" but not
    # "carrot".
    _HIGH_IMPORTANCE = ("person", "car", "truck", "bus", "motorcycle", "bicycle", "building")
    _MEDIUM_IMPORTANCE = ("chair", "couch", "tv", "laptop", "refrigerator", "dining table", "bed")
    _LOW_IMPORTANCE = ("handbag", "backpack", "umbrella", "cell phone", "remote", "mouse")

    def __init__(self, min_prominence_score: float = 0.1):
        """
        Initialise the calculator.

        Args:
            min_prominence_score: Minimum prominence threshold stored on the
                instance as ``self.min_prominence_score``.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.min_prominence_score = min_prominence_score

    def calculate_prominence_score(self, obj: Dict) -> float:
        """
        Compute the prominence score of a single detected object.

        The score is a weighted sum in which confidence outweighs size,
        size outweighs position, and position outweighs class importance
        (weights 0.4 / 0.3 / 0.2 / 0.1).

        Args:
            obj: Detection dictionary. Reads the optional keys
                "confidence" (default 0.5), "normalized_area" (default 0.1),
                "normalized_center" (default [0.5, 0.5]) and "class_name"
                (default "unknown").

        Returns:
            float: Prominence score clamped to [0.0, 1.0]. If the inputs are
            malformed or yield a non-finite result, a neutral 0.5 is returned
            and a warning is logged.
        """
        try:
            # Confidence component (weight 40%).
            confidence = obj.get("confidence", 0.5)
            confidence_score = confidence * 0.4

            # Size component (weight 30%). The area is clamped to [0, 1]
            # first: a negative area would otherwise make np.log return NaN,
            # and the final min/max clamp silently turns NaN into 1.0.
            normalized_area = min(max(float(obj.get("normalized_area", 0.1)), 0.0), 1.0)
            # Logarithmic scaling keeps very large objects from dominating.
            size_score = float(np.log(normalized_area * 10 + 1) / np.log(11)) * 0.3

            # Position component (weight 20%): objects closer to the image
            # centre score higher.
            center_x, center_y = obj.get("normalized_center", [0.5, 0.5])
            distance_from_center = np.sqrt((center_x - 0.5) ** 2 + (center_y - 0.5) ** 2)
            position_score = (1 - min(distance_from_center * 2, 1.0)) * 0.2

            # Class-importance component (weight 10%).
            class_importance = self.get_class_importance(obj.get("class_name", "unknown"))
            class_score = class_importance * 0.1

            total_score = float(confidence_score + size_score + position_score + class_score)

            # NaN/inf inputs must not slip through the clamp below.
            if not np.isfinite(total_score):
                raise ValueError("non-finite prominence score")

            # Keep the score inside the valid range.
            return max(0.0, min(1.0, total_score))

        except Exception as e:
            # Deliberate best-effort: a malformed detection gets a neutral score.
            self.logger.warning(f"Error calculating prominence score for object: {str(e)}")
            return 0.5

    def get_class_importance(self, class_name: str) -> float:
        """
        Return the importance coefficient for an object class.

        Matching is case-insensitive and done on whole words/phrases;
        underscores and hyphens are treated as spaces.

        Args:
            class_name: Object class name.

        Returns:
            float: 1.0 for high-importance classes (people, vehicles,
            buildings), 0.7 for medium (furniture, appliances), 0.4 for low
            (small items, accessories), and 0.6 for anything else, including
            non-string input.
        """
        if not isinstance(class_name, str):
            return 0.6  # default medium importance

        words = class_name.lower().replace("_", " ").replace("-", " ").split()
        # Padding with spaces turns the substring test into a whole-phrase test.
        padded = " " + " ".join(words) + " "

        def contains_any(names) -> bool:
            return any(f" {name} " in padded for name in names)

        if contains_any(self._HIGH_IMPORTANCE):
            return 1.0
        if contains_any(self._MEDIUM_IMPORTANCE):
            return 0.7
        if contains_any(self._LOW_IMPORTANCE):
            return 0.4
        return 0.6  # default medium importance

    def filter_prominent_objects(self, detected_objects: List[Dict],
                                 min_prominence_score: float = 0.5,
                                 max_categories_to_return: Optional[int] = None) -> List[Dict]:
        """
        Select the most prominent objects, sorted by prominence score.

        Note that only the ``min_prominence_score`` argument is used as the
        threshold here; the instance attribute of the same name set in
        ``__init__`` is not consulted.

        Args:
            detected_objects: List of detection dictionaries.
            min_prominence_score: Minimum score (0.0-1.0) an object needs in
                order to be kept.
            max_categories_to_return: Optional cap on the number of distinct
                "class_name" values in the result. Ignored when None or not
                positive.

        Returns:
            List[Dict]: Shallow copies of the qualifying objects, each with an
            added "prominence_score" key, ordered from highest to lowest
            score. An empty list is returned on empty input or on error.
        """
        try:
            if not detected_objects:
                return []

            prominent_objects = []
            for obj in detected_objects:
                prominence_score = self.calculate_prominence_score(obj)

                # Keep only objects that reach the threshold; copy so the
                # caller's dictionaries are not mutated.
                if prominence_score >= min_prominence_score:
                    obj_copy = obj.copy()
                    obj_copy['prominence_score'] = prominence_score
                    prominent_objects.append(obj_copy)

            # Highest prominence first.
            prominent_objects.sort(key=lambda x: x['prominence_score'], reverse=True)

            if max_categories_to_return is None or max_categories_to_return <= 0:
                return prominent_objects

            # Walk the objects in prominence order, admitting a new category
            # only while the cap has not been reached; objects of categories
            # already admitted are always kept.
            categories_seen = set()
            filtered_objects = []
            for obj in prominent_objects:
                class_name = obj.get("class_name", "unknown")
                if class_name in categories_seen or len(categories_seen) < max_categories_to_return:
                    categories_seen.add(class_name)
                    filtered_objects.append(obj)

            return filtered_objects

        except Exception as e:
            self.logger.error(f"Error calculating prominent objects: {str(e)}")
            return []
|
scene_zone_identifier.py
CHANGED
@@ -3,6 +3,9 @@ import logging
|
|
3 |
import traceback
|
4 |
import numpy as np
|
5 |
from typing import Dict, List, Any, Optional
|
|
|
|
|
|
|
6 |
|
7 |
logger = logging.getLogger(__name__)
|
8 |
|
@@ -10,11 +13,17 @@ class SceneZoneIdentifier:
|
|
10 |
"""
|
11 |
負責不同場景類型的區域識別邏輯
|
12 |
專注於根據場景類型執行相應的功能區域識別策略
|
|
|
13 |
"""
|
14 |
|
15 |
def __init__(self):
|
16 |
"""初始化場景區域辨識器"""
|
17 |
try:
|
|
|
|
|
|
|
|
|
|
|
18 |
logger.info("SceneZoneIdentifier initialized successfully")
|
19 |
|
20 |
except Exception as e:
|
@@ -39,18 +48,18 @@ class SceneZoneIdentifier:
|
|
39 |
zones = {}
|
40 |
|
41 |
# 主要功能區域(基於物件關聯性而非場景類型)
|
42 |
-
primary_zone = self.
|
43 |
if primary_zone:
|
44 |
# 基於區域內容生成描述性鍵名
|
45 |
-
descriptive_key = self.
|
46 |
zones[descriptive_key] = primary_zone
|
47 |
|
48 |
# 只有明確證據且物件數量足夠時創建次要功能區域
|
49 |
if len(zones) >= 1 and len(detected_objects) >= 6:
|
50 |
-
secondary_zone = self.
|
51 |
if secondary_zone:
|
52 |
# 基於區域內容生成描述性鍵名
|
53 |
-
descriptive_key = self.
|
54 |
zones[descriptive_key] = secondary_zone
|
55 |
|
56 |
logger.info(f"Identified {len(zones)} indoor zones for scene type '{scene_type}'")
|
@@ -61,92 +70,9 @@ class SceneZoneIdentifier:
|
|
61 |
logger.error(traceback.format_exc())
|
62 |
return {}
|
63 |
|
64 |
-
def _generate_descriptive_zone_key_from_data(self, zone_data: Dict, priority_level: str) -> str:
|
65 |
-
"""
|
66 |
-
基於區域數據生成描述性鍵名
|
67 |
-
|
68 |
-
Args:
|
69 |
-
zone_data: 區域數據字典
|
70 |
-
priority_level: 優先級別(primary/secondary)
|
71 |
-
|
72 |
-
Returns:
|
73 |
-
str: 描述性區域鍵名
|
74 |
-
"""
|
75 |
-
try:
|
76 |
-
objects = zone_data.get("objects", [])
|
77 |
-
region = zone_data.get("region", "")
|
78 |
-
description = zone_data.get("description", "")
|
79 |
-
|
80 |
-
# 基於物件內容確定功能類型
|
81 |
-
if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
|
82 |
-
base_name = "dining area"
|
83 |
-
elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
|
84 |
-
base_name = "seating area"
|
85 |
-
elif any("bed" in obj.lower() for obj in objects):
|
86 |
-
base_name = "sleeping area"
|
87 |
-
elif any("laptop" in obj.lower() or "keyboard" in obj.lower() for obj in objects):
|
88 |
-
base_name = "workspace area"
|
89 |
-
elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
|
90 |
-
base_name = "decorative area"
|
91 |
-
elif any("refrigerator" in obj.lower() or "microwave" in obj.lower() for obj in objects):
|
92 |
-
base_name = "kitchen area"
|
93 |
-
else:
|
94 |
-
# 基於描述內容推斷
|
95 |
-
if "dining" in description.lower():
|
96 |
-
base_name = "dining area"
|
97 |
-
elif "seating" in description.lower() or "relaxation" in description.lower():
|
98 |
-
base_name = "seating area"
|
99 |
-
elif "work" in description.lower():
|
100 |
-
base_name = "workspace area"
|
101 |
-
elif "decorative" in description.lower():
|
102 |
-
base_name = "decorative area"
|
103 |
-
else:
|
104 |
-
base_name = "functional area"
|
105 |
-
|
106 |
-
# 為次要區域添加位置標識以區分
|
107 |
-
if priority_level == "secondary" and region:
|
108 |
-
spatial_context = self._get_spatial_context_description(region)
|
109 |
-
if spatial_context:
|
110 |
-
return f"{spatial_context} {base_name}"
|
111 |
-
|
112 |
-
return base_name
|
113 |
-
|
114 |
-
except Exception as e:
|
115 |
-
logger.warning(f"Error generating descriptive zone key: {str(e)}")
|
116 |
-
return "activity area"
|
117 |
-
|
118 |
-
def _get_spatial_context_description(self, region: str) -> str:
|
119 |
-
"""
|
120 |
-
獲取空間上下文描述
|
121 |
-
|
122 |
-
Args:
|
123 |
-
region: 區域位置標識
|
124 |
-
|
125 |
-
Returns:
|
126 |
-
str: 空間上下文描述
|
127 |
-
"""
|
128 |
-
try:
|
129 |
-
spatial_mapping = {
|
130 |
-
"top_left": "upper left",
|
131 |
-
"top_center": "upper",
|
132 |
-
"top_right": "upper right",
|
133 |
-
"middle_left": "left side",
|
134 |
-
"middle_center": "central",
|
135 |
-
"middle_right": "right side",
|
136 |
-
"bottom_left": "lower left",
|
137 |
-
"bottom_center": "lower",
|
138 |
-
"bottom_right": "lower right"
|
139 |
-
}
|
140 |
-
|
141 |
-
return spatial_mapping.get(region, "")
|
142 |
-
|
143 |
-
except Exception as e:
|
144 |
-
logger.warning(f"Error getting spatial context for region '{region}': {str(e)}")
|
145 |
-
return ""
|
146 |
-
|
147 |
def identify_outdoor_general_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
|
148 |
"""
|
149 |
-
|
150 |
|
151 |
Args:
|
152 |
category_regions: 按類別和區域分組的物件字典
|
@@ -215,11 +141,11 @@ class SceneZoneIdentifier:
|
|
215 |
|
216 |
# 針對公園區域的特殊處理
|
217 |
if scene_type == "park_area":
|
218 |
-
zones.update(self.
|
219 |
|
220 |
# 針對停車場的特殊處理
|
221 |
if scene_type == "parking_lot":
|
222 |
-
zones.update(self.
|
223 |
|
224 |
logger.info(f"Identified {len(zones)} outdoor zones for scene type '{scene_type}'")
|
225 |
return zones
|
@@ -232,7 +158,7 @@ class SceneZoneIdentifier:
|
|
232 |
def identify_intersection_zones(self, category_regions: Dict, detected_objects: List[Dict], viewpoint: str) -> Dict:
|
233 |
"""
|
234 |
辨識城市十字路口的功能區域,無論是否有行人,只要偵測到紅綠燈就一定顯示 Traffic Control Area;
|
235 |
-
|
236 |
|
237 |
Args:
|
238 |
category_regions: 按類別和 region 分組的物件字典
|
@@ -251,7 +177,7 @@ class SceneZoneIdentifier:
|
|
251 |
traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
|
252 |
|
253 |
# 2. Step A: 無條件建立 Traffic Control Area
|
254 |
-
# 把每個 region 下的紅綠燈都先分群,生成對應 zone
|
255 |
signal_regions_all = {}
|
256 |
for t in traffic_light_objs:
|
257 |
region = t["region"]
|
@@ -285,8 +211,8 @@ class SceneZoneIdentifier:
|
|
285 |
|
286 |
# 3. Step B: 如果有行人,就建立 Crossing Zone,並移除已被打包的紅綠燈
|
287 |
if pedestrian_objs:
|
288 |
-
# 先呼叫
|
289 |
-
crossing_zones = self.
|
290 |
|
291 |
# 把 Crossing Zone 加到最終 zones,並同時記錄已使用掉的紅綠燈數量
|
292 |
for zone_key, zone_info in crossing_zones.items():
|
@@ -323,8 +249,8 @@ class SceneZoneIdentifier:
|
|
323 |
|
324 |
# 5. Step D: 分析車輛交通區域(Vehicle Zones)
|
325 |
if vehicle_objs:
|
326 |
-
traffic_zones = self.
|
327 |
-
#
|
328 |
for zone_key, zone_info in traffic_zones.items():
|
329 |
if zone_key in zones:
|
330 |
suffix = 1
|
@@ -396,15 +322,15 @@ class SceneZoneIdentifier:
|
|
396 |
# 識別車輛模式進行交通分析
|
397 |
vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
|
398 |
if vehicle_objs:
|
399 |
-
zones.update(self.
|
400 |
|
401 |
# 針對十字路口特定空中視角的處理
|
402 |
if "intersection" in scene_type:
|
403 |
-
zones.update(self.
|
404 |
|
405 |
# 針對廣場空中視角的處理
|
406 |
if "plaza" in scene_type:
|
407 |
-
zones.update(self.
|
408 |
|
409 |
logger.info(f"Identified {len(zones)} aerial view zones")
|
410 |
return zones
|
@@ -460,11 +386,11 @@ class SceneZoneIdentifier:
|
|
460 |
"description": f"Asian commercial storefront with pedestrian activity"
|
461 |
}
|
462 |
|
463 |
-
# 辨識行人通道
|
464 |
-
zones.update(self.
|
465 |
|
466 |
# 辨識攤販區域(小攤/商店 - 從情境推斷)
|
467 |
-
zones.update(self.
|
468 |
|
469 |
# 針對夜市的特殊處理
|
470 |
if scene_type == "asian_night_market":
|
@@ -521,13 +447,13 @@ class SceneZoneIdentifier:
|
|
521 |
}
|
522 |
|
523 |
# 識別裝飾區域,增強檢測
|
524 |
-
zones.update(self.
|
525 |
|
526 |
# 識別座位安排區域
|
527 |
-
zones.update(self.
|
528 |
|
529 |
# 識別服務區域(如果與餐飲區域不同)
|
530 |
-
zones.update(self.
|
531 |
|
532 |
logger.info(f"Identified {len(zones)} upscale dining zones")
|
533 |
return zones
|
@@ -576,10 +502,10 @@ class SceneZoneIdentifier:
|
|
576 |
}
|
577 |
|
578 |
# 側邊建築區域(從場景情境推斷)
|
579 |
-
zones.update(self.
|
580 |
|
581 |
# 行人區域
|
582 |
-
zones.update(self.
|
583 |
|
584 |
logger.info(f"Identified {len(zones)} financial district zones")
|
585 |
return zones
|
@@ -666,7 +592,7 @@ class SceneZoneIdentifier:
|
|
666 |
}
|
667 |
|
668 |
# 創建相關輔助功能區,如攝影區、紀念品販賣區
|
669 |
-
auxiliary_zones = self.
|
670 |
if auxiliary_zones:
|
671 |
landmark_zones.update(auxiliary_zones)
|
672 |
|
@@ -678,357 +604,10 @@ class SceneZoneIdentifier:
|
|
678 |
logger.error(traceback.format_exc())
|
679 |
return {}
|
680 |
|
681 |
-
|
682 |
-
def _identify_primary_functional_area(self, detected_objects: List[Dict]) -> Dict:
|
683 |
-
"""
|
684 |
-
識別主要功能區域,基於最強的物件關聯性組合
|
685 |
-
採用通用邏輯處理各種室內場景
|
686 |
-
|
687 |
-
Args:
|
688 |
-
detected_objects: 檢測到的物件列表
|
689 |
-
|
690 |
-
Returns:
|
691 |
-
主要功能區域字典或None
|
692 |
-
"""
|
693 |
-
try:
|
694 |
-
# 用餐區域檢測(桌椅組合)
|
695 |
-
dining_area = self._detect_functional_combination(
|
696 |
-
detected_objects,
|
697 |
-
primary_objects=[60], # dining table
|
698 |
-
supporting_objects=[56, 40, 41, 42, 43], # chair, wine glass, cup, fork, knife
|
699 |
-
min_supporting=2,
|
700 |
-
description_template="Dining area with table and seating arrangement"
|
701 |
-
)
|
702 |
-
if dining_area:
|
703 |
-
return dining_area
|
704 |
-
|
705 |
-
# 休息區域檢測(沙發電視組合或床)
|
706 |
-
seating_area = self._detect_functional_combination(
|
707 |
-
detected_objects,
|
708 |
-
primary_objects=[57, 59], # sofa, bed
|
709 |
-
supporting_objects=[62, 58, 56], # tv, potted plant, chair
|
710 |
-
min_supporting=1,
|
711 |
-
description_template="Seating and relaxation area"
|
712 |
-
)
|
713 |
-
if seating_area:
|
714 |
-
return seating_area
|
715 |
-
|
716 |
-
# 工作區域檢測(電子設備與家具組合)
|
717 |
-
work_area = self._detect_functional_combination(
|
718 |
-
detected_objects,
|
719 |
-
primary_objects=[63, 66], # laptop, keyboard
|
720 |
-
supporting_objects=[60, 56, 64], # dining table, chair, mouse
|
721 |
-
min_supporting=2,
|
722 |
-
description_template="Workspace area with electronics and furniture"
|
723 |
-
)
|
724 |
-
if work_area:
|
725 |
-
return work_area
|
726 |
-
|
727 |
-
return None
|
728 |
-
|
729 |
-
except Exception as e:
|
730 |
-
logger.error(f"Error identifying primary functional area: {str(e)}")
|
731 |
-
logger.error(traceback.format_exc())
|
732 |
-
return None
|
733 |
-
|
734 |
-
def _identify_secondary_functional_area(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
|
735 |
-
"""
|
736 |
-
識別次要功能區域,避免與主要區域重疊
|
737 |
-
|
738 |
-
Args:
|
739 |
-
detected_objects: 檢測到的物件列表
|
740 |
-
existing_zones: 已存在的功能區域
|
741 |
-
|
742 |
-
Returns:
|
743 |
-
次要功能區域字典或None
|
744 |
-
"""
|
745 |
-
try:
|
746 |
-
# 獲取已使用的區域
|
747 |
-
used_regions = set(zone.get("region") for zone in existing_zones.values())
|
748 |
-
|
749 |
-
# 裝飾區域檢測(植物集中區域)
|
750 |
-
decorative_area = self._detect_functional_combination(
|
751 |
-
detected_objects,
|
752 |
-
primary_objects=[58], # potted plant
|
753 |
-
supporting_objects=[75], # vase
|
754 |
-
min_supporting=0,
|
755 |
-
min_primary=3, # 至少需要3個植物
|
756 |
-
description_template="Decorative area with plants and ornamental items",
|
757 |
-
exclude_regions=used_regions
|
758 |
-
)
|
759 |
-
if decorative_area:
|
760 |
-
return decorative_area
|
761 |
-
|
762 |
-
# 儲存區域檢測(廚房電器組合)
|
763 |
-
storage_area = self._detect_functional_combination(
|
764 |
-
detected_objects,
|
765 |
-
primary_objects=[72, 68, 69], # refrigerator, microwave, oven
|
766 |
-
supporting_objects=[71], # sink
|
767 |
-
min_supporting=0,
|
768 |
-
min_primary=2,
|
769 |
-
description_template="Kitchen appliance and storage area",
|
770 |
-
exclude_regions=used_regions
|
771 |
-
)
|
772 |
-
if storage_area:
|
773 |
-
return storage_area
|
774 |
-
|
775 |
-
return None
|
776 |
-
|
777 |
-
except Exception as e:
|
778 |
-
logger.error(f"Error identifying secondary functional area: {str(e)}")
|
779 |
-
logger.error(traceback.format_exc())
|
780 |
-
return None
|
781 |
-
|
782 |
-
def _detect_functional_combination(self, detected_objects: List[Dict], primary_objects: List[int],
|
783 |
-
supporting_objects: List[int], min_supporting: int,
|
784 |
-
description_template: str, min_primary: int = 1,
|
785 |
-
exclude_regions: set = None) -> Dict:
|
786 |
-
"""
|
787 |
-
通用的功能組合檢測方法
|
788 |
-
基於主要物件和支持物件的組合判斷功能區域
|
789 |
-
|
790 |
-
Args:
|
791 |
-
detected_objects: 檢測到的物件列表
|
792 |
-
primary_objects: 主要物件的class_id列表
|
793 |
-
supporting_objects: 支持物件的class_id列表
|
794 |
-
min_supporting: 最少需要的支持物件數量
|
795 |
-
description_template: 描述模板
|
796 |
-
min_primary: 最少需要的主要物件數量
|
797 |
-
exclude_regions: 需要排除的區域集合
|
798 |
-
|
799 |
-
Returns:
|
800 |
-
功能區域資訊字典,如果不符合條件則返回None
|
801 |
-
"""
|
802 |
-
try:
|
803 |
-
if exclude_regions is None:
|
804 |
-
exclude_regions = set()
|
805 |
-
|
806 |
-
# 收集主要物件
|
807 |
-
primary_objs = [obj for obj in detected_objects
|
808 |
-
if obj.get("class_id") in primary_objects and obj.get("confidence", 0) >= 0.4]
|
809 |
-
|
810 |
-
# 收集支持物件
|
811 |
-
supporting_objs = [obj for obj in detected_objects
|
812 |
-
if obj.get("class_id") in supporting_objects and obj.get("confidence", 0) >= 0.4]
|
813 |
-
|
814 |
-
# 檢查是否滿足最少數量要求
|
815 |
-
if len(primary_objs) < min_primary or len(supporting_objs) < min_supporting:
|
816 |
-
return None
|
817 |
-
|
818 |
-
# 按區域組織物件
|
819 |
-
region_combinations = {}
|
820 |
-
all_relevant_objs = primary_objs + supporting_objs
|
821 |
-
|
822 |
-
for obj in all_relevant_objs:
|
823 |
-
region = obj.get("region")
|
824 |
-
|
825 |
-
# 排除指定區域
|
826 |
-
if region in exclude_regions:
|
827 |
-
continue
|
828 |
-
|
829 |
-
if region not in region_combinations:
|
830 |
-
region_combinations[region] = {"primary": [], "supporting": [], "all": []}
|
831 |
-
|
832 |
-
region_combinations[region]["all"].append(obj)
|
833 |
-
|
834 |
-
if obj.get("class_id") in primary_objects:
|
835 |
-
region_combinations[region]["primary"].append(obj)
|
836 |
-
else:
|
837 |
-
region_combinations[region]["supporting"].append(obj)
|
838 |
-
|
839 |
-
# 找到最佳區域組合
|
840 |
-
best_region = None
|
841 |
-
best_score = 0
|
842 |
-
|
843 |
-
for region, objs in region_combinations.items():
|
844 |
-
# 計算該區域的評分
|
845 |
-
primary_count = len(objs["primary"])
|
846 |
-
supporting_count = len(objs["supporting"])
|
847 |
-
|
848 |
-
# 必須滿足最低要求
|
849 |
-
if primary_count < min_primary or supporting_count < min_supporting:
|
850 |
-
continue
|
851 |
-
|
852 |
-
# 計算組合評分(主要物件權重較高)
|
853 |
-
score = primary_count * 2 + supporting_count
|
854 |
-
|
855 |
-
if score > best_score:
|
856 |
-
best_score = score
|
857 |
-
best_region = region
|
858 |
-
|
859 |
-
if best_region is None:
|
860 |
-
return None
|
861 |
-
|
862 |
-
best_combination = region_combinations[best_region]
|
863 |
-
all_objects = [obj["class_name"] for obj in best_combination["all"]]
|
864 |
-
|
865 |
-
return {
|
866 |
-
"region": best_region,
|
867 |
-
"objects": all_objects,
|
868 |
-
"description": description_template
|
869 |
-
}
|
870 |
-
|
871 |
-
except Exception as e:
|
872 |
-
logger.error(f"Error detecting functional combination: {str(e)}")
|
873 |
-
logger.error(traceback.format_exc())
|
874 |
-
return None
|
875 |
-
|
876 |
-
def _analyze_crossing_patterns(self, pedestrians: List[Dict], traffic_lights: List[Dict]) -> Dict:
|
877 |
-
"""
|
878 |
-
Analyze pedestrian crossing patterns to identify crossing zones.
|
879 |
-
若同一 region 中同時有行人與紅綠燈,則將兩者都放入該區域的 objects。
|
880 |
-
|
881 |
-
Args:
|
882 |
-
pedestrians: 行人物件列表(每個 obj 應包含 'class_id', 'region', 'confidence' 等)
|
883 |
-
traffic_lights: 紅綠燈物件列表(每個 obj 應包含 'class_id', 'region', 'confidence' 等)
|
884 |
-
|
885 |
-
Returns:
|
886 |
-
crossing_zones: 字典,key 為 zone 名稱,value 包含 'region', 'objects', 'description'
|
887 |
-
"""
|
888 |
-
try:
|
889 |
-
crossing_zones = {}
|
890 |
-
|
891 |
-
# 如果沒有任何行人,就不辨識任何 crossing zone
|
892 |
-
if not pedestrians:
|
893 |
-
return crossing_zones
|
894 |
-
|
895 |
-
# (1) 按照 region 分組行人
|
896 |
-
pedestrian_regions = {}
|
897 |
-
for p in pedestrians:
|
898 |
-
region = p["region"]
|
899 |
-
pedestrian_regions.setdefault(region, []).append(p)
|
900 |
-
|
901 |
-
# (2) 針對每個 region,看是否同時有紅綠燈
|
902 |
-
# 建立一個 mapping: region -> { "pedestrians": [...], "traffic_lights": [...] }
|
903 |
-
combined_regions = {}
|
904 |
-
for region, peds in pedestrian_regions.items():
|
905 |
-
# 取得該 region 下所有紅綠燈
|
906 |
-
tls_in_region = [t for t in traffic_lights if t["region"] == region]
|
907 |
-
combined_regions[region] = {
|
908 |
-
"pedestrians": peds,
|
909 |
-
"traffic_lights": tls_in_region
|
910 |
-
}
|
911 |
-
|
912 |
-
# (3) 按照行人數量排序,找出前兩個需要建立 crossing zone 的 region
|
913 |
-
sorted_regions = sorted(
|
914 |
-
combined_regions.items(),
|
915 |
-
key=lambda x: len(x[1]["pedestrians"]),
|
916 |
-
reverse=True
|
917 |
-
)
|
918 |
-
|
919 |
-
# (4) 將前兩個 region 建立 Crossing Zone,objects 同時包含行人與紅綠燈
|
920 |
-
for idx, (region, group) in enumerate(sorted_regions[:2]):
|
921 |
-
peds = group["pedestrians"]
|
922 |
-
tls = group["traffic_lights"]
|
923 |
-
has_nearby_signals = len(tls) > 0
|
924 |
-
|
925 |
-
# 生成 zone_name(基於 region 方向 + idx 決定主/次 crossing)
|
926 |
-
direction = self._get_directional_description(region)
|
927 |
-
if direction and direction != "central":
|
928 |
-
zone_name = f"{direction} crossing area"
|
929 |
-
else:
|
930 |
-
zone_name = "main crossing area" if idx == 0 else "secondary crossing area"
|
931 |
-
|
932 |
-
# 組合 description
|
933 |
-
description = f"Pedestrian crossing area with {len(peds)} "
|
934 |
-
description += "person" if len(peds) == 1 else "people"
|
935 |
-
if direction:
|
936 |
-
description += f" in {direction} direction"
|
937 |
-
if has_nearby_signals:
|
938 |
-
description += " near traffic signals"
|
939 |
-
|
940 |
-
# ======= 將行人 + 同區紅綠燈一併放入 objects =======
|
941 |
-
obj_list = ["pedestrian"] * len(peds)
|
942 |
-
if has_nearby_signals:
|
943 |
-
obj_list += ["traffic light"] * len(tls)
|
944 |
-
|
945 |
-
crossing_zones[zone_name] = {
|
946 |
-
"region": region,
|
947 |
-
"objects": obj_list,
|
948 |
-
"description": description
|
949 |
-
}
|
950 |
-
|
951 |
-
return crossing_zones
|
952 |
-
|
953 |
-
except Exception as e:
|
954 |
-
logger.error(f"Error in _analyze_crossing_patterns: {str(e)}")
|
955 |
-
logger.error(traceback.format_exc())
|
956 |
-
return {}
|
957 |
-
|
958 |
-
|
959 |
-
def _analyze_traffic_zones(self, vehicles: List[Dict]) -> Dict:
|
960 |
-
"""
|
961 |
-
分析車輛分布以識別具有方向感知的交通區域
|
962 |
-
|
963 |
-
Args:
|
964 |
-
vehicles: 車輛物件列表
|
965 |
-
|
966 |
-
Returns:
|
967 |
-
識別出的交通區域字典
|
968 |
-
"""
|
969 |
-
try:
|
970 |
-
traffic_zones = {}
|
971 |
-
|
972 |
-
if not vehicles:
|
973 |
-
return traffic_zones
|
974 |
-
|
975 |
-
# 按區域分組車輛
|
976 |
-
vehicle_regions = {}
|
977 |
-
for v in vehicles:
|
978 |
-
region = v["region"]
|
979 |
-
if region not in vehicle_regions:
|
980 |
-
vehicle_regions[region] = []
|
981 |
-
vehicle_regions[region].append(v)
|
982 |
-
|
983 |
-
# 為有車輛的區域創建交通區域
|
984 |
-
main_traffic_region = max(vehicle_regions.items(), key=lambda x: len(x[1]), default=(None, []))
|
985 |
-
|
986 |
-
if main_traffic_region[0] is not None:
|
987 |
-
region = main_traffic_region[0]
|
988 |
-
vehicles_in_region = main_traffic_region[1]
|
989 |
-
|
990 |
-
# 獲取車輛類型列表用於描述
|
991 |
-
vehicle_types = [v["class_name"] for v in vehicles_in_region]
|
992 |
-
unique_types = list(set(vehicle_types))
|
993 |
-
|
994 |
-
# 獲取方向描述
|
995 |
-
direction = self._get_directional_description(region)
|
996 |
-
|
997 |
-
# 創建描述性區域
|
998 |
-
traffic_zones["vehicle_zone"] = {
|
999 |
-
"region": region,
|
1000 |
-
"objects": vehicle_types,
|
1001 |
-
"description": f"Vehicle traffic area with {', '.join(unique_types[:3])}" +
|
1002 |
-
(f" in {direction} area" if direction else "")
|
1003 |
-
}
|
1004 |
-
|
1005 |
-
# 如果車輛分布在多個區域,創建次要區域
|
1006 |
-
if len(vehicle_regions) > 1:
|
1007 |
-
# 獲取第二大車輛聚集區域
|
1008 |
-
sorted_regions = sorted(vehicle_regions.items(), key=lambda x: len(x[1]), reverse=True)
|
1009 |
-
if len(sorted_regions) > 1:
|
1010 |
-
second_region, second_vehicles = sorted_regions[1]
|
1011 |
-
direction = self._get_directional_description(second_region)
|
1012 |
-
vehicle_types = [v["class_name"] for v in second_vehicles]
|
1013 |
-
unique_types = list(set(vehicle_types))
|
1014 |
-
|
1015 |
-
traffic_zones["secondary_vehicle_zone"] = {
|
1016 |
-
"region": second_region,
|
1017 |
-
"objects": vehicle_types,
|
1018 |
-
"description": f"Secondary traffic area with {', '.join(unique_types[:2])}" +
|
1019 |
-
(f" in {direction} direction" if direction else "")
|
1020 |
-
}
|
1021 |
-
|
1022 |
-
return traffic_zones
|
1023 |
-
|
1024 |
-
except Exception as e:
|
1025 |
-
logger.error(f"Error analyzing traffic zones: {str(e)}")
|
1026 |
-
logger.error(traceback.format_exc())
|
1027 |
-
return {}
|
1028 |
-
|
1029 |
def _get_directional_description(self, region: str) -> str:
|
1030 |
"""
|
1031 |
將區域名稱轉換為方位描述(東西南北)
|
|
|
1032 |
|
1033 |
Args:
|
1034 |
region: 區域名稱
|
@@ -1061,668 +640,3 @@ class SceneZoneIdentifier:
|
|
1061 |
except Exception as e:
|
1062 |
logger.error(f"Error getting directional description for region '{region}': {str(e)}")
|
1063 |
return "central"
|
1064 |
-
|
1065 |
-
def _identify_park_recreational_zones(self, detected_objects: List[Dict]) -> Dict:
|
1066 |
-
"""
|
1067 |
-
識別公園的休閒活動區域
|
1068 |
-
|
1069 |
-
Args:
|
1070 |
-
detected_objects: 檢測到的物件列表
|
1071 |
-
|
1072 |
-
Returns:
|
1073 |
-
休閒區域字典
|
1074 |
-
"""
|
1075 |
-
try:
|
1076 |
-
zones = {}
|
1077 |
-
|
1078 |
-
# 尋找休閒物件(運動球、風箏等)
|
1079 |
-
rec_items = []
|
1080 |
-
rec_regions = {}
|
1081 |
-
|
1082 |
-
for obj in detected_objects:
|
1083 |
-
if obj["class_id"] in [32, 33, 34, 35, 38]: # sports ball, kite, baseball bat, glove, tennis racket
|
1084 |
-
region = obj["region"]
|
1085 |
-
if region not in rec_regions:
|
1086 |
-
rec_regions[region] = []
|
1087 |
-
rec_regions[region].append(obj)
|
1088 |
-
rec_items.append(obj["class_name"])
|
1089 |
-
|
1090 |
-
if rec_items:
|
1091 |
-
main_rec_region = max(rec_regions.items(),
|
1092 |
-
key=lambda x: len(x[1]),
|
1093 |
-
default=(None, []))
|
1094 |
-
|
1095 |
-
if main_rec_region[0] is not None:
|
1096 |
-
zones["recreational_zone"] = {
|
1097 |
-
"region": main_rec_region[0],
|
1098 |
-
"objects": list(set(rec_items)),
|
1099 |
-
"description": f"Recreational area with {', '.join(list(set(rec_items)))}"
|
1100 |
-
}
|
1101 |
-
|
1102 |
-
return zones
|
1103 |
-
|
1104 |
-
except Exception as e:
|
1105 |
-
logger.error(f"Error identifying park recreational zones: {str(e)}")
|
1106 |
-
logger.error(traceback.format_exc())
|
1107 |
-
return {}
|
1108 |
-
|
1109 |
-
def _identify_parking_zones(self, detected_objects: List[Dict]) -> Dict:
|
1110 |
-
"""
|
1111 |
-
停車場的停車區域
|
1112 |
-
|
1113 |
-
Args:
|
1114 |
-
detected_objects: 檢測到的物件列表
|
1115 |
-
|
1116 |
-
Returns:
|
1117 |
-
停車區域字典
|
1118 |
-
"""
|
1119 |
-
try:
|
1120 |
-
zones = {}
|
1121 |
-
|
1122 |
-
# 尋找停放的汽車
|
1123 |
-
car_objs = [obj for obj in detected_objects if obj["class_id"] == 2] # cars
|
1124 |
-
|
1125 |
-
if len(car_objs) >= 3:
|
1126 |
-
# 檢查汽車是否按模式排列(簡化)
|
1127 |
-
car_positions = [obj["normalized_center"] for obj in car_objs]
|
1128 |
-
|
1129 |
-
# 通過分析垂直位置檢查行模式
|
1130 |
-
y_coords = [pos[1] for pos in car_positions]
|
1131 |
-
y_clusters = {}
|
1132 |
-
|
1133 |
-
# 簡化聚類 - 按相似y坐標分組汽車
|
1134 |
-
for i, y in enumerate(y_coords):
|
1135 |
-
assigned = False
|
1136 |
-
for cluster_y in y_clusters.keys():
|
1137 |
-
if abs(y - cluster_y) < 0.1: # 圖像高度的10%內
|
1138 |
-
y_clusters[cluster_y].append(i)
|
1139 |
-
assigned = True
|
1140 |
-
break
|
1141 |
-
|
1142 |
-
if not assigned:
|
1143 |
-
y_clusters[y] = [i]
|
1144 |
-
|
1145 |
-
# 如果有行模式
|
1146 |
-
if max(len(indices) for indices in y_clusters.values()) >= 2:
|
1147 |
-
zones["parking_row"] = {
|
1148 |
-
"region": "central",
|
1149 |
-
"objects": ["car"] * len(car_objs),
|
1150 |
-
"description": f"Organized parking area with vehicles arranged in rows"
|
1151 |
-
}
|
1152 |
-
else:
|
1153 |
-
zones["parking_area"] = {
|
1154 |
-
"region": "wide",
|
1155 |
-
"objects": ["car"] * len(car_objs),
|
1156 |
-
"description": f"Parking area with {len(car_objs)} vehicles"
|
1157 |
-
}
|
1158 |
-
|
1159 |
-
return zones
|
1160 |
-
|
1161 |
-
except Exception as e:
|
1162 |
-
logger.error(f"Error identifying parking zones: {str(e)}")
|
1163 |
-
logger.error(traceback.format_exc())
|
1164 |
-
return {}
|
1165 |
-
|
1166 |
-
def _analyze_aerial_traffic_patterns(self, vehicle_objs: List[Dict]) -> Dict:
|
1167 |
-
"""
|
1168 |
-
分析空中視角的車輛交通模式
|
1169 |
-
|
1170 |
-
Args:
|
1171 |
-
vehicle_objs: 車輛物件列表
|
1172 |
-
|
1173 |
-
Returns:
|
1174 |
-
交通模式區域字典
|
1175 |
-
"""
|
1176 |
-
try:
|
1177 |
-
zones = {}
|
1178 |
-
|
1179 |
-
if not vehicle_objs:
|
1180 |
-
return zones
|
1181 |
-
|
1182 |
-
# 將位置轉換為數組進行模式分析
|
1183 |
-
positions = np.array([obj["normalized_center"] for obj in vehicle_objs])
|
1184 |
-
|
1185 |
-
if len(positions) >= 2:
|
1186 |
-
# 計算分布指標
|
1187 |
-
x_coords = positions[:, 0]
|
1188 |
-
y_coords = positions[:, 1]
|
1189 |
-
|
1190 |
-
x_mean = np.mean(x_coords)
|
1191 |
-
y_mean = np.mean(y_coords)
|
1192 |
-
x_std = np.std(x_coords)
|
1193 |
-
y_std = np.std(y_coords)
|
1194 |
-
|
1195 |
-
# 判斷車輛是否組織成車道
|
1196 |
-
if x_std < y_std * 0.5:
|
1197 |
-
# 車輛垂直對齊 - 表示南北交通
|
1198 |
-
zones["vertical_traffic_flow"] = {
|
1199 |
-
"region": "central_vertical",
|
1200 |
-
"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
|
1201 |
-
"description": "North-south traffic flow visible from aerial view"
|
1202 |
-
}
|
1203 |
-
elif y_std < x_std * 0.5:
|
1204 |
-
# 車輛水平對齊 - 表示東西交通
|
1205 |
-
zones["horizontal_traffic_flow"] = {
|
1206 |
-
"region": "central_horizontal",
|
1207 |
-
"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
|
1208 |
-
"description": "East-west traffic flow visible from aerial view"
|
1209 |
-
}
|
1210 |
-
else:
|
1211 |
-
# 車輛多方向 - 表示十字路口
|
1212 |
-
zones["intersection_traffic"] = {
|
1213 |
-
"region": "central",
|
1214 |
-
"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
|
1215 |
-
"description": "Multi-directional traffic at intersection visible from aerial view"
|
1216 |
-
}
|
1217 |
-
|
1218 |
-
return zones
|
1219 |
-
|
1220 |
-
except Exception as e:
|
1221 |
-
logger.error(f"Error analyzing aerial traffic patterns: {str(e)}")
|
1222 |
-
logger.error(traceback.format_exc())
|
1223 |
-
return {}
|
1224 |
-
|
1225 |
-
def _identify_aerial_intersection_features(self, detected_objects: List[Dict]) -> Dict:
|
1226 |
-
"""
|
1227 |
-
空中視角十字路口特徵
|
1228 |
-
|
1229 |
-
Args:
|
1230 |
-
detected_objects: 檢測到的物件列表
|
1231 |
-
|
1232 |
-
Returns:
|
1233 |
-
十字路口特徵區域字典
|
1234 |
-
"""
|
1235 |
-
try:
|
1236 |
-
zones = {}
|
1237 |
-
|
1238 |
-
# 檢查交通信號
|
1239 |
-
traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
|
1240 |
-
if traffic_light_objs:
|
1241 |
-
zones["traffic_control_pattern"] = {
|
1242 |
-
"region": "intersection",
|
1243 |
-
"objects": ["traffic light"] * len(traffic_light_objs),
|
1244 |
-
"description": f"Intersection traffic control with {len(traffic_light_objs)} signals visible from above"
|
1245 |
-
}
|
1246 |
-
|
1247 |
-
# 人行道從空中視角的情境推斷
|
1248 |
-
zones["crossing_pattern"] = {
|
1249 |
-
"region": "central",
|
1250 |
-
"objects": ["inferred crosswalk"],
|
1251 |
-
"description": "Crossing pattern visible from aerial perspective"
|
1252 |
-
}
|
1253 |
-
|
1254 |
-
return zones
|
1255 |
-
|
1256 |
-
except Exception as e:
|
1257 |
-
logger.error(f"Error identifying aerial intersection features: {str(e)}")
|
1258 |
-
logger.error(traceback.format_exc())
|
1259 |
-
return {}
|
1260 |
-
|
1261 |
-
def _identify_aerial_plaza_features(self, people_objs: List[Dict]) -> Dict:
    """
    Build the plaza-specific zone for an aerial-view scene.

    Args:
        people_objs: Person objects; each entry is read through its
            "region" key.

    Returns:
        Zone dictionary holding "central_gathering" when at least one person
        has a region name containing "middle"; otherwise an empty dict. An
        empty dict is also returned if anything raises.
    """
    try:
        plaza_zones = {}
        if not people_objs:
            return plaza_zones

        # Any region in the middle row counts (substring match on "middle").
        middle_count = len([person for person in people_objs if "middle" in person["region"]])
        if middle_count > 0:
            plaza_zones["central_gathering"] = dict(
                region="middle_center",
                objects=["person"] * middle_count,
                description=f"Central plaza gathering area with {middle_count} people viewed from above",
            )
        return plaza_zones

    except Exception as err:
        logger.error(f"Error identifying aerial plaza features: {str(err)}")
        logger.error(traceback.format_exc())
        return {}
|
1292 |
-
|
1293 |
-
def _identify_asian_pedestrian_pathway(self, detected_objects: List[Dict]) -> Dict:
    """
    Identify the pedestrian pathway zone of an Asian commercial street scene.

    People (class_id 0) are checked for a line-like arrangement: with three
    or more people, a population variance below 0.05 on the y coordinate is
    treated as a horizontal path, and below 0.05 on the x coordinate as a
    vertical path. People, bicycles and motorcycles (class_id 0, 1, 3)
    whose normalized y lies in [0.25, 0.75] are listed as pathway objects.

    Args:
        detected_objects: Detected objects; entries are read through
            "class_id", "class_name" and "normalized_center" (x, y).

    Returns:
        Zone dictionary holding "pedestrian_pathway" when at least one
        pathway object exists, otherwise an empty dict. The "objects" list
        holds unique class names in first-seen order. An empty dict is
        returned if anything raises.
    """
    try:
        zones = {}

        def _spread(values):
            # Population variance; callers guarantee a non-empty list.
            mean = sum(values) / len(values)
            return sum((value - mean) ** 2 for value in values) / len(values)

        people_positions = [obj["normalized_center"]
                            for obj in detected_objects if obj["class_id"] == 0]

        structured_path = False
        path_direction = "meandering"

        if len(people_positions) >= 3:
            # Low spread along one axis means the crowd is aligned on the other.
            horizontal_path = _spread([pos[1] for pos in people_positions]) < 0.05
            vertical_path = _spread([pos[0] for pos in people_positions]) < 0.05

            structured_path = horizontal_path or vertical_path
            # Horizontal wins when a tight cluster satisfies both tests.
            path_direction = "horizontal" if horizontal_path else "vertical" if vertical_path else "meandering"

        # The middle band of the image is assumed to be the walkway.
        pathway_items = [
            obj["class_name"]
            for obj in detected_objects
            if obj["class_id"] in (0, 1, 3) and 0.25 <= obj["normalized_center"][1] <= 0.75
        ]

        if pathway_items:
            path_desc = "Pedestrian walkway with people moving through the commercial area"
            if structured_path:
                path_desc = f"{path_direction.capitalize()} pedestrian walkway with organized foot traffic"

            zones["pedestrian_pathway"] = {
                "region": "middle_center",  # assumption: the pathway is usually central
                # dict.fromkeys de-duplicates while keeping first-seen order, so
                # the result is reproducible (set order varies between runs).
                "objects": list(dict.fromkeys(pathway_items)),
                "description": path_desc
            }

        return zones

    except Exception as e:
        logger.error(f"Error identifying Asian pedestrian pathway: {str(e)}")
        logger.error(traceback.format_exc())
        return {}
|
1365 |
-
|
1366 |
-
def _identify_vendor_zones(self, detected_objects: List[Dict]) -> Dict:
    """
    Infer a vendor / market-stall zone from small merchandise near people.

    A zone is produced only when the scene contains at least one person
    (class_id 0) and at least one object with class_id 24, 26, 39 or 41
    (bags, bottles, cups per the original inline note). Objects with those
    ids plus 67 (phones per the original note) are then grouped by region,
    and the region holding the most of them becomes the vendor zone. Class
    67 is grouped but does not trigger the zone on its own.

    Args:
        detected_objects: Detected objects; entries are read through
            "class_id", "class_name" and "region".

    Returns:
        Zone dictionary holding "vendor_zone", or an empty dict when the
        conditions are not met. The "objects" list holds the unique class
        names found in the chosen region, in first-seen order. An empty
        dict is returned if anything raises.
    """
    try:
        zones = {}

        trigger_ids = (24, 26, 39, 41)
        merchandise_ids = (24, 26, 39, 41, 67)

        has_small_objects = any(obj["class_id"] in trigger_ids for obj in detected_objects)
        has_people = any(obj["class_id"] == 0 for obj in detected_objects)

        if not (has_small_objects and has_people):
            return zones

        # Group merchandise by region; dict order is first-seen region order.
        small_obj_regions = {}
        for obj in detected_objects:
            if obj["class_id"] in merchandise_ids:
                small_obj_regions.setdefault(obj["region"], []).append(obj)

        # has_small_objects guarantees at least one group, so max() is safe.
        # Ties resolve to the region encountered first.
        main_region, main_objects = max(small_obj_regions.items(),
                                        key=lambda item: len(item[1]))

        if main_region is not None:
            vendor_items = [obj["class_name"] for obj in main_objects]
            zones["vendor_zone"] = {
                "region": main_region,
                # Order-preserving de-duplication keeps the output reproducible
                # (set order varies between runs).
                "objects": list(dict.fromkeys(vendor_items)),
                "description": "Vendor or market stall area with small merchandise"
            }

        return zones

    except Exception as e:
        logger.error(f"Error identifying vendor zones: {str(e)}")
        logger.error(traceback.format_exc())
        return {}
|
1413 |
-
|
1414 |
-
def _identify_upscale_decorative_zones(self, detected_objects: List[Dict]) -> Dict:
    """
    Identify the decorative zone of an upscale dining scene.

    Objects with class_id 75 or 40 (vase, wine glass per the original
    inline note) are grouped by region, and the region holding the most of
    them becomes the zone's region.

    Args:
        detected_objects: Detected objects; entries are read through
            "class_id", "class_name" and "region".

    Returns:
        Zone dictionary holding "decorative_zone", or an empty dict when no
        decorative object exists. "objects" lists the unique decorative
        class names from all regions in first-seen order, and the
        description lists them in the same order. An empty dict is returned
        if anything raises.
    """
    try:
        zones = {}

        decor_items = []
        decor_regions = {}

        for obj in detected_objects:
            if obj["class_id"] in (75, 40):
                decor_regions.setdefault(obj["region"], []).append(obj)
                decor_items.append(obj["class_name"])

        if decor_items:
            # decor_regions is non-empty here; ties resolve to the first region seen.
            main_region = max(decor_regions.items(), key=lambda item: len(item[1]))[0]

            if main_region is not None:
                # De-duplicate once, keeping first-seen order, so that the
                # object list and the sentence agree and are reproducible
                # (set order varies between runs).
                unique_items = list(dict.fromkeys(decor_items))
                zones["decorative_zone"] = {
                    "region": main_region,
                    "objects": unique_items,
                    "description": f"Decorative area with {', '.join(unique_items)}"
                }

        return zones

    except Exception as e:
        logger.error(f"Error identifying upscale decorative zones: {str(e)}")
        logger.error(traceback.format_exc())
        return {}
|
1457 |
-
|
1458 |
-
def _identify_dining_seating_zones(self, detected_objects: List[Dict]) -> Dict:
    """
    Identify the seating zone of a dining scene from its chairs.

    Args:
        detected_objects: Detected objects; entries are read through
            "class_id" and, for chairs (class_id 56), "region".

    Returns:
        Zone dictionary holding "dining_seating_zone" for the region with
        the most chairs when at least two chairs exist, otherwise an empty
        dict. An empty dict is returned if anything raises.
    """
    try:
        seating = {}

        chairs = [item for item in detected_objects if item["class_id"] == 56]
        if len(chairs) < 2:
            return seating

        # Count chairs per region; dict order is first-seen region order, so
        # max() resolves ties to the region seen first.
        per_region = {}
        for chair in chairs:
            where = chair["region"]
            per_region[where] = per_region.get(where, 0) + 1

        busiest = max(per_region, key=per_region.get)
        if busiest is not None:
            chair_total = per_region[busiest]
            seating["dining_seating_zone"] = dict(
                region=busiest,
                objects=["chair"] * chair_total,
                description=f"Formal dining seating arrangement with {chair_total} chairs",
            )

        return seating

    except Exception as err:
        logger.error(f"Error identifying dining seating zones: {str(err)}")
        logger.error(traceback.format_exc())
        return {}
|
1499 |
-
|
1500 |
-
def _identify_serving_zones(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
    """
    Identify a serving / sideboard zone separate from the main dining table.

    Only objects with class_id 39 or 45 (bottle, bowl per the original
    inline note) located outside the region of
    existing_zones["formal_dining_zone"] are considered. Without a
    "formal_dining_zone" entry no serving zone can be produced.

    Args:
        detected_objects: Detected objects; entries are read through
            "class_id", "class_name" and "region".
        existing_zones: Zones identified so far; only the "region" of
            "formal_dining_zone" is read.

    Returns:
        Zone dictionary holding "serving_zone" for the region with the most
        qualifying objects, or an empty dict. "objects" lists the unique
        qualifying class names in first-seen order, and the description
        lists them in the same order. An empty dict is returned if anything
        raises.
    """
    try:
        zones = {}

        # A serving area is defined relative to the dining table, so bail
        # out early when no dining zone is known.
        if not existing_zones or "formal_dining_zone" not in existing_zones:
            return zones

        # Loop-invariant: look the dining region up once.
        dining_region = existing_zones["formal_dining_zone"]["region"]

        serving_items = []
        serving_regions = {}

        for obj in detected_objects:
            if obj["class_id"] in (39, 45) and obj["region"] != dining_region:
                serving_regions.setdefault(obj["region"], []).append(obj)
                serving_items.append(obj["class_name"])

        if serving_items:
            # serving_regions is non-empty here; ties resolve to the first region seen.
            main_region = max(serving_regions.items(), key=lambda item: len(item[1]))[0]

            if main_region is not None:
                # De-duplicate once, keeping first-seen order, so the list
                # and the sentence agree and are reproducible (set order
                # varies between runs).
                unique_items = list(dict.fromkeys(serving_items))
                zones["serving_zone"] = {
                    "region": main_region,
                    "objects": unique_items,
                    "description": f"Serving or sideboard area with {', '.join(unique_items)}"
                }

        return zones

    except Exception as e:
        logger.error(f"Error identifying serving zones: {str(e)}")
        logger.error(traceback.format_exc())
        return {}
|
1546 |
-
|
1547 |
-
def _identify_building_zones(self, detected_objects: List[Dict]) -> Dict:
    """
    Infer building zones along the left and right edges of a street scene.

    A side is assumed to be lined with buildings when none of its three
    regions (top/middle/bottom) contains a person (class_id 0) or an object
    with class_id 1, 2, 3, 5 or 7. Buildings are never detected directly;
    the zones are inferred from the absence of such objects.

    Args:
        detected_objects: Detected objects; entries are read through
            "region" and "class_id".

    Returns:
        Zone dictionary holding "building_zone_left" and/or
        "building_zone_right", possibly empty. An empty dict is returned if
        anything raises.
    """
    try:
        vehicle_ids = [1, 2, 3, 5, 7]

        def side_is_unoccupied(side_regions):
            # False as soon as one region of the side holds a vehicle or a person.
            for name in side_regions:
                has_vehicle = any(item["region"] == name and item["class_id"] in vehicle_ids
                                  for item in detected_objects)
                has_person = any(item["region"] == name and item["class_id"] == 0
                                 for item in detected_objects)
                if has_vehicle or has_person:
                    return False
            return True

        left_clear = side_is_unoccupied(["top_left", "middle_left", "bottom_left"])
        right_clear = side_is_unoccupied(["top_right", "middle_right", "bottom_right"])

        building_zones = {}
        if left_clear:
            building_zones["building_zone_left"] = dict(
                region="middle_left",
                objects=["building"],  # inferred, not detected
                description="Tall buildings line the left side of the street",
            )
        if right_clear:
            building_zones["building_zone_right"] = dict(
                region="middle_right",
                objects=["building"],  # inferred, not detected
                description="Tall buildings line the right side of the street",
            )
        return building_zones

    except Exception as err:
        logger.error(f"Error identifying building zones: {str(err)}")
        logger.error(traceback.format_exc())
        return {}
|
1612 |
-
|
1613 |
-
def _identify_financial_pedestrian_zones(self, detected_objects: List[Dict]) -> Dict:
    """
    Identify the main pedestrian zone of a financial-district scene.

    Args:
        detected_objects: Detected objects; entries are read through
            "class_id" and, for people (class_id 0), "region".

    Returns:
        Zone dictionary holding "pedestrian_zone" for the region with the
        most people, or an empty dict when no person exists. An empty dict
        is returned if anything raises.
    """
    try:
        pedestrian_zones = {}

        walkers = [item for item in detected_objects if item["class_id"] == 0]
        if not walkers:
            return pedestrian_zones

        # Head count per region; dict order is first-seen region order, so
        # max() resolves ties to the region seen first.
        head_count = {}
        for walker in walkers:
            where = walker["region"]
            head_count[where] = head_count.get(where, 0) + 1

        busiest = max(head_count, key=head_count.get)
        if busiest is not None:
            total = head_count[busiest]
            pedestrian_zones["pedestrian_zone"] = dict(
                region=busiest,
                objects=["person"] * total,
                description=f"Pedestrian area with {total} people navigating the financial district",
            )

        return pedestrian_zones

    except Exception as err:
        logger.error(f"Error identifying financial pedestrian zones: {str(err)}")
        logger.error(traceback.format_exc())
        return {}
|
1654 |
-
|
1655 |
-
def _create_landmark_auxiliary_zones(self, landmark: Dict, index: int) -> Dict:
    """
    Create the auxiliary zones that accompany a landmark.

    A photography spot is always created, placed in the region mirrored
    from the landmark's own region (or in the same region when the name is
    not one of the nine known grid cells). A souvenir area is added in the
    first listed neighbouring region when the landmark's confidence exceeds
    0.7 and its region is a known grid cell.

    Args:
        landmark: Landmark object; "region" (default "middle_center"),
            "class_name" (default "Landmark") and "confidence" (default 0)
            are read.
        index: Landmark index. Not used by this method.

    Returns:
        Dictionary of auxiliary zones keyed by
        "<landmark name, lower-case with underscores>_photography_spot" and
        "..._souvenir_area". An empty dict is returned if anything raises.
    """
    try:
        where = landmark.get("region", "middle_center")
        title = landmark.get("class_name", "Landmark")
        slug = title.lower().replace(' ', '_')

        # Visitors usually photograph from across the frame, so mirror the region.
        mirrored = {
            "top_left": "bottom_right",
            "top_center": "bottom_center",
            "top_right": "bottom_left",
            "middle_left": "middle_right",
            "middle_center": "bottom_center",
            "middle_right": "middle_left",
            "bottom_left": "top_right",
            "bottom_center": "top_center",
            "bottom_right": "top_left",
        }

        extras = {
            f"{slug}_photography_spot": {
                "name": f"{title} Photography Spot",
                "description": f"Popular position for photographing {title} with optimal viewing angle.",
                "objects": ["camera", "person", "cell phone"],
                "region": mirrored.get(where, where),
                "primary_function": "Tourist photography",
            }
        }

        # Only confidently recognised landmarks get a souvenir area.
        if not landmark.get("confidence", 0) > 0.7:
            return extras

        # Souvenir stands sit next to the landmark rather than on top of it.
        neighbours = {
            "top_left": ["top_center", "middle_left"],
            "top_center": ["top_left", "top_right"],
            "top_right": ["top_center", "middle_right"],
            "middle_left": ["top_left", "bottom_left"],
            "middle_center": ["middle_left", "middle_right"],
            "middle_right": ["top_right", "bottom_right"],
            "bottom_left": ["middle_left", "bottom_center"],
            "bottom_center": ["bottom_left", "bottom_right"],
            "bottom_right": ["bottom_center", "middle_right"],
        }

        candidates = neighbours.get(where)
        if candidates is not None:
            extras[f"{slug}_souvenir_area"] = {
                "name": f"{title} Souvenir Area",
                "description": f"Area where visitors can purchase souvenirs and memorabilia related to {title}.",
                "objects": ["person", "handbag", "backpack"],
                "region": candidates[0],  # first listed neighbour
                "primary_function": "Tourism commerce",
            }

        return extras

    except Exception as err:
        logger.error(f"Error creating landmark auxiliary zones: {str(err)}")
        logger.error(traceback.format_exc())
        return {}
|
|
|
3 |
import traceback
|
4 |
import numpy as np
|
5 |
from typing import Dict, List, Any, Optional
|
6 |
+
from functional_zone_detector import FunctionalZoneDetector
|
7 |
+
from pattern_analyzer import PatternAnalyzer
|
8 |
+
from specialized_scene_processor import SpecializedSceneProcessor
|
9 |
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
|
|
13 |
"""
|
14 |
負責不同場景類型的區域識別邏輯
|
15 |
專注於根據場景類型執行相應的功能區域識別策略
|
16 |
+
整合所有專門的區域辨識組件,主要須整合至SpatialAnalyzer
|
17 |
"""
|
18 |
|
19 |
def __init__(self):
|
20 |
"""初始化場景區域辨識器"""
|
21 |
try:
|
22 |
+
# 初始化各個專門組件
|
23 |
+
self.functional_detector = FunctionalZoneDetector()
|
24 |
+
self.pattern_analyzer = PatternAnalyzer()
|
25 |
+
self.scene_processor = SpecializedSceneProcessor()
|
26 |
+
|
27 |
logger.info("SceneZoneIdentifier initialized successfully")
|
28 |
|
29 |
except Exception as e:
|
|
|
48 |
zones = {}
|
49 |
|
50 |
# 主要功能區域(基於物件關聯性而非場景類型)
|
51 |
+
primary_zone = self.functional_detector.identify_primary_functional_area(detected_objects)
|
52 |
if primary_zone:
|
53 |
# 基於區域內容生成描述性鍵名
|
54 |
+
descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(primary_zone, "primary")
|
55 |
zones[descriptive_key] = primary_zone
|
56 |
|
57 |
# 只有明確證據且物件數量足夠時創建次要功能區域
|
58 |
if len(zones) >= 1 and len(detected_objects) >= 6:
|
59 |
+
secondary_zone = self.functional_detector.identify_secondary_functional_area(detected_objects, zones)
|
60 |
if secondary_zone:
|
61 |
# 基於區域內容生成描述性鍵名
|
62 |
+
descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(secondary_zone, "secondary")
|
63 |
zones[descriptive_key] = secondary_zone
|
64 |
|
65 |
logger.info(f"Identified {len(zones)} indoor zones for scene type '{scene_type}'")
|
|
|
70 |
logger.error(traceback.format_exc())
|
71 |
return {}
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
def identify_outdoor_general_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
|
74 |
"""
|
75 |
+
辨識一般戶外場景的功能區域
|
76 |
|
77 |
Args:
|
78 |
category_regions: 按類別和區域分組的物件字典
|
|
|
141 |
|
142 |
# 針對公園區域的特殊處理
|
143 |
if scene_type == "park_area":
|
144 |
+
zones.update(self.pattern_analyzer.identify_park_recreational_zones(detected_objects))
|
145 |
|
146 |
# 針對停車場的特殊處理
|
147 |
if scene_type == "parking_lot":
|
148 |
+
zones.update(self.pattern_analyzer.identify_parking_zones(detected_objects))
|
149 |
|
150 |
logger.info(f"Identified {len(zones)} outdoor zones for scene type '{scene_type}'")
|
151 |
return zones
|
|
|
158 |
def identify_intersection_zones(self, category_regions: Dict, detected_objects: List[Dict], viewpoint: str) -> Dict:
|
159 |
"""
|
160 |
辨識城市十字路口的功能區域,無論是否有行人,只要偵測到紅綠燈就一定顯示 Traffic Control Area;
|
161 |
+
如果有行人,則額外建立 Crossing Zone 並把行人 + 同 region 的紅綠燈歸在一起。
|
162 |
|
163 |
Args:
|
164 |
category_regions: 按類別和 region 分組的物件字典
|
|
|
177 |
traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
|
178 |
|
179 |
# 2. Step A: 無條件建立 Traffic Control Area
|
180 |
+
# 把每個 region 下的紅綠燈都先分群,生成對應 zone,確保"只要偵測到紅綠燈就一定顯示"
|
181 |
signal_regions_all = {}
|
182 |
for t in traffic_light_objs:
|
183 |
region = t["region"]
|
|
|
211 |
|
212 |
# 3. Step B: 如果有行人,就建立 Crossing Zone,並移除已被打包的紅綠燈
|
213 |
if pedestrian_objs:
|
214 |
+
# 先呼叫 analyze_crossing_patterns,讓它回傳「行人 + 同 region 的紅綠燈」區
|
215 |
+
crossing_zones = self.pattern_analyzer.analyze_crossing_patterns(pedestrian_objs, traffic_light_objs)
|
216 |
|
217 |
# 把 Crossing Zone 加到最終 zones,並同時記錄已使用掉的紅綠燈數量
|
218 |
for zone_key, zone_info in crossing_zones.items():
|
|
|
249 |
|
250 |
# 5. Step D: 分析車輛交通區域(Vehicle Zones)
|
251 |
if vehicle_objs:
|
252 |
+
traffic_zones = self.pattern_analyzer.analyze_traffic_zones(vehicle_objs)
|
253 |
+
# analyze_traffic_zones 內部已用英文 debug,直接更新
|
254 |
for zone_key, zone_info in traffic_zones.items():
|
255 |
if zone_key in zones:
|
256 |
suffix = 1
|
|
|
322 |
# 識別車輛模式進行交通分析
|
323 |
vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
|
324 |
if vehicle_objs:
|
325 |
+
zones.update(self.pattern_analyzer.analyze_aerial_traffic_patterns(vehicle_objs))
|
326 |
|
327 |
# 針對十字路口特定空中視角的處理
|
328 |
if "intersection" in scene_type:
|
329 |
+
zones.update(self.scene_processor.identify_aerial_intersection_features(detected_objects))
|
330 |
|
331 |
# 針對廣場空中視角的處理
|
332 |
if "plaza" in scene_type:
|
333 |
+
zones.update(self.scene_processor.identify_aerial_plaza_features(people_objs))
|
334 |
|
335 |
logger.info(f"Identified {len(zones)} aerial view zones")
|
336 |
return zones
|
|
|
386 |
"description": f"Asian commercial storefront with pedestrian activity"
|
387 |
}
|
388 |
|
389 |
+
# 辨識行人通道
|
390 |
+
zones.update(self.scene_processor.identify_asian_pedestrian_pathway(detected_objects))
|
391 |
|
392 |
# 辨識攤販區域(小攤/商店 - 從情境推斷)
|
393 |
+
zones.update(self.scene_processor.identify_vendor_zones(detected_objects))
|
394 |
|
395 |
# 針對夜市的特殊處理
|
396 |
if scene_type == "asian_night_market":
|
|
|
447 |
}
|
448 |
|
449 |
# 識別裝飾區域,增強檢測
|
450 |
+
zones.update(self.scene_processor.identify_upscale_decorative_zones(detected_objects))
|
451 |
|
452 |
# 識別座位安排區域
|
453 |
+
zones.update(self.scene_processor.identify_dining_seating_zones(detected_objects))
|
454 |
|
455 |
# 識別服務區域(如果與餐飲區域不同)
|
456 |
+
zones.update(self.scene_processor.identify_serving_zones(detected_objects, zones))
|
457 |
|
458 |
logger.info(f"Identified {len(zones)} upscale dining zones")
|
459 |
return zones
|
|
|
502 |
}
|
503 |
|
504 |
# 側邊建築區域(從場景情境推斷)
|
505 |
+
zones.update(self.scene_processor.identify_building_zones(detected_objects))
|
506 |
|
507 |
# 行人區域
|
508 |
+
zones.update(self.scene_processor.identify_financial_pedestrian_zones(detected_objects))
|
509 |
|
510 |
logger.info(f"Identified {len(zones)} financial district zones")
|
511 |
return zones
|
|
|
592 |
}
|
593 |
|
594 |
# 創建相關輔助功能區,如攝影區、紀念品販賣區
|
595 |
+
auxiliary_zones = self.scene_processor.create_landmark_auxiliary_zones(landmark, 0)
|
596 |
if auxiliary_zones:
|
597 |
landmark_zones.update(auxiliary_zones)
|
598 |
|
|
|
604 |
logger.error(traceback.format_exc())
|
605 |
return {}
|
606 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
607 |
def _get_directional_description(self, region: str) -> str:
|
608 |
"""
|
609 |
將區域名稱轉換為方位描述(東西南北)
|
610 |
+
這是核心工具方法,供所有組件使用
|
611 |
|
612 |
Args:
|
613 |
region: 區域名稱
|
|
|
640 |
except Exception as e:
|
641 |
logger.error(f"Error getting directional description for region '{region}': {str(e)}")
|
642 |
return "central"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
spatial_location_handler.py
ADDED
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import traceback
|
3 |
+
import numpy as np
|
4 |
+
from typing import Dict, List, Optional, Any, Tuple
|
5 |
+
|
6 |
+
class SpatialLocationHandler:
|
7 |
+
"""
|
8 |
+
空間位置處理器 - 專門處理空間描述生成和排列模式分析
|
9 |
+
負責生成物件的空間位置描述、分析排列模式以及與 RegionAnalyzer 的整合
|
10 |
+
"""
|
11 |
+
|
12 |
+
def __init__(self, region_analyzer: Optional[Any] = None):
    """
    Set up the handler.

    Args:
        region_analyzer: Optional RegionAnalyzer instance; stored as-is and
            may be replaced later through set_region_analyzer.
    """
    self.region_analyzer = region_analyzer
    # The logger is named after the concrete class.
    self.logger = logging.getLogger(type(self).__name__)
|
21 |
+
|
22 |
+
def set_region_analyzer(self, region_analyzer: Any) -> None:
    """
    Attach the RegionAnalyzer used for standardized spatial descriptions.

    Args:
        region_analyzer: RegionAnalyzer instance; stored without validation.
    """
    try:
        self.region_analyzer = region_analyzer
        self.logger.info("RegionAnalyzer instance set for SpatialLocationHandler")
    except Exception as exc:
        # Best effort: a failure here is logged, never raised.
        self.logger.warning("Error setting RegionAnalyzer: " + str(exc))
|
34 |
+
|
35 |
+
def generate_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
                                 image_height: Optional[int] = None,
                                 region_analyzer: Optional[Any] = None) -> str:
    """
    Generate a spatial position phrase for a detected object.

    Resolution order:
      1. Empty or "unknown" region: a generic phrase chosen from the class name.
      2. A supplied region_analyzer exposing get_spatial_description_phrase:
         its contextual or plain phrase, when non-empty.
      3. Built-in table lookup on the region name (underscores read as spaces).
      4. Keyword match on the region name (top/bottom/left/right/center/middle).
      5. The object's "normalized_center", mapped onto the same 3x3 table.

    Args:
        obj: Object dictionary; "region", "class_name" and
            "normalized_center" are read.
        image_width: Optional image width. Accepted for interface
            compatibility; not needed because the centre is already
            normalized.
        image_height: Optional image height. Accepted for interface
            compatibility; not needed for the same reason.
        region_analyzer: Optional RegionAnalyzer instance used to produce
            standardized descriptions.

    Returns:
        str: Spatial phrase. A generic phrase (for example "located in the
        scene") when the region is empty or "unknown"; an empty string when
        no strategy applies or an error occurs.
    """
    try:
        region = obj.get("region") or ""
        object_type = obj.get("class_name", "")

        # No usable region: fall back to a generic phrase based on the class.
        if not region.strip() or region == "unknown":
            if object_type and any(vehicle in object_type.lower() for vehicle in ["car", "truck", "bus"]):
                return "positioned in the scene"
            elif object_type and "person" in object_type.lower():
                return "present in the area"
            else:
                return "located in the scene"

        # Prefer the analyzer's standardized wording when one is supplied.
        if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'):
            if hasattr(region_analyzer, 'get_contextual_spatial_description'):
                spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type)
            else:
                spatial_desc = region_analyzer.get_spatial_description_phrase(region)

            if spatial_desc:
                return spatial_desc

        # Built-in fallback table.
        clean_region = region.replace('_', ' ').strip().lower()

        region_map = {
            "top left": "in the upper left area",
            "top center": "in the upper area",
            "top right": "in the upper right area",
            "middle left": "on the left side",
            "middle center": "in the center",
            "center": "in the center",
            "middle right": "on the right side",
            "bottom left": "in the lower left area",
            "bottom center": "in the lower area",
            "bottom right": "in the lower right area"
        }

        if clean_region in region_map:
            return region_map[clean_region]

        # Looser keyword matching for non-canonical region names.
        if "top" in clean_region and "left" in clean_region:
            return "in the upper left area"
        elif "top" in clean_region and "right" in clean_region:
            return "in the upper right area"
        elif "bottom" in clean_region and "left" in clean_region:
            return "in the lower left area"
        elif "bottom" in clean_region and "right" in clean_region:
            return "in the lower right area"
        elif "top" in clean_region:
            return "in the upper area"
        elif "bottom" in clean_region:
            return "in the lower area"
        elif "left" in clean_region:
            return "on the left side"
        elif "right" in clean_region:
            return "on the right side"
        elif "center" in clean_region or "middle" in clean_region:
            return "in the center"

        # Last resort: derive the grid cell from the normalized centre. The
        # coordinates are already relative, so the image size is not needed,
        # and the phrase comes from the same table so wording stays consistent.
        norm_center = obj.get("normalized_center")
        if norm_center is not None and len(norm_center) == 2:
            x_norm, y_norm = norm_center
            column = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else "center"
            row = "top" if y_norm < 0.4 else "bottom" if y_norm > 0.6 else "middle"
            return region_map[f"{row} {column}"]

        # Every strategy failed.
        return ""

    except Exception as e:
        self.logger.warning(f"Error generating spatial description: {str(e)}")
        return ""
|
131 |
+
|
132 |
+
def get_standardized_spatial_description(self, obj: Dict) -> str:
    """
    Build a standardized spatial phrase for one object via the region analyzer.

    Args:
        obj: Detection dictionary; its "region" and "class_name" entries are read.

    Returns:
        str: The phrase produced by ``self.region_analyzer`` (preferring
        ``get_contextual_spatial_description`` over
        ``get_spatial_description_phrase``); an empty string when no usable
        analyzer is attached; or a generic fallback phrase
        ("visible in the scene" / "present in the view") when the lookup raises.
    """
    try:
        analyzer = getattr(self, "region_analyzer", None)
        if analyzer:
            region = obj.get("region", "")
            object_type = obj.get("class_name", "")

            if hasattr(analyzer, "get_contextual_spatial_description"):
                return analyzer.get_contextual_spatial_description(region, object_type)
            if hasattr(analyzer, "get_spatial_description_phrase"):
                return analyzer.get_spatial_description_phrase(region)

        return ""

    except Exception as e:
        self.logger.warning(f"Error getting standardized spatial description: {str(e)}")
        # The failure may have been caused by obj itself (e.g. None), so it is
        # only queried again when it really is a dict; otherwise the handler
        # would raise a second time instead of returning the fallback phrase.
        object_type = obj.get("class_name", "") if isinstance(obj, dict) else ""
        if object_type:
            return "visible in the scene"
        return "present in the view"
|
160 |
+
|
161 |
+
def analyze_spatial_arrangement(self, class_name: str, scene_type: Optional[str],
                                detected_objects: Optional[List[Dict]],
                                count: int) -> Optional[str]:
    """
    Describe how the detected instances of one object class are laid out.

    Args:
        class_name: Object class name.
        scene_type: Scene type (forwarded to the description generator).
        detected_objects: All detections of this class.
        count: Number of objects (forwarded to the description generator).

    Returns:
        Optional[str]: The arrangement description, or None when fewer than two
        usable positions are available or the analysis raises.
    """
    # A layout only makes sense for two or more objects.
    if not detected_objects or len(detected_objects) < 2:
        return None

    try:
        # A detection without "normalized_center" falls back to [0.5, 0.5];
        # anything that is not a list/tuple of at least two items is dropped.
        raw_centers = (item.get("normalized_center", [0.5, 0.5]) for item in detected_objects)
        usable_centers = [
            point for point in raw_centers
            if isinstance(point, (list, tuple)) and len(point) >= 2
        ]

        if len(usable_centers) < 2:
            return None

        layout = self._analyze_arrangement_pattern(usable_centers)
        return self._generate_arrangement_description(class_name, scene_type, layout, count)

    except Exception as e:
        self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
        return None
|
200 |
+
|
201 |
+
def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
    """
    Classify the layout formed by a set of position points.

    Args:
        positions: Normalized position coordinates, one [x, y] entry per object.

    Returns:
        str: One of "single", "circular", "linear", "clustered", "scattered",
        "regular" or "distributed".
    """
    if len(positions) < 2:
        return "single"

    # Per-axis spread of the points.
    coords = np.array(positions)
    spread_x = np.var(coords[:, 0])
    spread_y = np.var(coords[:, 1])

    # Euclidean distance between every unordered pair of points.
    pair_gaps = [
        np.sqrt((first[0] - second[0])**2 + (first[1] - second[1])**2)
        for idx, first in enumerate(positions)
        for second in positions[idx + 1:]
    ]
    if pair_gaps:
        mean_gap = np.mean(pair_gaps)
        gap_spread = np.var(pair_gaps)
    else:
        mean_gap = 0
        gap_spread = 0

    # The checks run in priority order; the first match decides.
    if len(positions) >= 4 and self._is_circular_pattern(positions):
        return "circular"
    if spread_x < 0.05 or spread_y < 0.05:
        # Very little variation along one axis.
        return "linear"
    if mean_gap < 0.3 and gap_spread < 0.02:
        # Objects are close together and similarly spaced.
        return "clustered"
    if mean_gap > 0.6:
        # Objects are far apart.
        return "scattered"
    if gap_spread < 0.03:
        # Consistent spacing, possibly a regular layout.
        return "regular"
    return "distributed"
|
249 |
+
|
250 |
+
def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
    """
    Check whether the positions lie roughly on a ring around their centroid.

    The points count as circular when their distances to the centroid vary
    little (variance below 0.05) and the ring is not degenerate (mean distance
    above 0.2).

    NOTE(review): the variance bound is loose for coordinates in [0, 1]. For
    example, four collinear points at x = 0.1, 0.3, 0.7, 0.9 (same y) have
    centroid distances 0.4, 0.2, 0.2, 0.4 (variance 0.01, mean 0.3) and are
    accepted. Confirm whether a stricter, scale-relative test is wanted.

    Args:
        positions: Position coordinates, one [x, y] entry per object.

    Returns:
        bool: True when the layout passes the ring test; False for fewer than
        four points or when the input cannot be evaluated.
    """
    if len(positions) < 4:
        return False

    try:
        pos_array = np.array(positions)

        # Centroid of all points.
        center_x = np.mean(pos_array[:, 0])
        center_y = np.mean(pos_array[:, 1])

        # Distance from each point to the centroid.
        distances_to_center = [
            np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
            for pos in positions
        ]

        distance_variance = np.var(distances_to_center)
        # bool() turns the numpy boolean into the plain bool the signature promises.
        return bool(distance_variance < 0.05 and np.mean(distances_to_center) > 0.2)

    except Exception:
        # Best effort: malformed coordinates simply mean "not circular".
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        return False
|
282 |
+
|
283 |
+
def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
                                      arrangement_pattern: str, count: int) -> Optional[str]:
    """
    Turn an arrangement pattern into a phrase suited to the object class.

    Args:
        class_name: Object class name; selects a class-specific phrase table.
        scene_type: Scene type (accepted but not consulted here).
        arrangement_pattern: Pattern label such as "linear" or "clustered".
        count: Number of objects (accepted but not consulted here).

    Returns:
        Optional[str]: The matching phrase, or "positioned in the scene" when
        the chosen table has no entry for the pattern.
    """
    fallback_phrase = "positioned in the scene"

    # Phrase table used for classes without a dedicated table.
    generic_phrases = {
        "linear": "arranged in a line",
        "clustered": "grouped together",
        "circular": "arranged in a circular pattern",
        "scattered": "distributed across the space",
        "regular": "evenly positioned",
        "distributed": "thoughtfully placed"
    }

    # Class-specific phrase tables; a pattern missing from a table yields the
    # fallback phrase rather than the generic one.
    phrases_by_class = {
        "chair": {
            "linear": "arranged in a row",
            "clustered": "grouped together for conversation",
            "circular": "arranged around the table",
            "scattered": "positioned throughout the space",
            "regular": "evenly spaced",
            "distributed": "thoughtfully positioned"
        },
        "dining table": {
            "linear": "aligned to create a unified dining space",
            "clustered": "grouped to form intimate dining areas",
            "scattered": "distributed to optimize space flow",
            "regular": "systematically positioned",
            "distributed": "strategically placed"
        },
        "car": {
            "linear": "parked in sequence",
            "clustered": "grouped in the parking area",
            "scattered": "distributed throughout the lot",
            "regular": "neatly parked",
            "distributed": "positioned across the area"
        },
        "person": {
            "linear": "moving in a line",
            "clustered": "gathered together",
            "circular": "forming a circle",
            "scattered": "spread across the area",
            "distributed": "positioned throughout the scene"
        }
    }

    chosen_table = phrases_by_class.get(class_name, generic_phrases)
    return chosen_table.get(arrangement_pattern, fallback_phrase)
|
specialized_scene_processor.py
ADDED
@@ -0,0 +1,527 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import logging
|
3 |
+
import traceback
|
4 |
+
import numpy as np
|
5 |
+
from typing import Dict, List, Any, Optional
|
6 |
+
|
7 |
+
logger = logging.getLogger(__name__)


class SpecializedSceneProcessor:
    """
    Builds functional-zone dictionaries for specialised scene types and landmarks.

    Covers aerial viewpoints, Asian cultural/commercial scenes, upscale dining,
    financial districts and landmark surroundings. Every public method returns a
    dict mapping a zone key to a zone record; if anything raises, the error is
    logged and an empty dict is returned instead.
    """

    # Region in which the photography spot is placed, keyed by landmark region.
    _PHOTO_REGION_FOR = {
        "top_left": "bottom_right",
        "top_center": "bottom_center",
        "top_right": "bottom_left",
        "middle_left": "middle_right",
        "middle_center": "bottom_center",
        "middle_right": "middle_left",
        "bottom_left": "top_right",
        "bottom_center": "top_center",
        "bottom_right": "top_left"
    }

    # Regions adjacent to each landmark region; the first entry hosts the souvenir area.
    _ADJACENT_REGIONS = {
        "top_left": ["top_center", "middle_left"],
        "top_center": ["top_left", "top_right"],
        "top_right": ["top_center", "middle_right"],
        "middle_left": ["top_left", "bottom_left"],
        "middle_center": ["middle_left", "middle_right"],
        "middle_right": ["top_right", "bottom_right"],
        "bottom_left": ["middle_left", "bottom_center"],
        "bottom_center": ["bottom_left", "bottom_right"],
        "bottom_right": ["bottom_center", "middle_right"]
    }

    def __init__(self):
        """Initialise the processor (it keeps no state) and log that it is ready."""
        logger.info("SpecializedSceneProcessor initialized successfully")

    @staticmethod
    def _group_by_region(objects: List[Dict]) -> Dict:
        """Group detections by their "region" value, keeping first-seen order."""
        groups = {}
        for obj in objects:
            groups.setdefault(obj["region"], []).append(obj)
        return groups

    @staticmethod
    def _largest_group(groups: Dict):
        """Return the (region, members) pair with the most members; ties go to the first seen."""
        return max(groups.items(), key=lambda item: len(item[1]))

    @staticmethod
    def _unique_in_order(items: List[str]) -> List[str]:
        """De-duplicate while keeping first-seen order, so results are stable across runs."""
        return list(dict.fromkeys(items))

    @staticmethod
    def _population_variance(values: List[float]) -> float:
        """Return the population variance of a non-empty list of numbers."""
        mean = sum(values) / len(values)
        return sum((value - mean)**2 for value in values) / len(values)

    @staticmethod
    def _side_is_unoccupied(detected_objects: List[Dict], regions: List[str]) -> bool:
        """Return True when no person (0) or vehicle (1, 2, 3, 5, 7) sits in any of the regions."""
        return not any(
            obj["region"] in regions and obj["class_id"] in [0, 1, 2, 3, 5, 7]
            for obj in detected_objects
        )

    @staticmethod
    def _log_failure(message: str, exc: Exception) -> None:
        """Log the error message followed by the active traceback."""
        logger.error(f"{message}: {str(exc)}")
        logger.error(traceback.format_exc())

    def identify_aerial_intersection_features(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify intersection features seen from an aerial viewpoint.

        Args:
            detected_objects: List of detected objects.

        Returns:
            Dict of intersection zones: a traffic-control zone when traffic
            lights (class id 9) are present, plus an inferred crossing pattern.
        """
        try:
            zones = {}

            # Traffic signals.
            traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
            if traffic_light_objs:
                zones["traffic_control_pattern"] = {
                    "region": "intersection",
                    "objects": ["traffic light"] * len(traffic_light_objs),
                    "description": f"Intersection traffic control with {len(traffic_light_objs)} signals visible from above"
                }

            # The crosswalk is inferred from the aerial context rather than detected.
            zones["crossing_pattern"] = {
                "region": "central",
                "objects": ["inferred crosswalk"],
                "description": "Crossing pattern visible from aerial perspective"
            }

            return zones

        except Exception as e:
            self._log_failure("Error identifying aerial intersection features", e)
            return {}

    def identify_aerial_plaza_features(self, people_objs: List[Dict]) -> Dict:
        """
        Identify plaza features seen from an aerial viewpoint.

        Args:
            people_objs: List of person detections.

        Returns:
            Dict with a "central_gathering" zone when any person is in a region
            whose name contains "middle"; otherwise empty.
        """
        try:
            zones = {}

            if people_objs:
                # People located in the middle band of the frame.
                central_people = [obj for obj in people_objs if "middle" in obj["region"]]

                if central_people:
                    zones["central_gathering"] = {
                        "region": "middle_center",
                        "objects": ["person"] * len(central_people),
                        "description": f"Central plaza gathering area with {len(central_people)} people viewed from above"
                    }

            return zones

        except Exception as e:
            self._log_failure("Error identifying aerial plaza features", e)
            return {}

    def identify_asian_pedestrian_pathway(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify the pedestrian pathway in an Asian cultural/commercial scene.

        Args:
            detected_objects: List of detected objects.

        Returns:
            Dict with a "pedestrian_pathway" zone when people, bicycles or
            motorcycles appear in the vertical middle of the frame.
        """
        try:
            zones = {}

            # People positions drive the path-shape analysis.
            people_positions = [
                obj["normalized_center"] for obj in detected_objects if obj["class_id"] == 0
            ]

            structured_path = False
            path_direction = "meandering"

            if len(people_positions) >= 3:
                # Low variance in y means people line up horizontally; low
                # variance in x means they line up vertically.
                horizontal_path = self._population_variance([pos[1] for pos in people_positions]) < 0.05
                vertical_path = self._population_variance([pos[0] for pos in people_positions]) < 0.05

                structured_path = horizontal_path or vertical_path
                if horizontal_path:
                    path_direction = "horizontal"
                elif vertical_path:
                    path_direction = "vertical"

            # Person, bicycle, motorcycle within the middle band (the likely pathway).
            pathway_items = [
                obj["class_name"] for obj in detected_objects
                if obj["class_id"] in [0, 1, 3] and 0.25 <= obj["normalized_center"][1] <= 0.75
            ]

            if pathway_items:
                path_desc = "Pedestrian walkway with people moving through the commercial area"
                if structured_path:
                    path_desc = f"{path_direction.capitalize()} pedestrian walkway with organized foot traffic"

                zones["pedestrian_pathway"] = {
                    "region": "middle_center",  # the pathway is assumed to run through the middle
                    "objects": self._unique_in_order(pathway_items),
                    "description": path_desc
                }

            return zones

        except Exception as e:
            self._log_failure("Error identifying Asian pedestrian pathway", e)
            return {}

    def identify_vendor_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify a vendor / market-stall zone inferred from context.

        Args:
            detected_objects: List of detected objects.

        Returns:
            Dict with a "vendor_zone" located in the region holding the most
            small merchandise items, when both people and such items are present.
        """
        try:
            zones = {}

            # bags, bottles, cups
            has_small_objects = any(obj["class_id"] in [24, 26, 39, 41] for obj in detected_objects)
            has_people = any(obj["class_id"] == 0 for obj in detected_objects)

            if has_small_objects and has_people:
                # bags, bottles, cups, phones
                merchandise = [obj for obj in detected_objects if obj["class_id"] in [24, 26, 39, 41, 67]]
                small_obj_regions = self._group_by_region(merchandise)

                if small_obj_regions:
                    region, members = self._largest_group(small_obj_regions)

                    if region is not None:
                        zones["vendor_zone"] = {
                            "region": region,
                            "objects": self._unique_in_order([obj["class_name"] for obj in members]),
                            "description": "Vendor or market stall area with small merchandise"
                        }

            return zones

        except Exception as e:
            self._log_failure("Error identifying vendor zones", e)
            return {}

    def identify_upscale_decorative_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify the decorative zone of an upscale dining scene.

        Args:
            detected_objects: List of detected objects.

        Returns:
            Dict with a "decorative_zone" in the region holding the most vases
            and wine glasses; its object list covers all such items found.
        """
        try:
            zones = {}

            # Vase, wine glass
            decor_objs = [obj for obj in detected_objects if obj["class_id"] in [75, 40]]
            decor_regions = self._group_by_region(decor_objs)
            decor_items = [obj["class_name"] for obj in decor_objs]

            if decor_items:
                region, _members = self._largest_group(decor_regions)

                if region is not None:
                    unique_items = self._unique_in_order(decor_items)
                    zones["decorative_zone"] = {
                        "region": region,
                        "objects": unique_items,
                        "description": f"Decorative area with {', '.join(unique_items)}"
                    }

            return zones

        except Exception as e:
            self._log_failure("Error identifying upscale decorative zones", e)
            return {}

    def identify_dining_seating_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify the seating arrangement zone of a dining scene.

        Args:
            detected_objects: List of detected objects.

        Returns:
            Dict with a "dining_seating_zone" in the region holding the most
            chairs, when at least two chairs (class id 56) are detected.
        """
        try:
            zones = {}

            chairs = [obj for obj in detected_objects if obj["class_id"] == 56]
            if len(chairs) >= 2:
                region, members = self._largest_group(self._group_by_region(chairs))

                if region is not None:
                    zones["dining_seating_zone"] = {
                        "region": region,
                        "objects": ["chair"] * len(members),
                        "description": f"Formal dining seating arrangement with {len(members)} chairs"
                    }

            return zones

        except Exception as e:
            self._log_failure("Error identifying dining seating zones", e)
            return {}

    def identify_serving_zones(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
        """
        Identify a serving / sideboard zone.

        Args:
            detected_objects: List of detected objects.
            existing_zones: Functional zones found so far; a serving zone is only
                reported when it contains a "formal_dining_zone".

        Returns:
            Dict with a "serving_zone" in the region (other than the formal
            dining region) holding the most bottles and bowls.
        """
        try:
            zones = {}

            # Bottle, bowl located outside the main dining table's region.
            serving_objs = [
                obj for obj in detected_objects
                if obj["class_id"] in [39, 45]
                and "formal_dining_zone" in existing_zones
                and obj["region"] != existing_zones["formal_dining_zone"]["region"]
            ]
            serving_regions = self._group_by_region(serving_objs)
            serving_items = [obj["class_name"] for obj in serving_objs]

            if serving_items:
                region, _members = self._largest_group(serving_regions)

                if region is not None:
                    unique_items = self._unique_in_order(serving_items)
                    zones["serving_zone"] = {
                        "region": region,
                        "objects": unique_items,
                        "description": f"Serving or sideboard area with {', '.join(unique_items)}"
                    }

            return zones

        except Exception as e:
            self._log_failure("Error identifying serving zones", e)
            return {}

    def identify_building_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Infer building zones along the sides of the scene.

        A side is reported as lined with buildings when none of its three
        regions contains a person or a vehicle.

        Args:
            detected_objects: List of detected objects.

        Returns:
            Dict with "building_zone_left" and/or "building_zone_right".
        """
        try:
            zones = {}

            left_side_regions = ["top_left", "middle_left", "bottom_left"]
            right_side_regions = ["top_right", "middle_right", "bottom_right"]

            if self._side_is_unoccupied(detected_objects, left_side_regions):
                zones["building_zone_left"] = {
                    "region": "middle_left",
                    "objects": ["building"],  # inferred, not detected
                    "description": "Tall buildings line the left side of the street"
                }

            if self._side_is_unoccupied(detected_objects, right_side_regions):
                zones["building_zone_right"] = {
                    "region": "middle_right",
                    "objects": ["building"],  # inferred, not detected
                    "description": "Tall buildings line the right side of the street"
                }

            return zones

        except Exception as e:
            self._log_failure("Error identifying building zones", e)
            return {}

    def identify_financial_pedestrian_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify the pedestrian zone of a financial district scene.

        Args:
            detected_objects: List of detected objects.

        Returns:
            Dict with a "pedestrian_zone" in the region holding the most people.
        """
        try:
            zones = {}

            people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
            if people_objs:
                region, members = self._largest_group(self._group_by_region(people_objs))

                if region is not None:
                    zones["pedestrian_zone"] = {
                        "region": region,
                        "objects": ["person"] * len(members),
                        "description": f"Pedestrian area with {len(members)} people navigating the financial district"
                    }

            return zones

        except Exception as e:
            self._log_failure("Error identifying financial pedestrian zones", e)
            return {}

    def create_landmark_auxiliary_zones(self, landmark: Dict, index: int) -> Dict:
        """
        Create auxiliary zones around a landmark (photography spot, souvenir area).

        Args:
            landmark: Landmark dictionary; "region", "class_name" and
                "confidence" are read.
            index: Landmark index (accepted but not consulted here).

        Returns:
            Dict with a photography-spot zone and, for landmarks with
            confidence above 0.7 in a known region, a souvenir-area zone.
        """
        try:
            auxiliary_zones = {}
            landmark_region = landmark.get("region", "middle_center")
            landmark_name = landmark.get("class_name", "Landmark")
            key_prefix = landmark_name.lower().replace(' ', '_')

            # The photography spot is placed in the region mapped from the
            # landmark's region; unknown regions map to themselves.
            photo_region = self._PHOTO_REGION_FOR.get(landmark_region, landmark_region)

            auxiliary_zones[f"{key_prefix}_photography_spot"] = {
                "name": f"{landmark_name} Photography Spot",
                "description": f"Popular position for photographing {landmark_name} with optimal viewing angle.",
                "objects": ["camera", "person", "cell phone"],
                "region": photo_region,
                "primary_function": "Tourist photography"
            }

            # High-confidence landmarks also get a souvenir area in the first
            # adjacent region (near the landmark but not on top of it).
            if landmark.get("confidence", 0) > 0.7 and landmark_region in self._ADJACENT_REGIONS:
                souvenir_region = self._ADJACENT_REGIONS[landmark_region][0]

                auxiliary_zones[f"{key_prefix}_souvenir_area"] = {
                    "name": f"{landmark_name} Souvenir Area",
                    "description": f"Area where visitors can purchase souvenirs and memorabilia related to {landmark_name}.",
                    "objects": ["person", "handbag", "backpack"],
                    "region": souvenir_region,
                    "primary_function": "Tourism commerce"
                }

            return auxiliary_zones

        except Exception as e:
            self._log_failure("Error creating landmark auxiliary zones", e)
            return {}
|
statistics_processor.py
ADDED
@@ -0,0 +1,343 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import Dict, List, Optional, Any
|
3 |
+
|
4 |
+
class StatisticsProcessor:
|
5 |
+
"""
|
6 |
+
統計分析處理器 - 負責複雜的物件統計分析和數據轉換
|
7 |
+
|
8 |
+
此類別專門處理物件統計信息的深度分析、Places365信息處理,
|
9 |
+
以及基於統計數據生成替換內容的複雜邏輯。
|
10 |
+
"""
|
11 |
+
|
12 |
+
def __init__(self):
    """Create the statistics processor and attach a logger named after its class."""
    own_class_name = type(self).__name__
    self.logger = logging.getLogger(own_class_name)
    self.logger.debug("StatisticsProcessor initialized successfully")
|
16 |
+
|
17 |
+
def generate_statistics_replacements(self, object_statistics: Optional[Dict]) -> Dict[str, str]:
    """
    Generate template replacement phrases from per-class object statistics.

    Args:
        object_statistics: Mapping of class name to a statistics dict whose
            "count" entry is read (e.g. {"chair": {"count": 4}}).

    Returns:
        Dict[str, str]: Replacement phrases keyed by template slot
        ("plant_elements", "seating", "furniture", "people_and_vehicles",
        "pedestrian_flow", "table_setup", "table_description"). Classes that are
        absent or have a non-positive count contribute nothing. If an error
        occurs, the phrases generated up to that point are returned.
    """
    replacements = {}

    if not object_statistics:
        return replacements

    def positive_count(label):
        """Return the count recorded for *label*, or 0 when absent or not positive."""
        if label not in object_statistics:
            return 0
        count = object_statistics[label]["count"]
        return count if count > 0 else 0

    # Spelled-out numbers used for chair counts.
    number_words = {
        1: "one", 2: "two", 3: "three", 4: "four",
        5: "five", 6: "six", 7: "seven", 8: "eight",
        9: "nine", 10: "ten", 11: "eleven", 12: "twelve"
    }

    try:
        # Plants. A zero count must not yield "0 potted plants".
        plant_count = positive_count("potted plant")
        if plant_count == 1:
            replacements["plant_elements"] = "a potted plant"
        elif 1 < plant_count <= 3:
            replacements["plant_elements"] = f"{plant_count} potted plants"
        elif plant_count > 3:
            replacements["plant_elements"] = f"multiple potted plants ({plant_count} total)"

        # Seating (chairs). A zero count must not yield "several chairs".
        chair_count = positive_count("chair")
        if chair_count == 1:
            replacements["seating"] = "a chair"
            replacements["furniture"] = "a chair"
        elif chair_count in number_words:
            word_count = number_words[chair_count]
            replacements["seating"] = f"{word_count} chairs"
            replacements["furniture"] = f"{word_count} chairs"
        elif 0 < chair_count <= 20:
            replacements["seating"] = "several chairs"
            replacements["furniture"] = "several chairs"
        elif chair_count > 20:
            replacements["seating"] = f"numerous chairs ({chair_count} total)"
            replacements["furniture"] = "numerous chairs"

        # Mixed furniture: collect every furniture type that is actually present.
        furniture_present = [
            (furniture_type, positive_count(furniture_type))
            for furniture_type in ["chair", "dining table", "couch", "bed"]
        ]
        furniture_present = [(kind, count) for kind, count in furniture_present if count > 0]

        # NOTE(review): whenever chairs are present, "furniture" has already
        # been set above, so this combined phrase is currently never produced.
        # Confirm whether it was meant to override the chair-only phrase.
        if len(furniture_present) > 1 and "furniture" not in replacements:
            main_furniture, main_count = furniture_present[0]  # first in the fixed order above

            if main_furniture == "chair":
                if main_count <= 6:
                    replacements["furniture"] = f"{number_words[main_count]} chairs and other furniture"
                else:
                    replacements["furniture"] = "multiple chairs and other furniture"

        # People. A zero count must not yield "0 people".
        person_count = positive_count("person")
        if person_count == 1:
            replacements["people_and_vehicles"] = "a person"
            replacements["pedestrian_flow"] = "an individual walking"
        elif 1 < person_count <= 5:
            replacements["people_and_vehicles"] = f"{person_count} people"
            replacements["pedestrian_flow"] = f"{person_count} people walking"
        elif person_count > 5:
            replacements["people_and_vehicles"] = f"many people ({person_count} individuals)"
            replacements["pedestrian_flow"] = f"a crowd of {person_count} people"

        # Tables. A zero count must not yield "0 dining tables".
        table_count = positive_count("dining table")
        if table_count == 1:
            replacements["table_setup"] = "a dining table"
            replacements["table_description"] = "a dining surface"
        elif table_count > 1:
            replacements["table_setup"] = f"{table_count} dining tables"
            replacements["table_description"] = f"{table_count} dining surfaces"

        self.logger.debug(f"Generated {len(replacements)} statistics-based replacements")

    except Exception as e:
        # Best effort: keep whatever was generated before the failure.
        self.logger.warning(f"Error generating statistics replacements: {str(e)}")

    return replacements
|
122 |
+
|
123 |
+
def generate_places365_replacements(self, places365_info: Optional[Dict]) -> Dict[str, str]:
    """
    Build template replacement strings from Places365 scene-classification info.

    Args:
        places365_info: Places365 result dict, or None. The keys read here are
            'confidence', 'scene_label' and 'attributes'.

    Returns:
        Dict[str, str]: Always contains 'places365_context' and
            'places365_atmosphere'. Both are empty strings when the info is
            missing, when its confidence is 0.35 or lower, or when an error
            occurs while building the replacements.
    """
    replacements = {"places365_context": "", "places365_atmosphere": ""}

    # Low-confidence classifications are ignored entirely.
    if not places365_info or places365_info.get('confidence', 0) <= 0.35:
        return replacements

    try:
        # `or` guards against keys that are present but explicitly None.
        scene_label = (places365_info.get('scene_label') or '').replace('_', ' ').strip()
        attributes = places365_info.get('attributes') or []

        # Scene context, with an article that agrees with the label
        # ("an airport terminal", "a living room").
        if scene_label:
            article = "an" if scene_label[0].lower() in ('a', 'e', 'i', 'o', 'u') else "a"
            replacements["places365_context"] = f"characteristic of {article} {scene_label}"

        # Atmosphere: natural lighting takes precedence over artificial lighting.
        if 'natural_lighting' in attributes:
            replacements["places365_atmosphere"] = "with natural illumination"
        elif 'artificial_lighting' in attributes:
            replacements["places365_atmosphere"] = "under artificial lighting"

        self.logger.debug("Generated Places365-based replacements")

    except Exception as e:
        self.logger.warning(f"Error generating Places365 replacements: {str(e)}")
        replacements["places365_context"] = ""
        replacements["places365_atmosphere"] = ""

    return replacements
|
166 |
+
|
167 |
+
def analyze_scene_composition(self, detected_objects: List[Dict]) -> Dict:
    """
    Summarise the composition of a scene; the result is used to judge template complexity.

    Args:
        detected_objects: List of detected-object dicts. Only the 'class_name'
            key is read; objects without it are counted as "unknown".

    Returns:
        Dict: 'total_objects', 'unique_categories', 'category_distribution'
            (class name -> count) and 'complexity_score'
            (0.3 * total + 0.7 * unique, capped at 10). On error every count
            is zero and the distribution is empty, so the result always has
            the same keys.
    """
    try:
        total_objects = len(detected_objects)

        # Count how many objects of each class were detected.
        object_categories = {}
        for obj in detected_objects:
            class_name = obj.get("class_name", "unknown")
            object_categories[class_name] = object_categories.get(class_name, 0) + 1

        unique_categories = len(object_categories)

        return {
            "total_objects": total_objects,
            "unique_categories": unique_categories,
            "category_distribution": object_categories,
            "complexity_score": min(total_objects * 0.3 + unique_categories * 0.7, 10)
        }

    except Exception as e:
        self.logger.warning(f"Error analyzing scene composition: {str(e)}")
        # Same schema as the success path so callers can index any key safely.
        return {
            "total_objects": 0,
            "unique_categories": 0,
            "category_distribution": {},
            "complexity_score": 0
        }
|
199 |
+
|
200 |
+
def generate_zone_descriptions(self, zone_data: Dict[str, Any], section: Dict[str, Any]) -> List[str]:
    """
    Produce one descriptive sentence per functional zone that contains objects.

    Zones are described in descending order of object count. A zone's own
    'description' is reused unless it is empty or contains one of the
    technical words "zone", "area" or "region"; otherwise a sentence is
    generated from the zone name and its first three objects.

    Args:
        zone_data: Mapping of zone name -> zone info dict. The keys read from
            each info dict are 'objects' and 'description'.
        section: Section configuration from the template. Not used by this
            implementation; kept for interface compatibility.

    Returns:
        List[str]: Zone descriptions; empty when there are no zones or an
            error occurs.
    """
    try:
        descriptions = []

        if not zone_data:
            return descriptions

        # zone_data itself is the zone dictionary. Malformed (non-dict)
        # entries are skipped so one bad zone does not discard the rest.
        usable_zones = [(name, info) for name, info in zone_data.items() if isinstance(info, dict)]
        sorted_zones = sorted(usable_zones,
                              key=lambda item: len(item[1].get("objects") or []),
                              reverse=True)

        for zone_name, zone_info in sorted_zones:
            objects = zone_info.get("objects") or []
            if not objects:
                continue

            description = zone_info.get("description", "")

            # Reuse an existing description only when it reads naturally.
            if description and not any(tech in description.lower() for tech in ['zone', 'area', 'region']):
                descriptions.append(description)
                continue

            # Otherwise generate a more natural sentence from the zone name.
            clean_zone_name = zone_name.replace('_', ' ').replace(' area', '').replace(' zone', '')
            object_list = ', '.join(str(obj) for obj in objects[:3])

            if 'crossing' in zone_name or 'pedestrian' in zone_name:
                zone_desc = f"In the central crossing area, there are {object_list}."
            elif 'vehicle' in zone_name or 'traffic' in zone_name:
                zone_desc = f"The vehicle movement area includes {object_list}."
            elif 'control' in zone_name:
                zone_desc = f"Traffic control elements include {object_list}."
            else:
                zone_desc = f"The {clean_zone_name} contains {object_list}."

            # NOTE(review): the source's indentation was lost; this suffix is
            # assumed to apply only to generated sentences - confirm.
            remaining = len(objects) - 3
            if remaining > 0:
                noun = "element" if remaining == 1 else "elements"
                zone_desc += f" Along with {remaining} additional {noun}."

            descriptions.append(zone_desc)

        return descriptions

    except Exception as e:
        self.logger.error(f"Error generating zone descriptions: {str(e)}")
        return []
|
254 |
+
|
255 |
+
def generate_object_summary(self, object_data: List[Dict], section: Dict[str, Any]) -> str:
    """
    Build a one-sentence natural-language summary of the detected objects.

    Object classes are ranked by importance (0.6 * count + 0.4 * average
    confidence) and the five highest-ranked classes are listed.

    Args:
        object_data: List of detected-object dicts. The keys read are
            'class_name' (missing or empty -> "unknown") and 'confidence'
            (default 0.5).
        section: Section configuration from the template. Not used by this
            implementation; kept for interface compatibility.

    Returns:
        str: A sentence starting with "The scene features ...", or an empty
            string when there are no objects or an error occurs.
    """
    # Last-word plurals that do not follow the regular English rules.
    irregular_plurals = {
        "person": "people",
        "knife": "knives",
        "mouse": "mice",
        "sheep": "sheep",
        "skis": "skis",
        "scissors": "scissors",
    }

    def pluralize(name: str) -> str:
        """Pluralise the last word of *name* ("wine glass" -> "wine glasses")."""
        head, sep, last = name.rpartition(" ")
        lowered = last.lower()
        if lowered in irregular_plurals:
            plural = irregular_plurals[lowered]
        elif lowered.endswith(("s", "x", "z", "ch", "sh")):
            plural = last + "es"
        elif lowered.endswith("y") and lowered[-2:-1] not in ("a", "e", "i", "o", "u", ""):
            plural = last[:-1] + "ies"
        else:
            plural = last + "s"
        return f"{head}{sep}{plural}"

    try:
        if not object_data:
            return ""

        # Accumulate count and total confidence per class.
        object_stats = {}
        for obj in object_data:
            class_name = obj.get("class_name") or "unknown"
            confidence = obj.get("confidence", 0.5)
            stats = object_stats.setdefault(class_name, {"count": 0, "total_confidence": 0})
            stats["count"] += 1
            stats["total_confidence"] += confidence

        # Rank by importance (count combined with average confidence).
        ranked = []
        for class_name, stats in object_stats.items():
            count = stats["count"]
            avg_confidence = stats["total_confidence"] / count
            ranked.append((class_name, count, count * 0.6 + avg_confidence * 0.4))
        ranked.sort(key=lambda entry: entry[2], reverse=True)

        # Turn the top five classes into noun phrases.
        descriptions = []
        for class_name, count, _ in ranked[:5]:
            clean_name = str(class_name).replace('_', ' ')
            if count == 1:
                article = "an" if clean_name[:1].lower() in ('a', 'e', 'i', 'o', 'u') else "a"
                descriptions.append(f"{article} {clean_name}")
            else:
                descriptions.append(f"{count} {pluralize(clean_name)}")

        if len(descriptions) == 1:
            return f"The scene features {descriptions[0]}."
        if len(descriptions) == 2:
            return f"The scene features {descriptions[0]} and {descriptions[1]}."

        main_items = ", ".join(descriptions[:-1])
        return f"The scene features {main_items}, and {descriptions[-1]}."

    except Exception as e:
        self.logger.error(f"Error generating object summary: {str(e)}")
        return ""
|
313 |
+
|
314 |
+
def generate_conclusion(self, template: Dict[str, Any], zone_data: Dict[str, Any],
                        object_data: List[Dict]) -> str:
    """
    Generate the concluding sentence of a structured scene description.

    Args:
        template: Template configuration; only 'scene_type' is read
            (default "general").
        zone_data: Functional-zone mapping; only its size is used. None is
            treated as empty.
        object_data: Detected-object list; only its length is used. None is
            treated as empty.

    Returns:
        str: The conclusion sentence, or an empty string if an error occurs.
    """
    def counted(count: int, singular: str, plural: str) -> str:
        """Return "<count> <noun>" with the noun agreeing in number."""
        return f"{count} {singular if count == 1 else plural}"

    try:
        scene_type = (template or {}).get("scene_type", "general")
        zones_count = len(zone_data) if zone_data else 0
        objects_count = len(object_data) if object_data else 0

        if scene_type == "indoor":
            return (
                "This indoor environment demonstrates clear functional organization with "
                f"{counted(zones_count, 'distinct area', 'distinct areas')} and "
                f"{counted(objects_count, 'identified object', 'identified objects')}."
            )
        if scene_type == "outdoor":
            return (
                "This outdoor scene shows dynamic activity patterns across "
                f"{counted(zones_count, 'functional zone', 'functional zones')} with "
                f"{counted(objects_count, 'detected element', 'detected elements')}."
            )
        return (
            "The scene analysis reveals "
            f"{counted(zones_count, 'functional area', 'functional areas')} containing "
            f"{counted(objects_count, 'identifiable object', 'identifiable objects')}."
        )

    except Exception as e:
        self.logger.error(f"Error generating conclusion: {str(e)}")
        return ""
|
template_manager.py
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
template_processor.py
ADDED
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import traceback
|
3 |
+
import re
|
4 |
+
from typing import Dict, List, Optional, Union, Any
|
5 |
+
|
6 |
+
class TemplateProcessor:
    """
    Template processor - fills templates, post-processes text and renders structured templates.

    This class handles the final stage of template filling: text formatting,
    grammar repair, template selection by scene type, and rendering of
    structured (sectioned) templates.
    """

    # An immediately repeated function word ("the the", "in in in").
    # The backreference keeps the casing of the first occurrence.
    _DUPLICATED_WORDS = re.compile(r'\b(the|a|an|with|in|of|and)(?:\s+\1\b)+', re.IGNORECASE)

    # Technical identifiers that must not leak into the final description.
    _TECHNICAL_PATTERNS = (
        r'zone_\d+', r'area_\d+', r'region_\d+',
        r'_zone', r'_area', r'_region'
    )

    def __init__(self):
        """Initialise the template processor and its logger."""
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.debug("TemplateProcessor initialized successfully")

    def preprocess_template(self, template: str) -> str:
        """
        Pre-process a template and repair common problems.

        Args:
            template: Raw template string.

        Returns:
            str: The pre-processed template; the input is returned unchanged
                if an error occurs.
        """
        try:
            # Two placeholders separated only by a comma are merged into one,
            # because an empty replacement would otherwise leave a stray comma.
            template = re.sub(r'\{[^}]*\}\s*,\s*\{[^}]*\}', '{combined_elements}', template)

            # Make sure the template does not start with commas or whitespace.
            template = re.sub(r'^[,\s]*', '', template)

            return template.strip()

        except Exception as e:
            self.logger.warning(f"Error preprocessing template: {str(e)}")
            return template

    def postprocess_filled_template(self, filled_template: str) -> str:
        """
        Post-process a filled template and repair grammar problems.

        Args:
            filled_template: Template string after placeholder substitution.

        Returns:
            str: The repaired text: dangling prepositions completed, comma
                runs collapsed, leading/trailing commas and whitespace
                removed, first letter capitalised and terminal punctuation
                ensured. The input is returned unchanged if an error occurs.
        """
        try:
            # Complete prepositions whose object was filled with nothing ("In , ...").
            text = re.sub(r'\bIn\s*,\s*', 'In this scene, ', filled_template)
            text = re.sub(r'\bAt\s*,\s*', 'At this location, ', text)
            text = re.sub(r'\bWithin\s*,\s*', 'Within this area, ', text)

            # Collapse any run of commas (with optional whitespace) into one.
            text = re.sub(r',(?:\s*,)+', ',', text)

            # Drop leading commas/whitespace, and trailing ones too: they must
            # go BEFORE the terminal period is added, otherwise "text " would
            # become "text ." and "text," would become "text,.".
            text = re.sub(r'^[,\s]*', '', text)
            text = text.rstrip(', \t\r\n')

            # Capitalise the first letter.
            if text and not text[0].isupper():
                text = text[0].upper() + text[1:]

            # Make sure the text ends with sentence punctuation.
            if text and not text.endswith(('.', '!', '?')):
                text += '.'

            return text

        except Exception as e:
            self.logger.warning(f"Error postprocessing filled template: {str(e)}")
            return filled_template

    def get_template_by_scene_type(self, scene_type: str, detected_objects: List[Dict],
                                   functional_zones: Dict, template_repository) -> str:
        """
        Select a template for the scene type and standardise it.

        The variant ("complex", "moderate" or "simple") is chosen from the
        number of functional zones and detected objects. If the chosen variant
        is missing, the "default" variant or else the first available variant
        is used. Scene types without templates get a generic template.

        Args:
            scene_type: Scene type key.
            detected_objects: List of detected-object dicts.
            functional_zones: Functional-zone mapping.
            template_repository: Object exposing a `templates` mapping of
                scene type -> {variant name -> template string}.

        Returns:
            str: The standardised template string, or the fallback template
                if an error occurs.
        """
        try:
            # Gather scene statistics.
            object_stats = self._analyze_scene_composition(detected_objects)
            total_objects = object_stats.get("total_objects", 0)
            zone_count = len(functional_zones) if functional_zones else 0

            templates = template_repository.templates
            if scene_type in templates:
                scene_templates = templates[scene_type]

                # Choose the variant that matches the scene complexity.
                if zone_count >= 3 and total_objects >= 10:
                    template_key = "complex"
                elif zone_count >= 2 or total_objects >= 5:
                    template_key = "moderate"
                else:
                    template_key = "simple"

                if template_key in scene_templates:
                    raw_template = scene_templates[template_key]
                elif "default" in scene_templates:
                    raw_template = scene_templates["default"]
                else:
                    # First available variant; "" (-> fallback template below)
                    # when the scene type has no variants at all.
                    raw_template = next(iter(scene_templates.values()), "")
            else:
                # No scene-specific templates: use a generic one.
                raw_template = self._get_generic_template(object_stats, zone_count)

            # Standardise placeholders and formatting.
            return self._standardize_template_format(raw_template)

        except Exception as e:
            self.logger.error(f"Error selecting template for scene type '{scene_type}': {str(e)}")
            return self._get_fallback_template()

    def _analyze_scene_composition(self, detected_objects: List[Dict]) -> Dict:
        """
        Summarise the scene composition to determine template complexity.

        Args:
            detected_objects: List of detected-object dicts; only 'class_name'
                is read (missing -> "unknown").

        Returns:
            Dict: 'total_objects', 'unique_categories', 'category_distribution'
                and 'complexity_score' (0.3 * total + 0.7 * unique, capped at
                10). On error all counts are zero and the distribution is
                empty, so the keys are always the same.
        """
        try:
            total_objects = len(detected_objects)

            # Count objects per class.
            object_categories = {}
            for obj in detected_objects:
                class_name = obj.get("class_name", "unknown")
                object_categories[class_name] = object_categories.get(class_name, 0) + 1

            unique_categories = len(object_categories)

            return {
                "total_objects": total_objects,
                "unique_categories": unique_categories,
                "category_distribution": object_categories,
                "complexity_score": min(total_objects * 0.3 + unique_categories * 0.7, 10)
            }

        except Exception as e:
            self.logger.warning(f"Error analyzing scene composition: {str(e)}")
            return {
                "total_objects": 0,
                "unique_categories": 0,
                "category_distribution": {},
                "complexity_score": 0
            }

    def _get_generic_template(self, object_stats: Dict, zone_count: int) -> str:
        """
        Return a generic template matched to the scene complexity.

        Args:
            object_stats: Scene statistics; only 'complexity_score' is read.
            zone_count: Number of functional zones.

        Returns:
            str: Generic template string, or the fallback template on error.
        """
        try:
            complexity_score = object_stats.get("complexity_score", 0)

            if complexity_score >= 7 or zone_count >= 3:
                return "This scene presents a comprehensive view featuring {functional_area} with {primary_objects}. The spatial organization demonstrates {spatial_arrangement} across multiple {activity_areas}, creating a dynamic environment with diverse elements and clear functional zones."
            if complexity_score >= 4 or zone_count >= 2:
                return "The scene displays {functional_area} containing {primary_objects}. The arrangement shows {spatial_organization} with distinct areas serving different purposes within the overall space."
            return "A {scene_description} featuring {primary_objects} arranged in {basic_layout} within the visible area."

        except Exception as e:
            self.logger.warning(f"Error getting generic template: {str(e)}")
            return self._get_fallback_template()

    def _get_fallback_template(self) -> str:
        """
        Return the placeholder-free fallback template.

        Returns:
            str: Fallback template string.
        """
        return "A scene featuring various elements and organized areas of activity within the visible space."

    def _standardize_template_format(self, template: str) -> str:
        """
        Standardise placeholders and wording so the template reads naturally.

        Args:
            template: Raw template string.

        Returns:
            str: Standardised template. An empty template yields the fallback
                template; on error the input is returned unchanged.
        """
        try:
            if not template:
                return self._get_fallback_template()

            standardized = template

            # Replace numbered technical placeholders with descriptive ones.
            placeholder_mapping = {
                r'\{zone_\d+\}': '{functional_area}',
                r'\{object_group_\d+\}': '{primary_objects}',
                r'\{region_\d+\}': '{spatial_area}',
                r'\{category_\d+\}': '{object_category}',
                r'\{area_\d+\}': '{activity_area}',
                r'\{section_\d+\}': '{scene_section}'
            }
            for pattern, replacement in placeholder_mapping.items():
                standardized = re.sub(pattern, replacement, standardized)

            # Replace common technical terms with natural wording.
            term_replacements = {
                'functional_zones': 'areas of activity',
                'object_detection': 'visible elements',
                'category_regions': 'organized sections',
                'spatial_distribution': 'arrangement throughout the space',
                'viewpoint_analysis': 'perspective view'
            }
            for tech_term, natural_term in term_replacements.items():
                standardized = standardized.replace(tech_term, natural_term)

            # Final readability pass.
            return self._improve_template_readability(standardized)

        except Exception as e:
            self.logger.warning(f"Error standardizing template format: {str(e)}")
            return template if template else self._get_fallback_template()

    def _improve_template_readability(self, template: str) -> str:
        """
        Improve the readability and naturalness of a template.

        Args:
            template: Template string.

        Returns:
            str: Template with whitespace collapsed, spacing before
                punctuation fixed, terminal punctuation ensured and
                immediately repeated function words ("the the") collapsed.
                The input is returned unchanged on error.
        """
        try:
            # Collapse runs of whitespace (including newlines).
            improved = re.sub(r'\s+', ' ', template).strip()

            # Fix spacing around sentence connectors.
            improved = improved.replace(' . ', '. ')
            improved = improved.replace(' , ', ', ')
            improved = improved.replace(' ; ', '; ')

            # Ensure terminal punctuation.
            if improved and not improved.endswith(('.', '!', '?')):
                improved += '.'

            # Collapse repeated function words, keeping the casing of the
            # first one so a sentence-initial "The the" stays capitalised.
            return self._DUPLICATED_WORDS.sub(r'\1', improved)

        except Exception as e:
            self.logger.warning(f"Error improving template readability: {str(e)}")
            return template

    def process_structured_template(self, template: Dict[str, Any], scene_data: Dict[str, Any],
                                    statistics_processor) -> str:
        """
        Render a structured template dictionary into a scene description.

        Args:
            template: Structured template; its 'structure' list holds section
                dicts with a 'type' ("opening", "zone_analysis",
                "object_summary" or "conclusion") and optional 'content'.
            scene_data: Scene analysis data; 'functional_zones' (or 'zones')
                and 'detected_objects' are read.
            statistics_processor: Object providing generate_zone_descriptions,
                generate_object_summary and generate_conclusion.

        Returns:
            str: The generated description, or a fallback description when the
                template has no structure or an error occurs.
        """
        try:
            # Extract the relevant parts of scene_data.
            zone_data = scene_data.get("functional_zones", scene_data.get("zones", {}))
            object_data = scene_data.get("detected_objects", [])

            structure = template.get("structure", [])
            if not structure:
                self.logger.warning("Template has no structure defined")
                return self._generate_fallback_scene_description(scene_data)

            description_parts = []

            # Generate the description section by section.
            for section in structure:
                section_type = section.get("type", "")

                if section_type == "opening":
                    description_parts.append(section.get("content", ""))

                elif section_type == "zone_analysis":
                    zone_descriptions = statistics_processor.generate_zone_descriptions(zone_data, section)
                    if zone_descriptions:
                        description_parts.extend(zone_descriptions)

                elif section_type == "object_summary":
                    object_summary = statistics_processor.generate_object_summary(object_data, section)
                    if object_summary:
                        description_parts.append(object_summary)

                elif section_type == "conclusion":
                    conclusion = statistics_processor.generate_conclusion(template, zone_data, object_data)
                    if conclusion:
                        description_parts.append(conclusion)

            # Merge and standardise the output.
            final_description = self._standardize_final_description(" ".join(description_parts))
            self.logger.info("Successfully applied structured template")
            return final_description

        except Exception as e:
            self.logger.error(f"Error processing structured template: {str(e)}")
            return self._generate_fallback_scene_description(scene_data)

    def _generate_fallback_scene_description(self, scene_data: Dict[str, Any]) -> str:
        """
        Generate a fallback scene description from basic counts.

        Args:
            scene_data: Scene analysis data; 'detected_objects',
                'functional_zones' (or 'zones') and 'scene_type' are read.

        Returns:
            str: Fallback scene description.
        """
        try:
            detected_objects = scene_data.get("detected_objects", [])
            zones = scene_data.get("functional_zones", scene_data.get("zones", {}))
            scene_type = scene_data.get("scene_type", "general")

            object_count = len(detected_objects)
            zone_count = len(zones)

            if zone_count > 0 and object_count > 0:
                return f"Scene analysis completed with {zone_count} functional areas containing {object_count} identified objects."
            if object_count > 0:
                return f"Scene analysis identified {object_count} objects in this {scene_type.replace('_', ' ')} environment."
            return f"Scene analysis completed for this {scene_type.replace('_', ' ')} environment."

        except Exception as e:
            self.logger.warning(f"Error generating fallback description: {str(e)}")
            return "Scene analysis completed with detected objects and functional areas."

    def _standardize_final_description(self, description: str) -> str:
        """
        Standardise the final description text.

        Args:
            description: Raw description text.

        Returns:
            str: Description with whitespace normalised and residual technical
                identifiers (e.g. "zone_3", "_area") removed. The input is
                returned unchanged on error.
        """
        try:
            # Collapse redundant whitespace.
            description = " ".join(description.split())

            # Remove any residual technical identifiers.
            for pattern in self._TECHNICAL_PATTERNS:
                description = re.sub(pattern, '', description, flags=re.IGNORECASE)

            # Removing an identifier can leave a gap or a space before
            # punctuation ("in  are", "in ."); tidy both up.
            description = re.sub(r'\s+([.,;:!?])', r'\1', description)
            return " ".join(description.split())

        except Exception as e:
            self.logger.error(f"Error standardizing final description: {str(e)}")
            return description

    def generate_fallback_description(self, scene_type: str, detected_objects: List[Dict]) -> str:
        """
        Generate a minimal description, used when template filling fails completely.

        Args:
            scene_type: Scene type; underscores are rendered as spaces.
            detected_objects: List of detected objects; only its length is used.

        Returns:
            str: Fallback description, with "A"/"An" chosen from the first
                letter of the scene type.
        """
        try:
            object_count = len(detected_objects)
            readable_type = scene_type.replace('_', ' ').strip()

            if readable_type:
                article = "An" if readable_type[0].lower() in ('a', 'e', 'i', 'o', 'u') else "A"
                subject = f"{article} {readable_type} scene"
            else:
                subject = "A scene"

            if object_count == 0:
                return f"{subject}."
            if object_count == 1:
                return f"{subject} with one visible element."
            return f"{subject} with {object_count} visible elements."

        except Exception as e:
            self.logger.warning(f"Error generating fallback description: {str(e)}")
            return "A scene with various elements."
|
template_repository.py
ADDED
@@ -0,0 +1,834 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import traceback
|
3 |
+
from typing import Dict, List, Optional, Any
|
4 |
+
|
5 |
+
from scene_detail_templates import SCENE_DETAIL_TEMPLATES
|
6 |
+
from object_template_fillers import OBJECT_TEMPLATE_FILLERS
|
7 |
+
from viewpoint_templates import VIEWPOINT_TEMPLATES
|
8 |
+
from cultural_templates import CULTURAL_TEMPLATES
|
9 |
+
from lighting_conditions import LIGHTING_CONDITIONS
|
10 |
+
from confidence_templates import CONFIDENCE_TEMPLATES
|
11 |
+
|
12 |
+
class TemplateRepository:
|
13 |
+
"""
|
14 |
+
模板資料的管理器 - 負責模板的載入、儲存、檢索和驗證
|
15 |
+
|
16 |
+
此類別專門處理模板資源的管理,包括從各種來源載入模板、
|
17 |
+
驗證模板完整性,以及提供統一的模板檢索介面。
|
18 |
+
"""
|
19 |
+
|
20 |
+
def __init__(self, custom_templates_db: Optional[Dict] = None):
    """
    Initialise the template repository.

    Loads the template database, builds the template registry, merges any
    custom templates and validates the result. If any of these steps raises,
    the error is logged with its traceback and `self.templates` is replaced
    by the fallback templates.

    Args:
        custom_templates_db: Optional custom template database; when provided
            (and non-empty) it is merged with the default templates.
    """
    self.logger = logging.getLogger(type(self).__name__)
    self.templates = {}
    self.template_registry = {}

    try:
        # Load the template database, then the structured-template registry.
        self.templates = self._load_templates()
        self.template_registry = self._initialize_template_registry()

        # Merge caller-supplied templates, if any.
        if custom_templates_db:
            self._merge_custom_templates(custom_templates_db)

        # Verify template integrity.
        self._validate_templates()

        category_count = len(self.templates)
        self.logger.info("TemplateRepository initialized successfully with %d template categories",
                         category_count)

    except Exception as e:
        failure = f"Failed to initialize TemplateRepository: {str(e)}"
        self.logger.error(f"{failure}\n{traceback.format_exc()}")
        # Fall back to the basic templates.
        self.templates = self._initialize_fallback_templates()
|
53 |
+
|
54 |
+
def _load_templates(self) -> Dict:
    """
    Load all description templates.

    Each template category is loaded independently: a category whose
    module-level constant is not defined (NameError), or whose extraction
    fails in the case of lighting templates, is replaced by an empty dict so
    the remaining categories still load.

    Returns:
        Dict: Mapping of template category name -> templates. If an
            unexpected error occurs, the fallback templates are returned.
    """
    try:
        loaded = {}

        def load_constant(category, label, constant_name, read_constant):
            """Store a module-level template constant under *category*, or {} if it is undefined."""
            self.logger.debug(f"Loading {label}")
            try:
                loaded[category] = read_constant()
            except NameError:
                self.logger.warning(f"{constant_name} not defined, using empty dict")
                loaded[category] = {}

        # The lambdas defer the global lookup so a missing constant raises
        # NameError inside load_constant rather than here.
        leading_constants = (
            ("scene_detail_templates", "scene detail templates",
             "SCENE_DETAIL_TEMPLATES", lambda: SCENE_DETAIL_TEMPLATES),
            ("object_template_fillers", "object template fillers",
             "OBJECT_TEMPLATE_FILLERS", lambda: OBJECT_TEMPLATE_FILLERS),
            ("viewpoint_templates", "viewpoint templates",
             "VIEWPOINT_TEMPLATES", lambda: VIEWPOINT_TEMPLATES),
            ("cultural_templates", "cultural templates",
             "CULTURAL_TEMPLATES", lambda: CULTURAL_TEMPLATES),
        )
        for category, label, constant_name, read_constant in leading_constants:
            load_constant(category, label, constant_name, read_constant)

        # Lighting templates are extracted by a helper method.
        self.logger.debug("Loading lighting templates")
        try:
            loaded["lighting_templates"] = self._extract_lighting_templates()
        except Exception as e:
            self.logger.warning(f"Failed to extract lighting templates: {str(e)}")
            loaded["lighting_templates"] = {}

        load_constant("confidence_templates", "confidence templates",
                      "CONFIDENCE_TEMPLATES", lambda: CONFIDENCE_TEMPLATES)

        # Initialise the default templates (used as a backup).
        self._initialize_default_templates(loaded)

        self.logger.info("Successfully loaded %d template categories", len(loaded))
        return loaded

    except Exception as e:
        failure = f"Unexpected error during template loading: {str(e)}"
        self.logger.error(f"{failure}\n{traceback.format_exc()}")
        # Return the basic templates.
        return self._initialize_fallback_templates()
|
123 |
+
|
124 |
+
def _initialize_template_registry(self) -> Dict[str, Dict[str, Any]]:
|
125 |
+
"""
|
126 |
+
初始化模板註冊表,包含各種場景類型的結構化模板
|
127 |
+
|
128 |
+
Returns:
|
129 |
+
Dict[str, Dict[str, Any]]: 模板註冊表字典
|
130 |
+
"""
|
131 |
+
try:
|
132 |
+
template_registry = {
|
133 |
+
"indoor_detailed": {
|
134 |
+
"scene_type": "indoor",
|
135 |
+
"complexity": "high",
|
136 |
+
"structure": [
|
137 |
+
{
|
138 |
+
"type": "opening",
|
139 |
+
"content": "This indoor scene presents a comprehensive view of a well-organized living space."
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"type": "zone_analysis",
|
143 |
+
"priority": "functional_areas",
|
144 |
+
"detail_level": "detailed"
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"type": "object_summary",
|
148 |
+
"grouping": "by_category",
|
149 |
+
"include_counts": True
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"type": "conclusion",
|
153 |
+
"style": "analytical"
|
154 |
+
}
|
155 |
+
]
|
156 |
+
},
|
157 |
+
|
158 |
+
"indoor_moderate": {
|
159 |
+
"scene_type": "indoor",
|
160 |
+
"complexity": "medium",
|
161 |
+
"structure": [
|
162 |
+
{
|
163 |
+
"type": "opening",
|
164 |
+
"content": "The indoor environment displays organized functional areas."
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"type": "zone_analysis",
|
168 |
+
"priority": "main_areas",
|
169 |
+
"detail_level": "moderate"
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"type": "object_summary",
|
173 |
+
"grouping": "by_function",
|
174 |
+
"include_counts": False
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"type": "conclusion",
|
178 |
+
"style": "descriptive"
|
179 |
+
}
|
180 |
+
]
|
181 |
+
},
|
182 |
+
|
183 |
+
"indoor_simple": {
|
184 |
+
"scene_type": "indoor",
|
185 |
+
"complexity": "low",
|
186 |
+
"structure": [
|
187 |
+
{
|
188 |
+
"type": "opening",
|
189 |
+
"content": "An indoor space with visible furniture and household items."
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"type": "zone_analysis",
|
193 |
+
"priority": "basic_areas",
|
194 |
+
"detail_level": "simple"
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"type": "object_summary",
|
198 |
+
"grouping": "general",
|
199 |
+
"include_counts": False
|
200 |
+
}
|
201 |
+
]
|
202 |
+
},
|
203 |
+
|
204 |
+
"outdoor_detailed": {
|
205 |
+
"scene_type": "outdoor",
|
206 |
+
"complexity": "high",
|
207 |
+
"structure": [
|
208 |
+
{
|
209 |
+
"type": "opening",
|
210 |
+
"content": "This outdoor scene captures a dynamic urban environment with multiple activity zones."
|
211 |
+
},
|
212 |
+
{
|
213 |
+
"type": "zone_analysis",
|
214 |
+
"priority": "activity_areas",
|
215 |
+
"detail_level": "detailed"
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"type": "object_summary",
|
219 |
+
"grouping": "by_location",
|
220 |
+
"include_counts": True
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"type": "conclusion",
|
224 |
+
"style": "environmental"
|
225 |
+
}
|
226 |
+
]
|
227 |
+
},
|
228 |
+
|
229 |
+
"outdoor_moderate": {
|
230 |
+
"scene_type": "outdoor",
|
231 |
+
"complexity": "medium",
|
232 |
+
"structure": [
|
233 |
+
{
|
234 |
+
"type": "opening",
|
235 |
+
"content": "The outdoor scene shows organized public spaces and pedestrian areas."
|
236 |
+
},
|
237 |
+
{
|
238 |
+
"type": "zone_analysis",
|
239 |
+
"priority": "public_areas",
|
240 |
+
"detail_level": "moderate"
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"type": "object_summary",
|
244 |
+
"grouping": "by_type",
|
245 |
+
"include_counts": False
|
246 |
+
},
|
247 |
+
{
|
248 |
+
"type": "conclusion",
|
249 |
+
"style": "observational"
|
250 |
+
}
|
251 |
+
]
|
252 |
+
},
|
253 |
+
|
254 |
+
"outdoor_simple": {
|
255 |
+
"scene_type": "outdoor",
|
256 |
+
"complexity": "low",
|
257 |
+
"structure": [
|
258 |
+
{
|
259 |
+
"type": "opening",
|
260 |
+
"content": "An outdoor area with pedestrians and urban elements."
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"type": "zone_analysis",
|
264 |
+
"priority": "basic_areas",
|
265 |
+
"detail_level": "simple"
|
266 |
+
},
|
267 |
+
{
|
268 |
+
"type": "object_summary",
|
269 |
+
"grouping": "general",
|
270 |
+
"include_counts": False
|
271 |
+
}
|
272 |
+
]
|
273 |
+
},
|
274 |
+
|
275 |
+
"commercial_detailed": {
|
276 |
+
"scene_type": "commercial",
|
277 |
+
"complexity": "high",
|
278 |
+
"structure": [
|
279 |
+
{
|
280 |
+
"type": "opening",
|
281 |
+
"content": "This commercial environment demonstrates organized retail and customer service areas."
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"type": "zone_analysis",
|
285 |
+
"priority": "service_areas",
|
286 |
+
"detail_level": "detailed"
|
287 |
+
},
|
288 |
+
{
|
289 |
+
"type": "object_summary",
|
290 |
+
"grouping": "by_function",
|
291 |
+
"include_counts": True
|
292 |
+
},
|
293 |
+
{
|
294 |
+
"type": "conclusion",
|
295 |
+
"style": "business"
|
296 |
+
}
|
297 |
+
]
|
298 |
+
},
|
299 |
+
|
300 |
+
"transportation_detailed": {
|
301 |
+
"scene_type": "transportation",
|
302 |
+
"complexity": "high",
|
303 |
+
"structure": [
|
304 |
+
{
|
305 |
+
"type": "opening",
|
306 |
+
"content": "This transportation hub features organized passenger facilities and transit infrastructure."
|
307 |
+
},
|
308 |
+
{
|
309 |
+
"type": "zone_analysis",
|
310 |
+
"priority": "transit_areas",
|
311 |
+
"detail_level": "detailed"
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"type": "object_summary",
|
315 |
+
"grouping": "by_transit_function",
|
316 |
+
"include_counts": True
|
317 |
+
},
|
318 |
+
{
|
319 |
+
"type": "conclusion",
|
320 |
+
"style": "infrastructure"
|
321 |
+
}
|
322 |
+
]
|
323 |
+
},
|
324 |
+
|
325 |
+
"default": {
|
326 |
+
"scene_type": "general",
|
327 |
+
"complexity": "medium",
|
328 |
+
"structure": [
|
329 |
+
{
|
330 |
+
"type": "opening",
|
331 |
+
"content": "The scene displays various elements organized across functional areas."
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"type": "zone_analysis",
|
335 |
+
"priority": "general_areas",
|
336 |
+
"detail_level": "moderate"
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"type": "object_summary",
|
340 |
+
"grouping": "general",
|
341 |
+
"include_counts": False
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"type": "conclusion",
|
345 |
+
"style": "general"
|
346 |
+
}
|
347 |
+
]
|
348 |
+
}
|
349 |
+
}
|
350 |
+
|
351 |
+
self.logger.debug(f"Initialized template registry with {len(template_registry)} templates")
|
352 |
+
return template_registry
|
353 |
+
|
354 |
+
except Exception as e:
|
355 |
+
error_msg = f"Error initializing template registry: {str(e)}"
|
356 |
+
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
|
357 |
+
# 返回最基本的註冊表
|
358 |
+
return {
|
359 |
+
"default": {
|
360 |
+
"scene_type": "general",
|
361 |
+
"complexity": "low",
|
362 |
+
"structure": [
|
363 |
+
{
|
364 |
+
"type": "opening",
|
365 |
+
"content": "Scene analysis completed with identified objects and areas."
|
366 |
+
}
|
367 |
+
]
|
368 |
+
}
|
369 |
+
}
|
370 |
+
|
371 |
+
def _extract_lighting_templates(self) -> Dict:
    """
    Derive lighting description templates from ``LIGHTING_CONDITIONS``.

    For every entry under ``LIGHTING_CONDITIONS["time_descriptions"]`` the
    ``"general"`` text is used when the entry is a dict that provides one;
    otherwise a generic sentence is synthesised from the entry's key.

    Returns:
        Dict: Mapping of lighting key to description text. The defaults from
        ``self._get_default_lighting_templates()`` are returned when nothing
        could be extracted or when extraction raises.
    """
    try:
        # Time-of-day descriptions published by the lighting-conditions data
        descriptions = LIGHTING_CONDITIONS.get("time_descriptions", {})

        extracted = {}
        for key, data in descriptions.items():
            has_general_text = isinstance(data, dict) and "general" in data
            if has_general_text:
                extracted[key] = data["general"]
                continue
            # Unexpected data shape: synthesise a description from the key
            extracted[key] = f"The scene is captured during {key.replace('_', ' ')}."

        # Guarantee that at least the basic lighting templates exist
        if len(extracted) == 0:
            self.logger.warning("No lighting templates found, using defaults")
            extracted = self._get_default_lighting_templates()

        self.logger.debug("Extracted %d lighting templates", len(extracted))
        return extracted

    except Exception as e:
        self.logger.warning(f"Error extracting lighting templates: {str(e)}, using defaults")
        return self._get_default_lighting_templates()
|
402 |
+
|
403 |
+
def _get_default_lighting_templates(self) -> Dict:
|
404 |
+
"""獲取默認照明模板"""
|
405 |
+
return {
|
406 |
+
"day_clear": "The scene is captured during clear daylight conditions.",
|
407 |
+
"day_overcast": "The scene is captured during overcast daylight.",
|
408 |
+
"night": "The scene is captured at night with artificial lighting.",
|
409 |
+
"dawn": "The scene is captured during dawn with soft natural lighting.",
|
410 |
+
"dusk": "The scene is captured during dusk with diminishing natural light.",
|
411 |
+
"unknown": "The lighting conditions are not clearly identifiable."
|
412 |
+
}
|
413 |
+
|
414 |
+
def _initialize_default_templates(self, templates: Dict):
|
415 |
+
"""
|
416 |
+
初始化默認模板作為備份機制
|
417 |
+
|
418 |
+
Args:
|
419 |
+
templates: 要檢查和補充的模板字典
|
420 |
+
"""
|
421 |
+
try:
|
422 |
+
# 置信度模板備份
|
423 |
+
if "confidence_templates" not in templates or not templates["confidence_templates"]:
|
424 |
+
templates["confidence_templates"] = {
|
425 |
+
"high": "{description} {details}",
|
426 |
+
"medium": "This appears to be {description} {details}",
|
427 |
+
"low": "This might be {description}, but the confidence is low. {details}"
|
428 |
+
}
|
429 |
+
|
430 |
+
# 場景詳細模板備份
|
431 |
+
if "scene_detail_templates" not in templates or not templates["scene_detail_templates"]:
|
432 |
+
templates["scene_detail_templates"] = {
|
433 |
+
"default": ["A scene with various elements and objects."]
|
434 |
+
}
|
435 |
+
|
436 |
+
# 物體填充模板備份
|
437 |
+
if "object_template_fillers" not in templates or not templates["object_template_fillers"]:
|
438 |
+
templates["object_template_fillers"] = {
|
439 |
+
"default": ["various items", "different objects", "multiple elements"]
|
440 |
+
}
|
441 |
+
|
442 |
+
# 視角模板備份
|
443 |
+
if "viewpoint_templates" not in templates or not templates["viewpoint_templates"]:
|
444 |
+
templates["viewpoint_templates"] = {
|
445 |
+
"eye_level": {
|
446 |
+
"prefix": "From eye level, ",
|
447 |
+
"observation": "the scene is viewed straight ahead.",
|
448 |
+
"short_desc": "at eye level"
|
449 |
+
},
|
450 |
+
"aerial": {
|
451 |
+
"prefix": "From above, ",
|
452 |
+
"observation": "the scene is viewed from a bird's-eye perspective.",
|
453 |
+
"short_desc": "from above"
|
454 |
+
},
|
455 |
+
"low_angle": {
|
456 |
+
"prefix": "From a low angle, ",
|
457 |
+
"observation": "the scene is viewed from below looking upward.",
|
458 |
+
"short_desc": "from below"
|
459 |
+
},
|
460 |
+
"elevated": {
|
461 |
+
"prefix": "From an elevated position, ",
|
462 |
+
"observation": "the scene is viewed from a higher vantage point.",
|
463 |
+
"short_desc": "from an elevated position"
|
464 |
+
}
|
465 |
+
}
|
466 |
+
|
467 |
+
# 文化模板備份
|
468 |
+
if "cultural_templates" not in templates or not templates["cultural_templates"]:
|
469 |
+
templates["cultural_templates"] = {
|
470 |
+
"asian": {
|
471 |
+
"elements": ["traditional architectural elements", "cultural signage", "Asian design features"],
|
472 |
+
"description": "The scene displays distinctive Asian cultural characteristics with {elements}."
|
473 |
+
},
|
474 |
+
"european": {
|
475 |
+
"elements": ["classical architecture", "European design elements", "historic features"],
|
476 |
+
"description": "The scene exhibits European architectural and cultural elements including {elements}."
|
477 |
+
}
|
478 |
+
}
|
479 |
+
|
480 |
+
self.logger.debug("Default templates initialized as backup")
|
481 |
+
|
482 |
+
except Exception as e:
|
483 |
+
self.logger.error(f"Error initializing default templates: {str(e)}")
|
484 |
+
|
485 |
+
def _merge_custom_templates(self, custom_templates: Dict):
|
486 |
+
"""
|
487 |
+
合併自定義模板到現有模板庫
|
488 |
+
|
489 |
+
Args:
|
490 |
+
custom_templates: 自定義模板字典
|
491 |
+
"""
|
492 |
+
try:
|
493 |
+
for template_category, custom_content in custom_templates.items():
|
494 |
+
if template_category in self.templates:
|
495 |
+
if isinstance(self.templates[template_category], dict) and isinstance(custom_content, dict):
|
496 |
+
self.templates[template_category].update(custom_content)
|
497 |
+
self.logger.debug(f"Merged custom templates for category: {template_category}")
|
498 |
+
else:
|
499 |
+
self.templates[template_category] = custom_content
|
500 |
+
self.logger.debug(f"Replaced templates for category: {template_category}")
|
501 |
+
else:
|
502 |
+
self.templates[template_category] = custom_content
|
503 |
+
self.logger.debug(f"Added new template category: {template_category}")
|
504 |
+
|
505 |
+
self.logger.info("Successfully merged custom templates")
|
506 |
+
|
507 |
+
except Exception as e:
|
508 |
+
self.logger.warning(f"Error merging custom templates: {str(e)}")
|
509 |
+
|
510 |
+
def _validate_templates(self):
|
511 |
+
"""
|
512 |
+
驗證模板完整性和有效性
|
513 |
+
"""
|
514 |
+
try:
|
515 |
+
required_categories = [
|
516 |
+
"scene_detail_templates",
|
517 |
+
"object_template_fillers",
|
518 |
+
"viewpoint_templates",
|
519 |
+
"cultural_templates",
|
520 |
+
"lighting_templates",
|
521 |
+
"confidence_templates"
|
522 |
+
]
|
523 |
+
|
524 |
+
missing_categories = []
|
525 |
+
for category in required_categories:
|
526 |
+
if category not in self.templates:
|
527 |
+
missing_categories.append(category)
|
528 |
+
elif not self.templates[category]:
|
529 |
+
self.logger.warning(f"Template category '{category}' is empty")
|
530 |
+
|
531 |
+
if missing_categories:
|
532 |
+
error_msg = f"Missing required template categories: {missing_categories}"
|
533 |
+
self.logger.warning(error_msg)
|
534 |
+
# 為缺失的類別創建空模板
|
535 |
+
for category in missing_categories:
|
536 |
+
self.templates[category] = {}
|
537 |
+
|
538 |
+
# 驗證視角模板結構
|
539 |
+
self._validate_viewpoint_templates()
|
540 |
+
|
541 |
+
# 驗證文化模板結構
|
542 |
+
self._validate_cultural_templates()
|
543 |
+
|
544 |
+
self.logger.debug("Template validation completed successfully")
|
545 |
+
|
546 |
+
except Exception as e:
|
547 |
+
error_msg = f"Template validation failed: {str(e)}"
|
548 |
+
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
|
549 |
+
|
550 |
+
def _validate_viewpoint_templates(self):
|
551 |
+
"""驗證視角模板結構"""
|
552 |
+
viewpoint_templates = self.templates.get("viewpoint_templates", {})
|
553 |
+
|
554 |
+
for viewpoint, template_data in viewpoint_templates.items():
|
555 |
+
if not isinstance(template_data, dict):
|
556 |
+
self.logger.warning(f"Invalid viewpoint template structure for '{viewpoint}'")
|
557 |
+
continue
|
558 |
+
|
559 |
+
required_keys = ["prefix", "observation"]
|
560 |
+
for key in required_keys:
|
561 |
+
if key not in template_data:
|
562 |
+
self.logger.warning(f"Missing '{key}' in viewpoint template '{viewpoint}'")
|
563 |
+
|
564 |
+
def _validate_cultural_templates(self):
|
565 |
+
"""驗證文化模板結構"""
|
566 |
+
cultural_templates = self.templates.get("cultural_templates", {})
|
567 |
+
|
568 |
+
for culture, template_data in cultural_templates.items():
|
569 |
+
if not isinstance(template_data, dict):
|
570 |
+
self.logger.warning(f"Invalid cultural template structure for '{culture}'")
|
571 |
+
continue
|
572 |
+
|
573 |
+
if "elements" not in template_data or "description" not in template_data:
|
574 |
+
self.logger.warning(f"Missing required keys in cultural template '{culture}'")
|
575 |
+
|
576 |
+
def _initialize_fallback_templates(self) -> Dict:
|
577 |
+
"""
|
578 |
+
初始化備用模板系統,當主要載入失敗時使用
|
579 |
+
|
580 |
+
Returns:
|
581 |
+
Dict: 最基本的模板字典
|
582 |
+
"""
|
583 |
+
return {
|
584 |
+
"scene_detail_templates": {"default": ["A scene with various elements."]},
|
585 |
+
"object_template_fillers": {"default": ["various items"]},
|
586 |
+
"viewpoint_templates": {
|
587 |
+
"eye_level": {
|
588 |
+
"prefix": "From eye level, ",
|
589 |
+
"observation": "the scene is viewed straight ahead.",
|
590 |
+
"short_desc": "at eye level"
|
591 |
+
}
|
592 |
+
},
|
593 |
+
"cultural_templates": {"default": {"elements": ["elements"], "description": "The scene displays cultural elements."}},
|
594 |
+
"lighting_templates": {"unknown": "The lighting conditions are not clearly identifiable."},
|
595 |
+
"confidence_templates": {"medium": "{description} {details}"}
|
596 |
+
}
|
597 |
+
|
598 |
+
def get_template(self, category: str, key: Optional[str] = None) -> Any:
    """
    Look up a whole template category or a single template within it.

    Args:
        category: Name of the template category.
        key: Optional key of one template inside the category.

    Returns:
        Any: The category content when ``key`` is None, otherwise the
        template stored under ``key``. When the lookup cannot be satisfied,
        an empty dict is returned for category lookups and an empty string
        for key lookups.
    """
    try:
        if category not in self.templates:
            self.logger.warning(f"Template category '{category}' not found")
            return "" if key is not None else {}

        entries = self.templates[category]
        if key is None:
            return entries

        if not isinstance(entries, dict):
            self.logger.warning(f"Template category '{category}' is not a dictionary")
            return ""

        if key in entries:
            return entries[key]

        self.logger.warning(f"Template key '{key}' not found in category '{category}'")
        return ""

    except Exception as e:
        error_msg = f"Error retrieving template {category}.{key}: {str(e)}"
        self.logger.error(error_msg)
        return "" if key is not None else {}
|
632 |
+
|
633 |
+
def get_template_categories(self) -> List[str]:
    """
    List the names of all template categories currently held.

    Returns:
        List[str]: Category names, in the order they are stored.
    """
    category_names = self.templates.keys()
    return list(category_names)
|
641 |
+
|
642 |
+
def template_exists(self, category: str, key: Optional[str] = None) -> bool:
    """
    Report whether a template category, or one key inside it, is available.

    Args:
        category: Template category name.
        key: Optional template key within the category.

    Returns:
        bool: True when the category exists and, if ``key`` is given, the
        category is a dict containing that key. False otherwise, including
        when the check itself raises.
    """
    try:
        if category not in self.templates:
            return False
        if key is None:
            return True

        entries = self.templates[category]
        # Only dict-shaped categories can be searched by key
        return key in entries if isinstance(entries, dict) else False

    except Exception as e:
        self.logger.warning(f"Error checking template existence for {category}.{key}: {str(e)}")
        return False
|
669 |
+
|
670 |
+
def get_confidence_template(self, confidence_level: str) -> str:
    """
    Return the phrasing template for a confidence level.

    Args:
        confidence_level: Confidence level ('high', 'medium', 'low').

    Returns:
        str: The configured template for that level; otherwise a built-in
        template, with ``"{description} {details}"`` used for unknown levels
        and on error.
    """
    plain = "{description} {details}"
    try:
        configured = self.templates.get("confidence_templates", {})
        if confidence_level in configured:
            return configured[confidence_level]

        # Built-in templates used when nothing is configured for the level
        if confidence_level == "high":
            return "{description} {details}"
        if confidence_level == "medium":
            return "This appears to be {description} {details}"
        if confidence_level == "low":
            return "This might be {description}, but the confidence is low. {details}"
        return plain

    except Exception as e:
        self.logger.warning(f"Error getting confidence template for '{confidence_level}': {str(e)}")
        return plain
|
698 |
+
|
699 |
+
def get_lighting_template(self, lighting_type: str) -> str:
    """
    Return the description template for a lighting type.

    Args:
        lighting_type: Lighting type key.

    Returns:
        str: The configured lighting description, or a generic sentence
        built from ``lighting_type`` (underscores shown as spaces) when none
        is configured. A fixed "not clearly identifiable" sentence is
        returned on error.
    """
    try:
        configured = self.templates.get("lighting_templates", {})
        if lighting_type not in configured:
            # Nothing configured: describe the lighting type generically
            readable = lighting_type.replace('_', ' ')
            return f"The scene is captured with {readable} lighting conditions."
        return configured[lighting_type]

    except Exception as e:
        self.logger.warning(f"Error getting lighting template for '{lighting_type}': {str(e)}")
        return "The lighting conditions are not clearly identifiable."
|
721 |
+
|
722 |
+
def get_viewpoint_template(self, viewpoint: str) -> Dict[str, str]:
    """
    Return the template describing a camera viewpoint.

    Args:
        viewpoint: Viewpoint type.

    Returns:
        Dict[str, str]: The configured template for the viewpoint when one
        exists. Otherwise a built-in template with ``prefix``,
        ``observation`` and ``short_desc`` keys; unknown viewpoints get the
        built-in ``eye_level`` template. A neutral template is returned on
        error.
    """
    try:
        configured = self.templates.get("viewpoint_templates", {})
        if viewpoint in configured:
            return configured[viewpoint]

        # Built-in templates: (name, prefix, observation, short description)
        builtin_rows = (
            ("eye_level", "From eye level, ",
             "the scene is viewed straight ahead.", "at eye level"),
            ("aerial", "From above, ",
             "the scene is viewed from a bird's-eye perspective.", "from above"),
            ("low_angle", "From a low angle, ",
             "the scene is viewed from below looking upward.", "from below"),
            ("elevated", "From an elevated position, ",
             "the scene is viewed from a higher vantage point.", "from an elevated position"),
        )
        builtin = {
            name: {"prefix": prefix, "observation": observation, "short_desc": short_desc}
            for name, prefix, observation, short_desc in builtin_rows
        }

        if viewpoint in builtin:
            return builtin[viewpoint]
        return builtin["eye_level"]

    except Exception as e:
        self.logger.warning(f"Error getting viewpoint template for '{viewpoint}': {str(e)}")
        return {
            "prefix": "",
            "observation": "the scene is viewed normally.",
            "short_desc": "normally"
        }
|
771 |
+
|
772 |
+
def get_cultural_template(self, cultural_context: str) -> Dict[str, Any]:
    """
    Return the template for a cultural context.

    Args:
        cultural_context: Cultural context name.

    Returns:
        Dict[str, Any]: The configured cultural template, or a generic
        template (``elements`` and ``description`` keys) that mentions the
        requested context. A context-free template is returned on error.
    """
    try:
        configured = self.templates.get("cultural_templates", {})
        if cultural_context not in configured:
            # Nothing configured for this context: build a generic template
            generic = {"elements": ["cultural elements"]}
            generic["description"] = f"The scene displays {cultural_context} cultural characteristics."
            return generic
        return configured[cultural_context]

    except Exception as e:
        self.logger.warning(f"Error getting cultural template for '{cultural_context}': {str(e)}")
        return {
            "elements": ["various elements"],
            "description": "The scene displays cultural characteristics."
        }
|
800 |
+
|
801 |
+
def get_scene_detail_templates(self, scene_type: str, viewpoint: Optional[str] = None) -> List[str]:
    """
    Return the detailed description templates for a scene.

    Lookup order: the viewpoint-specific key ``"<scene_type>_<viewpoint>"``
    (only when a viewpoint is given), then ``scene_type`` itself, then
    ``"default"``.

    Args:
        scene_type: Scene type.
        viewpoint: Optional viewpoint type.

    Returns:
        List[str]: The first matching template list, or a single generic
        sentence when nothing matches or an error occurs.
    """
    generic = ["A scene with various elements and objects."]
    try:
        scene_templates = self.templates.get("scene_detail_templates", {})

        # Most specific key first, most general last
        lookup_order = []
        if viewpoint:
            lookup_order.append(f"{scene_type}_{viewpoint}")
        lookup_order.append(scene_type)
        lookup_order.append("default")

        for candidate in lookup_order:
            if candidate in scene_templates:
                return scene_templates[candidate]

        # Nothing configured at all
        return generic

    except Exception as e:
        self.logger.warning(f"Error getting scene detail templates for '{scene_type}': {str(e)}")
        return generic
|
text_optimizer.py
ADDED
@@ -0,0 +1,616 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import logging
|
3 |
+
from typing import Dict, List, Optional, Any, Tuple
|
4 |
+
|
5 |
+
class TextOptimizer:
|
6 |
+
"""
|
7 |
+
文本優化器 - 專門處理文本格式化、清理和優化
|
8 |
+
負責物件列表格式化、重複移除、複數形式處理以及描述文本的優化
|
9 |
+
"""
|
10 |
+
|
11 |
+
def __init__(self):
    """Set up the text optimizer with a logger named after its class."""
    logger_name = self.__class__.__name__
    self.logger = logging.getLogger(logger_name)
|
14 |
+
|
15 |
+
def format_object_list_for_description(self,
                                       objects: List[Dict],
                                       use_indefinite_article_for_one: bool = False,
                                       count_threshold_for_generalization: int = -1,
                                       max_types_to_list: int = 5) -> str:
    """
    Turn a list of detected objects into a human-readable phrase with counts.

    Objects are tallied by ``class_name``; entries with no name or named
    "unknown object" are ignored. Types are ordered by descending count,
    then by name, and only the first ``max_types_to_list`` are mentioned.

    Args:
        objects: Object dictionaries; each should carry a 'class_name'.
        use_indefinite_article_for_one: Use "a/an" for a single object
            instead of "one".
        count_threshold_for_generalization: Counts above this value are
            worded as "several" (up to threshold + 3) or "many"; -1 keeps
            exact counts.
        max_types_to_list: Maximum number of distinct object types to list.

    Returns:
        str: The formatted phrase, "no specific objects clearly identified"
        when there is nothing to list, or "various objects" on error.
    """
    nothing_found = "no specific objects clearly identified"

    def describe(label, quantity):
        # Wording for one object type given how many were seen.
        if quantity == 1:
            if not use_indefinite_article_for_one:
                return f"one {label}"
            article = "an" if label[0].lower() in 'aeiou' else "a"
            return f"{article} {label}"

        plural = self._get_plural_form(label)
        generalize = (count_threshold_for_generalization != -1
                      and quantity > count_threshold_for_generalization)
        if not generalize:
            return f"{quantity} {plural}"
        if quantity <= count_threshold_for_generalization + 3:
            return f"several {plural}"
        return f"many {plural}"

    try:
        if not objects:
            return nothing_found

        tally: Dict[str, int] = {}
        for entry in objects:
            label = entry.get("class_name", "unknown object")
            if label != "unknown object" and label:
                tally[label] = tally.get(label, 0) + 1

        if not tally:
            return nothing_found

        # Count descending, then name ascending; cap the number of types
        ranked = sorted(tally.items(), key=lambda pair: (-pair[1], pair[0]))
        phrases = [describe(label, quantity) for label, quantity in ranked[:max_types_to_list]]

        if not phrases:
            return nothing_found

        *leading, final = phrases
        if not leading:
            return final
        if len(leading) == 1:
            return f"{leading[0]} and {final}"
        # Three or more items: Oxford-comma list
        return ", ".join(leading) + f", and {final}"

    except Exception as e:
        self.logger.warning(f"Error formatting object list: {str(e)}")
        return "various objects"
|
85 |
+
|
86 |
+
def optimize_object_description(self, description: str) -> str:
    """
    Tidy a generated scene description by collapsing repeated object mentions.

    This is a post-processing step for generated description text. Redundant
    spatial qualifiers are removed first. Then every object list introduced
    by "with ..." (up to the next period or the end of the text) is parsed,
    its items are counted, and the list is rewritten in a condensed form.

    Args:
        description: Raw scene description, possibly containing repeated or
            redundant wording.

    Returns:
        str: The cleaned-up description. If processing fails, the text as it
        stood when the error occurred is returned.
    """
    text = description
    try:
        # 1. Drop unnecessary spatial qualifiers
        text = self._remove_redundant_spatial_qualifiers(text)

        # 2. Locate object lists such as "with X, Y, Z" or "with X and Y"
        for listed in re.findall(r'with ([^.]+?)(?=\.|$)', text):
            # 3. Split the list into individual items
            items = self._parse_object_list_items(listed)

            # 4. Count how often each item occurs
            frequencies = self._count_object_items(items)
            if not frequencies:
                continue

            # 5. Rebuild the list in condensed form and substitute it
            condensed = self._format_item_list(self._generate_optimized_item_list(frequencies))
            text = text.replace(listed, condensed)

        return text

    except Exception as e:
        self.logger.warning(f"Error optimizing object description: {str(e)}")
        return text
|
128 |
+
|
129 |
+
def remove_repetitive_descriptors(self, description: str) -> str:
    """
    Strip repetitive or overly technical wording from a description.

    Applies a fixed, ordered list of case-insensitive regex substitutions
    (removing "identical", shortening "comprehensive ..." phrases) and then
    collapses every run of whitespace into a single space.

    Args:
        description: Original description text.

    Returns:
        str: The cleaned text, or the original text if an error occurs
        (a warning is logged in that case).
    """
    try:
        # (regex, replacement) pairs, applied top to bottom.
        rules = (
            # Drop "identical" after a count or on its own.
            (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
            (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
            (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
            # Tone down "comprehensive ..." phrasing.
            (r'\bcomprehensive arrangement of\b', 'arrangement of'),
            (r'\bcomprehensive view featuring\b', 'scene featuring'),
            (r'\bcomprehensive display of\b', 'display of'),
            # Simplify over-descriptive phrases.
            (r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
            (r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
        )

        cleaned = description
        for regex, substitute in rules:
            cleaned = re.sub(regex, substitute, cleaned, flags=re.IGNORECASE)

        # Substitutions can leave doubled spaces behind; normalize them.
        cleaned = re.sub(r'\s+', ' ', cleaned)
        cleaned = cleaned.strip()

        self.logger.debug("Cleaned description: removed repetitive descriptors")
        return cleaned

    except Exception as exc:
        self.logger.warning(f"Error removing repetitive descriptors: {exc}")
        return description
|
170 |
+
|
171 |
+
def format_object_count_description(self, class_name: str, count: int,
                                    scene_type: Optional[str] = None,
                                    detected_objects: Optional[List[Dict]] = None,
                                    avg_confidence: float = 0.0) -> str:
    """
    Build the phrase describing how many objects of one class were found.

    A non-positive count yields an empty string. A count of one is handed
    to the single-object formatter; any larger count is handed to the
    multiple-object formatter together with the plural form of the name.

    Args:
        class_name: Normalized class name.
        count: Number of objects of this class.
        scene_type: Scene type, forwarded to the formatters.
        detected_objects: Detections of this class, forwarded to the
            formatters.
        avg_confidence: Average detection confidence, forwarded to the
            formatters.

    Returns:
        str: The formatted phrase. On error a warning is logged and a plain
        fallback is returned ("<count> <class_name>s" when count > 1,
        otherwise class_name).
    """
    try:
        if count <= 0:
            return ""

        # Resolved up front, before choosing the singular or plural path.
        pluralized = self._get_plural_form(class_name)

        if count != 1:
            return self._format_multiple_objects_description(
                class_name, count, pluralized,
                scene_type, detected_objects, avg_confidence)

        return self._format_single_object_description(
            class_name, scene_type, detected_objects, avg_confidence)

    except Exception as exc:
        self.logger.warning(f"Error formatting object count for '{class_name}': {exc}")
        if count > 1:
            return f"{count} {class_name}s"
        return class_name
|
207 |
+
|
208 |
+
def normalize_object_class_name(self, class_name: str) -> str:
    """
    Turn a raw detector class label into a natural-language name.

    The label is lowercased, a leading "class_"/"id_"/"type_" and a
    trailing "_class"/"_id"/"_type" are removed, underscores and hyphens
    become spaces, whitespace is collapsed, and a small lookup table then
    replaces a few labels with friendlier names.

    Args:
        class_name: Raw class label.

    Returns:
        str: The normalized name; "object" when the input is empty or not a
        string. On error a warning is logged and the input is returned if
        it is a string, otherwise "object".
    """
    try:
        if not (class_name and isinstance(class_name, str)):
            return "object"

        # Preferred display names; most labels map to themselves.
        display_names = {
            'traffic light': 'traffic light',
            'stop sign': 'stop sign',
            'fire hydrant': 'fire hydrant',
            'dining table': 'dining table',
            'potted plant': 'potted plant',
            'tv monitor': 'television',
            'cell phone': 'mobile phone',
            'wine glass': 'wine glass',
            'hot dog': 'hot dog',
            'teddy bear': 'teddy bear',
            'hair drier': 'hair dryer',
            'toothbrush': 'toothbrush'
        }

        # Strip technical prefixes and suffixes.
        label = class_name.lower()
        label = re.sub(r'^(class_|id_|type_)', '', label)
        label = re.sub(r'(_class|_id|_type)$', '', label)

        # Separators become spaces; split/join collapses repeated spaces.
        words = label.replace('_', ' ').replace('-', ' ').split()
        label = ' '.join(words)

        return display_names.get(label, label)

    except Exception as exc:
        self.logger.warning(f"Error normalizing class name '{class_name}': {exc}")
        if isinstance(class_name, str):
            return class_name
        return "object"
|
253 |
+
|
254 |
+
def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
|
255 |
+
"""
|
256 |
+
移除描述中冗餘的空間限定詞
|
257 |
+
|
258 |
+
Args:
|
259 |
+
description: 包含可能多餘空間描述的文本
|
260 |
+
|
261 |
+
Returns:
|
262 |
+
str: 移除多餘空間限定詞後的文本
|
263 |
+
"""
|
264 |
+
# 定義常見的多餘空間表達模式
|
265 |
+
redundant_patterns = [
|
266 |
+
# 室內物件的多餘房間描述
|
267 |
+
(r'\b(bed|sofa|couch|chair|table|desk|dresser|nightstand)\s+in\s+the\s+(room|bedroom|living\s+room)', r'\1'),
|
268 |
+
# 廚房物件的多餘描述
|
269 |
+
(r'\b(refrigerator|stove|oven|sink|microwave)\s+in\s+the\s+kitchen', r'\1'),
|
270 |
+
# 浴室物件的多餘描述
|
271 |
+
(r'\b(toilet|shower|bathtub|sink)\s+in\s+the\s+(bathroom|restroom)', r'\1'),
|
272 |
+
# 一般性的多餘表達:「在場景中」、「在圖片中」等
|
273 |
+
(r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
|
274 |
+
]
|
275 |
+
|
276 |
+
for pattern, replacement in redundant_patterns:
|
277 |
+
description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
|
278 |
+
|
279 |
+
return description
|
280 |
+
|
281 |
+
def _parse_object_list_items(self, obj_list: str) -> List[str]:
|
282 |
+
"""
|
283 |
+
解析物件列表中的項目
|
284 |
+
|
285 |
+
Args:
|
286 |
+
obj_list: 物件列表字符串
|
287 |
+
|
288 |
+
Returns:
|
289 |
+
List[str]: 解析後的項目列表
|
290 |
+
"""
|
291 |
+
# 先處理逗號格式 "A, B, and C"
|
292 |
+
if ", and " in obj_list:
|
293 |
+
before_last_and = obj_list.rsplit(", and ", 1)[0]
|
294 |
+
last_item = obj_list.rsplit(", and ", 1)[1]
|
295 |
+
front_items = [item.strip() for item in before_last_and.split(",")]
|
296 |
+
all_items = front_items + [last_item.strip()]
|
297 |
+
elif " and " in obj_list:
|
298 |
+
all_items = [item.strip() for item in obj_list.split(" and ")]
|
299 |
+
else:
|
300 |
+
all_items = [item.strip() for item in obj_list.split(",")]
|
301 |
+
|
302 |
+
return all_items
|
303 |
+
|
304 |
+
def _count_object_items(self, all_items: List[str]) -> Dict[str, int]:
|
305 |
+
"""
|
306 |
+
統計物件項目的出現次數
|
307 |
+
|
308 |
+
Args:
|
309 |
+
all_items: 所有項目列表
|
310 |
+
|
311 |
+
Returns:
|
312 |
+
Dict[str, int]: 項目計數字典
|
313 |
+
"""
|
314 |
+
item_counts = {}
|
315 |
+
|
316 |
+
for item in all_items:
|
317 |
+
item = item.strip()
|
318 |
+
if item and item not in ["and", "with", ""]:
|
319 |
+
clean_item = self._normalize_item_for_counting(item)
|
320 |
+
if clean_item not in item_counts:
|
321 |
+
item_counts[clean_item] = 0
|
322 |
+
item_counts[clean_item] += 1
|
323 |
+
|
324 |
+
return item_counts
|
325 |
+
|
326 |
+
def _generate_optimized_item_list(self, item_counts: Dict[str, int]) -> List[str]:
|
327 |
+
"""
|
328 |
+
生成優化後的項目列表
|
329 |
+
|
330 |
+
Args:
|
331 |
+
item_counts: 項目計數字典
|
332 |
+
|
333 |
+
Returns:
|
334 |
+
List[str]: 優化後的項目列表
|
335 |
+
"""
|
336 |
+
new_items = []
|
337 |
+
|
338 |
+
for item, count in item_counts.items():
|
339 |
+
if count > 1:
|
340 |
+
plural_item = self._make_plural(item)
|
341 |
+
new_items.append(f"{count} {plural_item}")
|
342 |
+
else:
|
343 |
+
new_items.append(item)
|
344 |
+
|
345 |
+
return new_items
|
346 |
+
|
347 |
+
def _format_item_list(self, new_items: List[str]) -> str:
|
348 |
+
"""
|
349 |
+
格式化項目列表為字符串
|
350 |
+
|
351 |
+
Args:
|
352 |
+
new_items: 新項目列表
|
353 |
+
|
354 |
+
Returns:
|
355 |
+
str: 格式化後的字符串
|
356 |
+
"""
|
357 |
+
if len(new_items) == 1:
|
358 |
+
return new_items[0]
|
359 |
+
elif len(new_items) == 2:
|
360 |
+
return f"{new_items[0]} and {new_items[1]}"
|
361 |
+
else:
|
362 |
+
return ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
|
363 |
+
|
364 |
+
def _normalize_item_for_counting(self, item: str) -> str:
|
365 |
+
"""
|
366 |
+
正規化物件項目以便準確計數
|
367 |
+
|
368 |
+
Args:
|
369 |
+
item: 原始物件項目字串
|
370 |
+
|
371 |
+
Returns:
|
372 |
+
str: 正規化後的物件項目
|
373 |
+
"""
|
374 |
+
item = re.sub(r'^(a|an|the)\s+', '', item.lower())
|
375 |
+
return item.strip()
|
376 |
+
|
377 |
+
def _make_plural(self, item: str) -> str:
|
378 |
+
"""
|
379 |
+
將單數名詞轉換為複數形式
|
380 |
+
|
381 |
+
Args:
|
382 |
+
item: 單數形式的名詞
|
383 |
+
|
384 |
+
Returns:
|
385 |
+
str: 複數形式的名詞
|
386 |
+
"""
|
387 |
+
if item.endswith("y") and len(item) > 1 and item[-2].lower() not in 'aeiou':
|
388 |
+
return item[:-1] + "ies"
|
389 |
+
elif item.endswith(("s", "sh", "ch", "x", "z")):
|
390 |
+
return item + "es"
|
391 |
+
elif not item.endswith("s"):
|
392 |
+
return item + "s"
|
393 |
+
else:
|
394 |
+
return item
|
395 |
+
|
396 |
+
def _get_plural_form(self, word: str) -> str:
|
397 |
+
"""
|
398 |
+
獲取詞彙的複數形式
|
399 |
+
|
400 |
+
Args:
|
401 |
+
word: 單數詞彙
|
402 |
+
|
403 |
+
Returns:
|
404 |
+
str: 複數形式
|
405 |
+
"""
|
406 |
+
try:
|
407 |
+
# 特殊複數形式
|
408 |
+
irregular_plurals = {
|
409 |
+
'person': 'people',
|
410 |
+
'child': 'children',
|
411 |
+
'foot': 'feet',
|
412 |
+
'tooth': 'teeth',
|
413 |
+
'mouse': 'mice',
|
414 |
+
'man': 'men',
|
415 |
+
'woman': 'women'
|
416 |
+
}
|
417 |
+
|
418 |
+
if word.lower() in irregular_plurals:
|
419 |
+
return irregular_plurals[word.lower()]
|
420 |
+
|
421 |
+
# 規則複數形式
|
422 |
+
if word.endswith(('s', 'sh', 'ch', 'x', 'z')):
|
423 |
+
return word + 'es'
|
424 |
+
elif word.endswith('y') and word[-2] not in 'aeiou':
|
425 |
+
return word[:-1] + 'ies'
|
426 |
+
elif word.endswith('f'):
|
427 |
+
return word[:-1] + 'ves'
|
428 |
+
elif word.endswith('fe'):
|
429 |
+
return word[:-2] + 'ves'
|
430 |
+
else:
|
431 |
+
return word + 's'
|
432 |
+
|
433 |
+
except Exception as e:
|
434 |
+
self.logger.warning(f"Error getting plural form for '{word}': {str(e)}")
|
435 |
+
return word + 's'
|
436 |
+
|
437 |
+
def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
|
438 |
+
detected_objects: Optional[List[Dict]],
|
439 |
+
avg_confidence: float) -> str:
|
440 |
+
"""
|
441 |
+
處理單個物件的描述生成
|
442 |
+
|
443 |
+
Args:
|
444 |
+
class_name: 物件類別名稱
|
445 |
+
scene_type: 場景類型
|
446 |
+
detected_objects: 檢測物件列表
|
447 |
+
avg_confidence: 平均置信度
|
448 |
+
|
449 |
+
Returns:
|
450 |
+
str: 單個物件的完整描述
|
451 |
+
"""
|
452 |
+
article = "an" if class_name[0].lower() in 'aeiou' else "a"
|
453 |
+
|
454 |
+
# 獲取材質描述符
|
455 |
+
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
456 |
+
|
457 |
+
# 獲取位置或特徵描述符
|
458 |
+
feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)
|
459 |
+
|
460 |
+
# 組合描述
|
461 |
+
descriptors = []
|
462 |
+
if material_descriptor:
|
463 |
+
descriptors.append(material_descriptor)
|
464 |
+
if feature_descriptor:
|
465 |
+
descriptors.append(feature_descriptor)
|
466 |
+
|
467 |
+
if descriptors:
|
468 |
+
return f"{article} {' '.join(descriptors)} {class_name}"
|
469 |
+
else:
|
470 |
+
return f"{article} {class_name}"
|
471 |
+
|
472 |
+
def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
|
473 |
+
scene_type: Optional[str], detected_objects: Optional[List[Dict]],
|
474 |
+
avg_confidence: float) -> str:
|
475 |
+
"""
|
476 |
+
處理多個物件的描述生成
|
477 |
+
|
478 |
+
Args:
|
479 |
+
class_name: 物件類別名稱
|
480 |
+
count: 物件數量
|
481 |
+
plural_form: 複數形式
|
482 |
+
scene_type: 場景類型
|
483 |
+
detected_objects: 檢測物件列表
|
484 |
+
avg_confidence: 平均置信度
|
485 |
+
|
486 |
+
Returns:
|
487 |
+
str: 多個物件的完整描述
|
488 |
+
"""
|
489 |
+
# 數字到文字的轉換映射
|
490 |
+
number_words = {
|
491 |
+
2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
|
492 |
+
7: "seven", 8: "eight", 9: "nine", 10: "ten",
|
493 |
+
11: "eleven", 12: "twelve"
|
494 |
+
}
|
495 |
+
|
496 |
+
# 確定基礎數量表達
|
497 |
+
if count in number_words:
|
498 |
+
count_expression = number_words[count]
|
499 |
+
elif count <= 20:
|
500 |
+
count_expression = "several"
|
501 |
+
else:
|
502 |
+
count_expression = "numerous"
|
503 |
+
|
504 |
+
# 獲取材質或功能描述符
|
505 |
+
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
506 |
+
|
507 |
+
# 構建基礎描述
|
508 |
+
descriptors = []
|
509 |
+
if material_descriptor:
|
510 |
+
descriptors.append(material_descriptor)
|
511 |
+
|
512 |
+
base_description = f"{count_expression} {' '.join(descriptors)} {plural_form}".strip()
|
513 |
+
return base_description
|
514 |
+
|
515 |
+
def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
|
516 |
+
avg_confidence: float) -> Optional[str]:
|
517 |
+
"""
|
518 |
+
基於場景語境和置信度進行材質推斷
|
519 |
+
|
520 |
+
Args:
|
521 |
+
class_name: 物件類別名稱
|
522 |
+
scene_type: 場景類型
|
523 |
+
avg_confidence: 檢測置信度
|
524 |
+
|
525 |
+
Returns:
|
526 |
+
Optional[str]: 材質描述符
|
527 |
+
"""
|
528 |
+
# 只有在置信度足夠高時才進行材質推斷
|
529 |
+
if avg_confidence < 0.5:
|
530 |
+
return None
|
531 |
+
|
532 |
+
# 餐廳和用餐相關場景
|
533 |
+
if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
|
534 |
+
material_mapping = {
|
535 |
+
"chair": "wooden" if avg_confidence > 0.7 else None,
|
536 |
+
"dining table": "wooden",
|
537 |
+
"couch": "upholstered",
|
538 |
+
"vase": "decorative"
|
539 |
+
}
|
540 |
+
return material_mapping.get(class_name)
|
541 |
+
|
542 |
+
# 辦公場景
|
543 |
+
elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
|
544 |
+
material_mapping = {
|
545 |
+
"chair": "office",
|
546 |
+
"dining table": "conference",
|
547 |
+
"laptop": "modern",
|
548 |
+
"book": "reference"
|
549 |
+
}
|
550 |
+
return material_mapping.get(class_name)
|
551 |
+
|
552 |
+
# 客廳場景
|
553 |
+
elif scene_type and scene_type in ["living_room"]:
|
554 |
+
material_mapping = {
|
555 |
+
"couch": "comfortable",
|
556 |
+
"chair": "accent",
|
557 |
+
"tv": "large",
|
558 |
+
"vase": "decorative"
|
559 |
+
}
|
560 |
+
return material_mapping.get(class_name)
|
561 |
+
|
562 |
+
# 室外場景
|
563 |
+
elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
|
564 |
+
material_mapping = {
|
565 |
+
"car": "parked",
|
566 |
+
"person": "walking",
|
567 |
+
"bicycle": "stationed"
|
568 |
+
}
|
569 |
+
return material_mapping.get(class_name)
|
570 |
+
|
571 |
+
# 如果沒有特定的場景映射,返回通用描述符
|
572 |
+
generic_mapping = {
|
573 |
+
"chair": "comfortable",
|
574 |
+
"dining table": "sturdy",
|
575 |
+
"car": "parked",
|
576 |
+
"person": "present"
|
577 |
+
}
|
578 |
+
|
579 |
+
return generic_mapping.get(class_name)
|
580 |
+
|
581 |
+
def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
|
582 |
+
detected_objects: Optional[List[Dict]]) -> Optional[str]:
|
583 |
+
"""
|
584 |
+
為單個物件生成特徵描述符
|
585 |
+
|
586 |
+
Args:
|
587 |
+
class_name: 物件類別名稱
|
588 |
+
scene_type: 場景類型
|
589 |
+
detected_objects: 檢測物件
|
590 |
+
|
591 |
+
Returns:
|
592 |
+
Optional[str]: 特徵描述符
|
593 |
+
"""
|
594 |
+
if not detected_objects or len(detected_objects) != 1:
|
595 |
+
return None
|
596 |
+
|
597 |
+
obj = detected_objects[0]
|
598 |
+
region = obj.get("region", "").lower()
|
599 |
+
|
600 |
+
# 基於位置的描述
|
601 |
+
if "center" in region:
|
602 |
+
if class_name == "dining table":
|
603 |
+
return "central"
|
604 |
+
elif class_name == "chair":
|
605 |
+
return "centrally placed"
|
606 |
+
elif "corner" in region or "left" in region or "right" in region:
|
607 |
+
return "positioned"
|
608 |
+
|
609 |
+
# 基於場景的功能描述
|
610 |
+
if scene_type and scene_type in ["dining_area", "restaurant"]:
|
611 |
+
if class_name == "chair":
|
612 |
+
return "dining"
|
613 |
+
elif class_name == "vase":
|
614 |
+
return "decorative"
|
615 |
+
|
616 |
+
return None
|