DawnC committed on
Commit
12d9ea9
·
verified ·
1 Parent(s): 060f7fa

Upload 14 files

Browse files
content_generator.py ADDED
@@ -0,0 +1,569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import random
3
+ import re
4
+ from typing import Dict, List, Optional, Union, Any
5
+
6
class ContentGenerator:
    """
    Content generator: basic content generation and placeholder replacement.

    Produces the dynamic text that fills template placeholders: a natural
    language summary of detected objects, scene-specific phrases derived from
    the detections, and several layers of static fallback phrases so that a
    placeholder is never replaced by an empty string.
    """

    # Placeholders whose text is always derived from the detected objects.
    _DYNAMIC_PLACEHOLDERS = frozenset({
        'primary_objects', 'detected_objects_summary', 'main_objects',
        'functional_area', 'functional_zones_description', 'scene_elements',
    })

    # Nouns whose plural is not formed by the regular suffix rules.
    _IRREGULAR_PLURALS = {
        "person": "people",
        "knife": "knives",
        "mouse": "mice",
        "sheep": "sheep",
        "skis": "skis",
        "scissors": "scissors",
        "broccoli": "broccoli",
    }

    # Generic fallback phrases used when no other source has a replacement.
    _FALLBACK_REPLACEMENTS = {
        # Traffic and urban
        "crossing_pattern": "pedestrian crosswalks",
        "pedestrian_behavior": "people moving carefully",
        "traffic_pattern": "vehicle movement",
        "urban_elements": "city infrastructure",
        "street_elements": "urban features",
        "intersection_features": "traffic management systems",
        "pedestrian_density": "groups of people",
        "pedestrian_flow": "pedestrian movement",
        "traffic_description": "vehicle traffic",
        "people_and_vehicles": "pedestrians and cars",

        # Scene setting
        "scene_setting": "this urban environment",
        "location_context": "the area",
        "spatial_context": "the scene",
        "environmental_context": "this location",

        # Common furniture and equipment
        "furniture": "various furniture pieces",
        "seating": "seating arrangements",
        "electronics": "electronic devices",
        "appliances": "household appliances",

        # Activities and behavior
        "activities": "various activities",
        "interactions": "people interacting",
        "movement": "movement patterns",

        # Lighting and atmosphere
        "lighting_conditions": "ambient lighting",
        "atmosphere": "the overall atmosphere",
        "ambiance": "environmental ambiance",

        # Spatial description
        "spatial_arrangement": "spatial organization",
        "layout": "the layout",
        "composition": "visual composition",

        # Objects and elements
        "objects": "various objects",
        "elements": "scene elements",
        "features": "notable features",
        "details": "observable details",
    }

    # Per-scene-type default phrases, keyed by scene type then placeholder.
    _SCENE_DEFAULTS = {
        "urban_intersection": {
            "crossing_pattern": "marked crosswalks",
            "pedestrian_behavior": "pedestrians crossing carefully",
            "traffic_pattern": "controlled traffic flow",
        },
        "city_street": {
            "traffic_description": "urban vehicle traffic",
            "street_elements": "city infrastructure",
            "people_and_vehicles": "pedestrians and vehicles",
        },
        "living_room": {
            "furniture": "comfortable living room furniture",
            "seating": "sofas and chairs",
            "electronics": "entertainment equipment",
        },
        "kitchen": {
            "appliances": "kitchen appliances",
            "cooking_equipment": "cooking tools and equipment",
        },
        "office_workspace": {
            "office_equipment": "work furniture and devices",
            "desk_setup": "desk and office chair",
        },
    }

    # Last-resort phrases that are safe to drop into any sentence.
    _EMERGENCY_REPLACEMENTS = {
        "crossing_pattern": "pedestrian walkways",
        "pedestrian_behavior": "people moving through the area",
        "traffic_pattern": "vehicle movement",
        "scene_setting": "this location",
        "urban_elements": "city features",
        "street_elements": "urban components",
    }

    def __init__(self):
        """Initialize the content generator and preload the default replacements."""
        self.logger = logging.getLogger(self.__class__.__name__)

        # Preload the default replacement phrases.
        self.default_replacements = self._generate_default_replacements()

        self.logger.debug("ContentGenerator initialized successfully")

    def _generate_default_replacements(self) -> Dict[str, str]:
        """
        Build the default placeholder -> phrase mapping.

        Several keys used to be listed twice in this literal, in which case
        Python silently keeps the last value. Each key now appears once and
        carries the value that was effective before, so the resulting
        dictionary is unchanged.

        Returns:
            Dict[str, str]: Default replacement phrases keyed by placeholder.
        """
        return {
            # Scene introduction
            "scene_introduction": "this scene",
            "location_prefix": "this location",
            "setting_description": "this setting",
            "area_description": "this area",
            "environment_description": "this environment",
            "spatial_introduction": "this space",

            # Indoor
            "furniture": "various furniture pieces",
            "seating": "comfortable seating",
            "electronics": "entertainment devices",
            "bed_type": "a bed",
            "bed_location": "room",
            "bed_description": "sleeping arrangements",
            "extras": "personal items",
            "table_setup": "a dining table and chairs",
            "table_description": "a dining surface",
            "dining_items": "dining furniture and tableware",
            "appliances": "kitchen appliances",
            "kitchen_items": "cooking utensils and dishware",
            "cooking_equipment": "cooking equipment",
            "office_equipment": "work-related furniture and devices",
            "desk_setup": "a desk and chair",
            "computer_equipment": "electronic devices",

            # Outdoor / urban (values shared with the intersection wording)
            "traffic_description": "active urban traffic",
            "people_and_vehicles": "pedestrians and vehicles",
            "street_elements": "urban infrastructure elements",
            "park_features": "benches and greenery",
            "outdoor_elements": "natural features",
            "park_description": "outdoor amenities",
            "store_elements": "merchandise displays",
            "shopping_activity": "customers browse and shop",
            "store_items": "products for sale",

            # Upscale dining
            "design_elements": "elegant decor",
            "lighting": "stylish lighting fixtures",

            # Asian commercial street
            "storefront_features": "compact shops",
            "pedestrian_flow": "steady pedestrian movement",
            "asian_elements": "distinctive cultural elements",
            "cultural_elements": "traditional design features",
            "signage": "colorful signs",
            "street_activities": "busy urban activity",

            # Financial district
            "buildings": "tall buildings",
            "traffic_elements": "vehicles",
            "skyscrapers": "high-rise buildings",
            "road_features": "wide streets",
            "architectural_elements": "cultural buildings",
            "city_landmarks": "prominent structures",

            # Intersection
            "crossing_pattern": "clearly marked pedestrian crossings",
            "pedestrian_behavior": "careful pedestrian movement",
            "pedestrian_density": "multiple groups of pedestrians",
            "traffic_pattern": "well-regulated traffic flow",

            # Transit
            "transit_vehicles": "public transportation vehicles",
            "passenger_activity": "commuter movement",
            "transportation_modes": "various transit options",
            "passenger_needs": "waiting areas",
            "transit_infrastructure": "transit facilities",
            "passenger_movement": "commuter flow",

            # Shopping district
            "retail_elements": "shops and displays",
            "store_types": "various retail establishments",
            "walkway_features": "pedestrian pathways",
            "commercial_signage": "store signs",
            "consumer_behavior": "shopping activities",

            # Aerial view
            "commercial_layout": "organized retail areas",
            "pedestrian_pattern": "people movement patterns",
            "gathering_features": "public gathering spaces",
            "movement_pattern": "crowd flow patterns",
            "urban_elements": "city infrastructure",
            "public_activity": "social interaction",

            # Culture-specific elements
            "stall_elements": "vendor booths",
            "lighting_features": "decorative lights",
            "food_elements": "food offerings",
            "vendor_stalls": "market stalls",
            "nighttime_activity": "evening commerce",
            "cultural_lighting": "traditional lighting",
            "night_market_sounds": "lively market sounds",
            "evening_crowd_behavior": "nighttime social activity",
            "religious_structures": "sacred buildings",
            "decorative_features": "ornamental designs",
            "cultural_practices": "traditional activities",
            "temple_architecture": "religious structures",
            "sensory_elements": "atmospheric elements",
            "visitor_activities": "cultural experiences",
            "ritual_activities": "ceremonial practices",
            "cultural_symbols": "meaningful symbols",
            "architectural_style": "historical buildings",
            "historic_elements": "traditional architecture",
            "urban_design": "city planning elements",
            "social_behaviors": "public interactions",
            "european_features": "European architectural details",
            "tourist_activities": "visitor activities",
            "local_customs": "regional practices",

            # Time-specific elements
            "lighting_effects": "artificial lighting",
            "shadow_patterns": "light and shadow",
            "urban_features": "city elements",
            "illuminated_elements": "lit structures",
            "evening_activities": "nighttime activities",
            "light_sources": "lighting points",
            "lit_areas": "illuminated spaces",
            "shadowed_zones": "darker areas",
            "illuminated_signage": "bright signs",
            "colorful_lighting": "multicolored lights",
            "neon_elements": "neon signs",
            "night_crowd_behavior": "evening social patterns",
            "light_displays": "lighting installations",
            "building_features": "architectural elements",
            "nightlife_activities": "evening entertainment",
            "lighting_modifier": "bright",

            # Mixed indoor/outdoor environments
            "transitional_elements": "connecting features",
            "indoor_features": "interior elements",
            "outdoor_setting": "exterior spaces",
            "interior_amenities": "inside comforts",
            "exterior_features": "outside elements",
            "inside_elements": "interior design",
            "outside_spaces": "outdoor areas",
            "dual_environment_benefits": "combined settings",
            "passenger_activities": "waiting behaviors",
            "transportation_types": "transit vehicles",
            "sheltered_elements": "covered areas",
            "exposed_areas": "open sections",
            "waiting_behaviors": "passenger activities",
            "indoor_facilities": "inside services",
            "platform_features": "transit platform elements",
            "transit_routines": "transportation procedures",

            # Specialized venues
            "seating_arrangement": "spectator seating",
            "playing_surface": "athletic field",
            "sporting_activities": "sports events",
            "spectator_facilities": "viewer accommodations",
            "competition_space": "sports arena",
            "sports_events": "athletic competitions",
            "viewing_areas": "audience sections",
            "field_elements": "field markings and equipment",
            "game_activities": "competitive play",
            "construction_equipment": "building machinery",
            "building_materials": "construction supplies",
            "construction_activities": "building work",
            "work_elements": "construction tools",
            "structural_components": "building structures",
            "site_equipment": "construction gear",
            "raw_materials": "building supplies",
            "construction_process": "building phases",
            "medical_elements": "healthcare equipment",
            "clinical_activities": "medical procedures",
            "facility_design": "healthcare layout",
            "healthcare_features": "medical facilities",
            "patient_interactions": "care activities",
            "equipment_types": "medical devices",
            "care_procedures": "health services",
            "treatment_spaces": "clinical areas",
            "educational_furniture": "learning furniture",
            "learning_activities": "educational practices",
            "instructional_design": "teaching layout",
            "classroom_elements": "school equipment",
            "teaching_methods": "educational approaches",
            "student_engagement": "learning participation",
            "learning_spaces": "educational areas",
            "educational_tools": "teaching resources",
            "knowledge_transfer": "learning exchanges",
        }

    @classmethod
    def _pluralize(cls, noun: str) -> str:
        """Return the English plural of *noun*, inflecting only its last word."""
        head, sep, last = noun.rpartition(" ")
        lowered = last.lower()

        if lowered in cls._IRREGULAR_PLURALS:
            plural = cls._IRREGULAR_PLURALS[lowered]
        elif lowered.endswith(("s", "x", "z", "ch", "sh")):
            plural = last + "es"
        elif len(lowered) > 1 and lowered.endswith("y") and lowered[-2] not in "aeiou":
            plural = last[:-1] + "ies"
        else:
            plural = last + "s"

        return f"{head}{sep}{plural}"

    @staticmethod
    def _join_phrases(phrases: List[str]) -> str:
        """Join phrases as 'a', 'a and b' or 'a, b, and c'."""
        if len(phrases) == 1:
            return phrases[0]
        if len(phrases) == 2:
            return f"{phrases[0]} and {phrases[1]}"
        return ", ".join(phrases[:-1]) + f", and {phrases[-1]}"

    @staticmethod
    def _names_for_class_ids(detected_objects: List[Dict], class_ids: List[int],
                             limit: int, default_name: str, empty_text: str) -> str:
        """List the distinct class names of the first *limit* objects matching *class_ids*."""
        matches = [obj for obj in detected_objects if obj.get("class_id") in class_ids]
        if not matches:
            return empty_text

        names = [obj.get("class_name", default_name) for obj in matches[:limit]]
        # dict.fromkeys de-duplicates while keeping detection order, so the
        # generated text is stable between runs (a set would shuffle it).
        return ", ".join(dict.fromkeys(names))

    def generate_objects_summary(self, detected_objects: List[Dict]) -> str:
        """
        Build a natural-language summary of the detected objects.

        Objects are grouped by class name and ranked by an importance score
        that combines the number of instances (weight 0.6) with the average
        confidence (weight 0.4). The five highest ranked classes are listed.

        Args:
            detected_objects: Detected objects; each entry may carry
                "class_name" and "confidence".

        Returns:
            str: Summary such as "2 buses, a person, and an umbrella", or
                "various elements" when there is nothing to summarize or an
                error occurs.
        """
        try:
            if not detected_objects:
                return "various elements"

            # Aggregate count and confidence per class.
            object_counts = {}
            for obj in detected_objects:
                # A missing/empty name must not break article selection below.
                class_name = str(obj.get("class_name") or "unknown")
                confidence = obj.get("confidence", 0.5)

                stats = object_counts.setdefault(
                    class_name, {"count": 0, "total_confidence": 0}
                )
                stats["count"] += 1
                stats["total_confidence"] += confidence

            # Score every class; the sort is stable, so ties keep detection order.
            sorted_objects = []
            for class_name, stats in object_counts.items():
                count = stats["count"]
                avg_confidence = stats["total_confidence"] / count
                importance_score = (count * 0.6) + (avg_confidence * 0.4)
                sorted_objects.append((class_name, count, importance_score))

            sorted_objects.sort(key=lambda item: item[2], reverse=True)
            top_objects = sorted_objects[:5]

            # Turn each class into "a chair" / "an apple" / "3 chairs".
            descriptions = []
            for class_name, count, _ in top_objects:
                clean_name = class_name.replace('_', ' ')
                if count == 1:
                    article = "an" if clean_name[0].lower() in 'aeiou' else "a"
                    descriptions.append(f"{article} {clean_name}")
                else:
                    descriptions.append(f"{count} {self._pluralize(clean_name)}")

            return self._join_phrases(descriptions)

        except Exception as e:
            self.logger.warning("Error generating objects summary: %s", e)
            return "various elements"

    def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
                                    all_replacements: Dict, detected_objects: List[Dict],
                                    scene_type: str) -> str:
        """
        Resolve the replacement text for a placeholder; never returns an empty value.

        Sources are consulted in this order: dynamic object summary,
        predefined replacements, template fillers (1-3 random picks),
        scene-specific content from the detections, the generic fallback
        table, scene-type defaults and finally a phrase derived from the
        placeholder name itself.

        Args:
            placeholder: Placeholder name.
            fillers: Template filler options keyed by placeholder.
            all_replacements: Predefined replacements keyed by placeholder.
            detected_objects: Detected objects.
            scene_type: Scene type identifier.

        Returns:
            str: Replacement text.
        """
        try:
            fillers = fillers or {}
            all_replacements = all_replacements or {}

            # Placeholders that are always generated from the detections.
            if placeholder in self._DYNAMIC_PLACEHOLDERS:
                dynamic_content = self.generate_objects_summary(detected_objects)
                if dynamic_content and dynamic_content.strip():
                    return dynamic_content.strip()

            # Predefined replacements.
            if placeholder in all_replacements:
                replacement = all_replacements[placeholder]
                replacement_text = str(replacement).strip() if replacement else ""
                if replacement_text:
                    return replacement_text

            # Template fillers: pick between one and three distinct options.
            if placeholder in fillers:
                options = fillers[placeholder]
                if options and isinstance(options, list):
                    # str() first: a non-string option must not raise and
                    # short-circuit the whole lookup into the except branch.
                    valid_options = [
                        str(opt).strip() for opt in options if opt and str(opt).strip()
                    ]
                    if valid_options:
                        num_items = min(len(valid_options), random.randint(1, 3))
                        selected_items = random.sample(valid_options, num_items)
                        return self._join_phrases(selected_items)

            # Content derived from the detected objects.
            scene_specific_replacement = self.generate_scene_specific_content(
                placeholder, detected_objects, scene_type
            )
            if scene_specific_replacement and scene_specific_replacement.strip():
                return scene_specific_replacement.strip()

            # Generic fallback table.
            if placeholder in self._FALLBACK_REPLACEMENTS:
                return self._FALLBACK_REPLACEMENTS[placeholder]

            # Defaults that depend on the scene type.
            scene_based_defaults = self.get_scene_based_default(placeholder, scene_type)
            if scene_based_defaults:
                return scene_based_defaults

            # Last resort: turn the placeholder name into a readable phrase.
            cleaned_placeholder = placeholder.replace('_', ' ')

            if placeholder.endswith('_pattern'):
                return f"{cleaned_placeholder.replace(' pattern', '')} arrangement"
            elif placeholder.endswith('_behavior'):
                return f"{cleaned_placeholder.replace(' behavior', '')} activity"
            elif placeholder.endswith('_description'):
                return f"{cleaned_placeholder.replace(' description', '')} elements"
            elif placeholder.endswith(('_elements', '_features')):
                return cleaned_placeholder
            else:
                # A single-word name carries no extra meaning once cleaned.
                return cleaned_placeholder if cleaned_placeholder != placeholder else "various elements"

        except Exception as e:
            self.logger.warning("Error getting replacement for placeholder '%s': %s", placeholder, e)
            # Still return something meaningful when anything above failed.
            return placeholder.replace('_', ' ') if placeholder else "scene elements"

    def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
        """
        Look up a default phrase that depends on the scene type.

        Args:
            placeholder: Placeholder name.
            scene_type: Scene type identifier.

        Returns:
            Optional[str]: Scene-specific default, or None when the scene
                type or the placeholder is not known.
        """
        try:
            return self._SCENE_DEFAULTS.get(scene_type, {}).get(placeholder)

        except Exception as e:
            self.logger.warning(
                "Error getting scene-based default for '%s' in '%s': %s",
                placeholder, scene_type, e
            )
            return None

    def generate_scene_specific_content(self, placeholder: str, detected_objects: List[Dict],
                                        scene_type: str) -> Optional[str]:
        """
        Generate placeholder content from the detected objects.

        Handles the "furniture", "electronics", "people_count" and "seating"
        placeholders by filtering the detections on hard-coded class ids.

        Args:
            placeholder: Placeholder name.
            detected_objects: Detected objects; each entry may carry
                "class_id" and "class_name".
            scene_type: Scene type identifier (currently not consulted).

        Returns:
            Optional[str]: Generated content, or None when the placeholder
                has no dedicated logic or an error occurs.
        """
        try:
            detected_objects = detected_objects or []

            if placeholder == "furniture":
                furniture_ids = [56, 57, 58, 59, 60, 61]  # furniture class ids
                return self._names_for_class_ids(
                    detected_objects, furniture_ids, 3,
                    "furniture", "various furniture items"
                )

            elif placeholder == "electronics":
                electronics_ids = [62, 63, 64, 65, 66, 67, 68, 69, 70]  # electronics class ids
                return self._names_for_class_ids(
                    detected_objects, electronics_ids, 3,
                    "electronic device", "electronic devices"
                )

            elif placeholder == "people_count":
                # Class id 0 is counted as a person.
                people_count = sum(1 for obj in detected_objects if obj.get("class_id") == 0)

                if people_count == 0:
                    return "no people"
                elif people_count == 1:
                    return "one person"
                elif people_count < 5:
                    return f"{people_count} people"
                else:
                    return "several people"

            elif placeholder == "seating":
                seating_ids = [56, 57]  # chair, sofa
                return self._names_for_class_ids(
                    detected_objects, seating_ids, 2,
                    "seating", "seating arrangements"
                )

            # No dedicated logic for this placeholder.
            return None

        except Exception as e:
            self.logger.warning("Error generating scene-specific content for '%s': %s", placeholder, e)
            return None

    def get_emergency_replacement(self, placeholder: str) -> str:
        """
        Return a last-resort replacement that cannot break sentence grammar.

        Args:
            placeholder: Placeholder name.

        Returns:
            str: A safe replacement phrase.
        """
        if placeholder in self._EMERGENCY_REPLACEMENTS:
            return self._EMERGENCY_REPLACEMENTS[placeholder]

        # Derive a phrase from the placeholder name.
        cleaned = placeholder.replace('_', ' ')
        if not cleaned.strip():
            # Avoid emitting "various " with a dangling space.
            return "various elements"
        if len(cleaned.split()) > 1:
            return cleaned
        return f"various {cleaned}"
functional_zone_detector.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import logging
3
+ import traceback
4
+ from typing import Dict, List, Any, Optional
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class FunctionalZoneDetector:
    """
    Identify functional zones from combinations of related objects.

    Analyses which detected objects occur together in the same image region
    and produces descriptive zone names for them.
    """

    # Detections below this confidence are ignored when forming zones.
    _MIN_CONFIDENCE = 0.4

    def __init__(self):
        """Initialize the functional zone detector."""
        logger.info("FunctionalZoneDetector initialized successfully")

    def identify_primary_functional_area(self, detected_objects: List[Dict]) -> Optional[Dict]:
        """
        Identify the primary functional area from the strongest object combination.

        Candidate areas are tried in a fixed order (dining, seating/rest,
        workspace) and the first match wins.

        Args:
            detected_objects: Detected objects.

        Returns:
            Optional[Dict]: Zone dictionary with "region", "objects" and
                "description", or None when nothing matches or an error occurs.
        """
        try:
            # Dining area: table plus chairs/tableware.
            dining_area = self.detect_functional_combination(
                detected_objects,
                primary_objects=[60],  # dining table
                supporting_objects=[56, 40, 41, 42, 43],  # chair, wine glass, cup, fork, knife
                min_supporting=2,
                description_template="Dining area with table and seating arrangement"
            )
            if dining_area:
                return dining_area

            # Rest area: sofa/TV combination or a bed.
            seating_area = self.detect_functional_combination(
                detected_objects,
                primary_objects=[57, 59],  # sofa, bed
                supporting_objects=[62, 58, 56],  # tv, potted plant, chair
                min_supporting=1,
                description_template="Seating and relaxation area"
            )
            if seating_area:
                return seating_area

            # Work area: electronics combined with furniture.
            work_area = self.detect_functional_combination(
                detected_objects,
                primary_objects=[63, 66],  # laptop, keyboard
                supporting_objects=[60, 56, 64],  # dining table, chair, mouse
                min_supporting=2,
                description_template="Workspace area with electronics and furniture"
            )
            if work_area:
                return work_area

            return None

        except Exception as e:
            logger.error(f"Error identifying primary functional area: {str(e)}")
            logger.error(traceback.format_exc())
            return None

    def identify_secondary_functional_area(self, detected_objects: List[Dict],
                                           existing_zones: Dict) -> Optional[Dict]:
        """
        Identify a secondary functional area outside the regions already in use.

        Args:
            detected_objects: Detected objects.
            existing_zones: Zones identified so far, keyed by zone name; a
                missing or empty mapping means no region is excluded.

        Returns:
            Optional[Dict]: Zone dictionary, or None when nothing matches or
                an error occurs.
        """
        try:
            # Regions already taken by existing zones. Zones without a region
            # are skipped so that None is never treated as an occupied region.
            used_regions = {
                zone.get("region")
                for zone in (existing_zones or {}).values()
                if isinstance(zone, dict) and zone.get("region") is not None
            }

            # Decorative area: a cluster of plants.
            decorative_area = self.detect_functional_combination(
                detected_objects,
                primary_objects=[58],  # potted plant
                supporting_objects=[75],  # vase
                min_supporting=0,
                min_primary=3,  # at least three plants
                description_template="Decorative area with plants and ornamental items",
                exclude_regions=used_regions
            )
            if decorative_area:
                return decorative_area

            # Storage area: kitchen appliance combination.
            storage_area = self.detect_functional_combination(
                detected_objects,
                primary_objects=[72, 68, 69],  # refrigerator, microwave, oven
                supporting_objects=[71],  # sink
                min_supporting=0,
                min_primary=2,
                description_template="Kitchen appliance and storage area",
                exclude_regions=used_regions
            )
            if storage_area:
                return storage_area

            return None

        except Exception as e:
            logger.error(f"Error identifying secondary functional area: {str(e)}")
            logger.error(traceback.format_exc())
            return None

    def detect_functional_combination(self, detected_objects: List[Dict], primary_objects: List[int],
                                      supporting_objects: List[int], min_supporting: int,
                                      description_template: str, min_primary: int = 1,
                                      exclude_regions: Optional[set] = None) -> Optional[Dict]:
        """
        Generic detector for a functional zone defined by object combinations.

        Objects with confidence >= 0.4 are grouped by their "region". A region
        qualifies when it holds at least ``min_primary`` primary and
        ``min_supporting`` supporting objects; among qualifying regions the
        one with the highest score (2 per primary, 1 per supporting object)
        is returned, the earliest seen region winning ties.

        Args:
            detected_objects: Detected objects.
            primary_objects: class_id values of the primary objects.
            supporting_objects: class_id values of the supporting objects.
            min_supporting: Minimum number of supporting objects required.
            description_template: Description stored in the returned zone.
            min_primary: Minimum number of primary objects required.
            exclude_regions: Regions that must not be considered.

        Returns:
            Optional[Dict]: {"region", "objects", "description"} for the best
                region, or None when the requirements are not met or an
                error occurs.
        """
        try:
            excluded = set(exclude_regions) if exclude_regions else set()
            primary_ids = set(primary_objects)
            # A class listed in both roles counts as primary only, so the
            # same detection is never counted twice.
            supporting_ids = set(supporting_objects) - primary_ids

            def confident(obj: Dict) -> bool:
                return obj.get("confidence", 0) >= self._MIN_CONFIDENCE

            primary_objs = [obj for obj in detected_objects
                            if obj.get("class_id") in primary_ids and confident(obj)]
            supporting_objs = [obj for obj in detected_objects
                               if obj.get("class_id") in supporting_ids and confident(obj)]

            # Cheap early exit: the whole image cannot satisfy the minimums.
            if len(primary_objs) < min_primary or len(supporting_objs) < min_supporting:
                return None

            # Group objects by region; primary objects first, which fixes both
            # the tie-break order and the order of the reported object names.
            region_combinations = {}
            for role, role_objs in (("primary", primary_objs), ("supporting", supporting_objs)):
                for obj in role_objs:
                    region = obj.get("region")
                    if region in excluded:
                        continue

                    bucket = region_combinations.setdefault(
                        region, {"primary": [], "supporting": [], "all": []}
                    )
                    bucket["all"].append(obj)
                    bucket[role].append(obj)

            # Pick the best qualifying region.
            best_region = None
            best_score = 0
            for region, objs in region_combinations.items():
                primary_count = len(objs["primary"])
                supporting_count = len(objs["supporting"])

                if primary_count < min_primary or supporting_count < min_supporting:
                    continue

                # Primary objects weigh twice as much as supporting ones.
                score = primary_count * 2 + supporting_count
                if score > best_score:
                    best_score = score
                    best_region = region

            if best_region is None:
                return None

            best_combination = region_combinations[best_region]
            # .get keeps one detection without a name from discarding the zone.
            all_objects = [obj.get("class_name", "unknown") for obj in best_combination["all"]]

            return {
                "region": best_region,
                "objects": all_objects,
                "description": description_template
            }

        except Exception as e:
            logger.error(f"Error detecting functional combination: {str(e)}")
            logger.error(traceback.format_exc())
            return None

    def generate_descriptive_zone_key_from_data(self, zone_data: Dict, priority_level: str) -> str:
        """
        Derive a descriptive zone name from a zone's objects and region.

        Args:
            zone_data: Zone dictionary ("objects", "region", "description").
            priority_level: Priority of the zone ("primary" or "secondary").

        Returns:
            str: Descriptive zone key, e.g. "dining area" or
                "upper left decorative area"; "activity area" on error.
        """
        try:
            names = [obj.lower() for obj in (zone_data.get("objects") or [])
                     if isinstance(obj, str)]
            region = zone_data.get("region", "")
            description = str(zone_data.get("description") or "").lower()

            def mentions(*keywords: str) -> bool:
                return any(keyword in name for name in names for keyword in keywords)

            # Work equipment is tested first: a work zone always contains
            # tables/chairs as supporting objects, so testing furniture first
            # would label every workspace as a dining or seating area.
            if mentions("laptop", "keyboard"):
                base_name = "workspace area"
            elif mentions("dining", "table"):
                base_name = "dining area"
            elif mentions("chair", "sofa", "couch"):
                base_name = "seating area"
            elif mentions("bed"):
                base_name = "sleeping area"
            elif mentions("plant", "vase"):
                base_name = "decorative area"
            elif mentions("refrigerator", "microwave", "oven"):
                base_name = "kitchen area"
            # No telling object name: infer the type from the description.
            elif "dining" in description:
                base_name = "dining area"
            elif "seating" in description or "relaxation" in description:
                base_name = "seating area"
            elif "work" in description:
                base_name = "workspace area"
            elif "decorative" in description:
                base_name = "decorative area"
            elif "kitchen" in description:
                base_name = "kitchen area"
            else:
                base_name = "functional area"

            # Secondary zones get a position prefix to tell them apart.
            if priority_level == "secondary" and region:
                spatial_context = self.get_spatial_context_description(region)
                if spatial_context:
                    return f"{spatial_context} {base_name}"

            return base_name

        except Exception as e:
            logger.warning(f"Error generating descriptive zone key: {str(e)}")
            return "activity area"

    def get_spatial_context_description(self, region: str) -> str:
        """
        Translate a region identifier into a spatial phrase.

        Args:
            region: Region identifier such as "top_left".

        Returns:
            str: Spatial phrase such as "upper left", or "" for an unknown
                region.
        """
        try:
            spatial_mapping = {
                "top_left": "upper left",
                "top_center": "upper",
                "top_right": "upper right",
                "middle_left": "left side",
                "middle_center": "central",
                "middle_right": "right side",
                "bottom_left": "lower left",
                "bottom_center": "lower",
                "bottom_right": "lower right"
            }

            return spatial_mapping.get(region, "")

        except Exception as e:
            logger.warning(f"Error getting spatial context for region '{region}': {str(e)}")
            return ""
object_description_generator.py CHANGED
@@ -4,6 +4,11 @@ import traceback
4
  from typing import Dict, List, Tuple, Optional, Any
5
  import numpy as np
6
 
 
 
 
 
 
7
  class ObjectDescriptionError(Exception):
8
  """物件描述生成過程中的自定義異常"""
9
  pass
@@ -12,9 +17,12 @@ class ObjectDescriptionError(Exception):
12
  class ObjectDescriptionGenerator:
13
  """
14
  物件描述生成器 - 負責將檢測到的物件轉換為自然語言描述
 
15
 
16
  該類別處理物件相關的所有描述生成邏輯,包括重要物件的辨識、
17
  空間位置描述、物件列表格式化以及描述文本的優化。
 
 
18
  """
19
 
20
  def __init__(self,
@@ -31,6 +39,7 @@ class ObjectDescriptionGenerator:
31
  max_categories_to_return: 返回的物件類別最大數量
32
  max_total_objects: 返回的物件總數上限
33
  confidence_threshold_for_description: 用於描述的置信度閾值
 
34
  """
35
  self.logger = logging.getLogger(self.__class__.__name__)
36
 
@@ -40,6 +49,23 @@ class ObjectDescriptionGenerator:
40
  self.confidence_threshold_for_description = confidence_threshold_for_description
41
  self.region_analyzer = region_analyzer
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  self.logger.info("ObjectDescriptionGenerator initialized with prominence_score=%.2f, "
44
  "max_categories=%d, max_objects=%d, confidence_threshold=%.2f",
45
  min_prominence_score, max_categories_to_return,
@@ -59,49 +85,11 @@ class ObjectDescriptionGenerator:
59
  Returns:
60
  List[Dict]: 按重要性排序的物件列表
61
  """
62
- try:
63
- if not detected_objects:
64
- return []
65
-
66
- prominent_objects = []
67
-
68
- for obj in detected_objects:
69
- # 計算重要性評分
70
- prominence_score = self._calculate_prominence_score(obj)
71
-
72
- # 只保留超過閾值的物件
73
- if prominence_score >= min_prominence_score:
74
- obj_copy = obj.copy()
75
- obj_copy['prominence_score'] = prominence_score
76
- prominent_objects.append(obj_copy)
77
-
78
- # 按重要性評分排序(從高到低)
79
- prominent_objects.sort(key=lambda x: x.get('prominence_score', 0), reverse=True)
80
-
81
- # 如果指定了最大類別數量限制,進行過濾
82
- if max_categories_to_return is not None and max_categories_to_return > 0:
83
- categories_seen = set()
84
- filtered_objects = []
85
-
86
- for obj in prominent_objects:
87
- class_name = obj.get("class_name", "unknown")
88
-
89
- # 如果是新類別且未達到限制
90
- if class_name not in categories_seen:
91
- if len(categories_seen) < max_categories_to_return:
92
- categories_seen.add(class_name)
93
- filtered_objects.append(obj)
94
- else:
95
- # 已見過的類別,直接添加
96
- filtered_objects.append(obj)
97
-
98
- return filtered_objects
99
-
100
- return prominent_objects
101
-
102
- except Exception as e:
103
- self.logger.error(f"Error calculating prominent objects: {str(e)}")
104
- return []
105
 
106
  def set_region_analyzer(self, region_analyzer: Any) -> None:
107
  """
@@ -112,107 +100,11 @@ class ObjectDescriptionGenerator:
112
  """
113
  try:
114
  self.region_analyzer = region_analyzer
 
115
  self.logger.info("RegionAnalyzer instance set for ObjectDescriptionGenerator")
116
  except Exception as e:
117
  self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
118
 
119
- def _get_standardized_spatial_description(self, obj: Dict) -> str:
120
- """
121
- 使用RegionAnalyzer生成標準化空間描述的內部方法
122
-
123
- Args:
124
- obj: 物件字典
125
-
126
- Returns:
127
- str: 標準化空間描述,失敗時返回空字串
128
- """
129
- try:
130
- if hasattr(self, 'region_analyzer') and self.region_analyzer:
131
- region = obj.get("region", "")
132
- object_type = obj.get("class_name", "")
133
-
134
- if hasattr(self.region_analyzer, 'get_contextual_spatial_description'):
135
- return self.region_analyzer.get_contextual_spatial_description(region, object_type)
136
- elif hasattr(self.region_analyzer, 'get_spatial_description_phrase'):
137
- return self.region_analyzer.get_spatial_description_phrase(region)
138
-
139
- return ""
140
-
141
- except Exception as e:
142
- self.logger.warning(f"Error getting standardized spatial description: {str(e)}")
143
- if object_type:
144
- return f"visible in the scene"
145
- return "present in the view"
146
-
147
- def _calculate_prominence_score(self, obj: Dict) -> float:
148
- """
149
- 計算物件的重要性評分
150
-
151
- Args:
152
- obj: 物件字典,包含檢測信息
153
-
154
- Returns:
155
- float: 重要性評分 (0.0-1.0)
156
- """
157
- try:
158
- # 基礎置信度評分 (權重: 40%)
159
- confidence = obj.get("confidence", 0.5)
160
- confidence_score = confidence * 0.4
161
-
162
- # 大小評分 (權重: 30%)
163
- normalized_area = obj.get("normalized_area", 0.1)
164
- # 使用對數縮放避免過大物件主導評分
165
- size_score = min(np.log(normalized_area * 10 + 1) / np.log(11), 1.0) * 0.3
166
-
167
- # 位置評分 (權重: 20%)
168
- # 中心區域的物件通常更重要
169
- center_x, center_y = obj.get("normalized_center", [0.5, 0.5])
170
- distance_from_center = np.sqrt((center_x - 0.5)**2 + (center_y - 0.5)**2)
171
- position_score = (1 - min(distance_from_center * 2, 1.0)) * 0.2
172
-
173
- # 類別重要性評分 (權重: 10%)
174
- class_importance = self._get_class_importance(obj.get("class_name", "unknown"))
175
- class_score = class_importance * 0.1
176
-
177
- total_score = confidence_score + size_score + position_score + class_score
178
-
179
- # 確保評分在有效範圍內
180
- return max(0.0, min(1.0, total_score))
181
-
182
- except Exception as e:
183
- self.logger.warning(f"Error calculating prominence score for object: {str(e)}")
184
- return 0.5 # 返回中等評分作為備用
185
-
186
- def _get_class_importance(self, class_name: str) -> float:
187
- """
188
- 根據物件類別返回重要性係數
189
-
190
- Args:
191
- class_name: 物件類別名稱
192
-
193
- Returns:
194
- float: 類別重要性係數 (0.0-1.0)
195
- """
196
- # 高重要性物件(人、車輛、建築)
197
- high_importance = ["person", "car", "truck", "bus", "motorcycle", "bicycle", "building"]
198
-
199
- # 中等重要性物件(家具、電器)
200
- medium_importance = ["chair", "couch", "tv", "laptop", "refrigerator", "dining table", "bed"]
201
-
202
- # 低重要性物件(小物品、配件)
203
- low_importance = ["handbag", "backpack", "umbrella", "cell phone", "remote", "mouse"]
204
-
205
- class_name_lower = class_name.lower()
206
-
207
- if any(item in class_name_lower for item in high_importance):
208
- return 1.0
209
- elif any(item in class_name_lower for item in medium_importance):
210
- return 0.7
211
- elif any(item in class_name_lower for item in low_importance):
212
- return 0.4
213
- else:
214
- return 0.6 # 預設中等重要性
215
-
216
  def format_object_list_for_description(self,
217
  objects: List[Dict],
218
  use_indefinite_article_for_one: bool = False,
@@ -230,65 +122,12 @@ class ObjectDescriptionGenerator:
230
  Returns:
231
  str: 格式化的物件描述字符串
232
  """
233
- try:
234
- if not objects:
235
- return "no specific objects clearly identified"
236
-
237
- counts: Dict[str, int] = {}
238
- for obj in objects:
239
- name = obj.get("class_name", "unknown object")
240
- if name == "unknown object" or not name:
241
- continue
242
- counts[name] = counts.get(name, 0) + 1
243
-
244
- if not counts:
245
- return "no specific objects clearly identified"
246
-
247
- descriptions = []
248
- # 按計數降序然後按名稱升序排序,限制物件類型數量
249
- sorted_counts = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:max_types_to_list]
250
-
251
- for name, count in sorted_counts:
252
- if count == 1:
253
- if use_indefinite_article_for_one:
254
- if name[0].lower() in 'aeiou':
255
- descriptions.append(f"an {name}")
256
- else:
257
- descriptions.append(f"a {name}")
258
- else:
259
- descriptions.append(f"one {name}")
260
- else:
261
- # 處理複數形式
262
- plural_name = name
263
- if name.endswith("y") and not name.lower().endswith(("ay", "ey", "iy", "oy", "uy")):
264
- plural_name = name[:-1] + "ies"
265
- elif name.endswith(("s", "sh", "ch", "x", "z")):
266
- plural_name = name + "es"
267
- elif not name.endswith("s"):
268
- plural_name = name + "s"
269
-
270
- if count_threshold_for_generalization != -1 and count > count_threshold_for_generalization:
271
- if count <= count_threshold_for_generalization + 3:
272
- descriptions.append(f"several {plural_name}")
273
- else:
274
- descriptions.append(f"many {plural_name}")
275
- else:
276
- descriptions.append(f"{count} {plural_name}")
277
-
278
- if not descriptions:
279
- return "no specific objects clearly identified"
280
-
281
- if len(descriptions) == 1:
282
- return descriptions[0]
283
- elif len(descriptions) == 2:
284
- return f"{descriptions[0]} and {descriptions[1]}"
285
- else:
286
- # 使用牛津逗號格式
287
- return ", ".join(descriptions[:-1]) + f", and {descriptions[-1]}"
288
-
289
- except Exception as e:
290
- self.logger.warning(f"Error formatting object list: {str(e)}")
291
- return "various objects"
292
 
293
  def get_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
294
  image_height: Optional[int] = None,
@@ -305,95 +144,16 @@ class ObjectDescriptionGenerator:
305
  Returns:
306
  str: 空間描述字符串,空值region時返回空字串
307
  """
308
- try:
309
- region = obj.get("region") or ""
310
-
311
- # 處理空值或無效region,直接返回空字串避免不完整描述
312
- if not region.strip() or region == "unknown":
313
- # 根據物件類型提供合適的預設位置描述
314
- if object_type and any(vehicle in object_type.lower() for vehicle in ["car", "truck", "bus"]):
315
- return "positioned in the scene"
316
- elif object_type and "person" in object_type.lower():
317
- return "present in the area"
318
- else:
319
- return "located in the scene"
320
-
321
- # 如果提供了RegionAnalyzer實例,使用其標準化方法
322
- if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'):
323
- object_type = obj.get("class_name", "")
324
- if hasattr(region_analyzer, 'get_contextual_spatial_description'):
325
- spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type)
326
- else:
327
- spatial_desc = region_analyzer.get_spatial_description_phrase(region)
328
-
329
- if spatial_desc:
330
- return spatial_desc
331
-
332
- # 備用邏輯:使用改進的內建映射
333
- clean_region = region.replace('_', ' ').strip().lower()
334
-
335
- region_map = {
336
- "top left": "in the upper left area",
337
- "top center": "in the upper area",
338
- "top right": "in the upper right area",
339
- "middle left": "on the left side",
340
- "middle center": "in the center",
341
- "center": "in the center",
342
- "middle right": "on the right side",
343
- "bottom left": "in the lower left area",
344
- "bottom center": "in the lower area",
345
- "bottom right": "in the lower right area"
346
- }
347
-
348
- # 直接映射匹配
349
- if clean_region in region_map:
350
- return region_map[clean_region]
351
-
352
- # 模糊匹配處理
353
- if "top" in clean_region and "left" in clean_region:
354
- return "in the upper left area"
355
- elif "top" in clean_region and "right" in clean_region:
356
- return "in the upper right area"
357
- elif "bottom" in clean_region and "left" in clean_region:
358
- return "in the lower left area"
359
- elif "bottom" in clean_region and "right" in clean_region:
360
- return "in the lower right area"
361
- elif "top" in clean_region:
362
- return "in the upper area"
363
- elif "bottom" in clean_region:
364
- return "in the lower area"
365
- elif "left" in clean_region:
366
- return "on the left side"
367
- elif "right" in clean_region:
368
- return "on the right side"
369
- elif "center" in clean_region or "middle" in clean_region:
370
- return "in the center"
371
-
372
- # 如果region無法識別,使用normalized_center作為最後備用
373
- norm_center = obj.get("normalized_center")
374
- if norm_center and image_width and image_height:
375
- x_norm, y_norm = norm_center
376
- h_pos = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else "center"
377
- v_pos = "upper" if y_norm < 0.4 else "lower" if y_norm > 0.6 else "center"
378
-
379
- if h_pos == "center" and v_pos == "center":
380
- return "in the center"
381
- return f"in the {v_pos} {h_pos} area"
382
-
383
- # 如果所有方法都失敗,返回空字串
384
- return ""
385
-
386
- except Exception as e:
387
- self.logger.warning(f"Error generating spatial description: {str(e)}")
388
- return ""
389
 
390
  def optimize_object_description(self, description: str) -> str:
391
  """
392
- 優化物件描述文本,消除冗餘重複並改善表達流暢度
393
-
394
- 這個函數是後處理階段的關鍵組件,負責清理和精簡自然語言生成系統
395
- 產出的描述文字。它專門處理常見的重複問題,如相同物件的重複
396
- 列舉和冗餘的空間描述,讓最終的描述更簡潔自然。
397
 
398
  Args:
399
  description: 原始的場景描述文本,可能包含重複或冗餘的表達
@@ -401,164 +161,7 @@ class ObjectDescriptionGenerator:
401
  Returns:
402
  str: 經過優化清理的描述文本,如果處理失敗則返回原始文本
403
  """
404
- try:
405
- import re
406
-
407
- # 1. 處理多餘的空間限定表達
408
- # 使用通用模式來識別和移除不必要的空間描述
409
- # 例如:"bed in the room" -> "bed",因為床本身就表示是室內環境
410
- description = self._remove_redundant_spatial_qualifiers(description)
411
-
412
- # 2. 辨識並處理物件列表的重複問題
413
- # 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表
414
- # 使用正則表達式捕獲 "with" 關鍵字後的物件序列
415
- # 注意:正則表達式需要修正以避免貪婪匹配的問題
416
- object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
417
-
418
- # 遍歷每個找到的物件列表進行重複檢測和優化
419
- for obj_list in object_lists:
420
- # 3. 解析單個物件列表中的項目
421
- # 使用更精確的正則表達式來分割物件項目
422
- # 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
423
- # 需要特別注意處理最後一個 "and" 的情況
424
-
425
- # 先處理逗號格式 "A, B, and C"
426
- if ", and " in obj_list:
427
- # 分割 ", and " 前後的部分
428
- before_last_and = obj_list.rsplit(", and ", 1)[0]
429
- last_item = obj_list.rsplit(", and ", 1)[1]
430
-
431
- # 處理前面的項目(用逗號分割)
432
- front_items = [item.strip() for item in before_last_and.split(",")]
433
- # 添加最後一個項目
434
- all_items = front_items + [last_item.strip()]
435
- elif " and " in obj_list:
436
- # 處理簡單的 "A and B" 格式
437
- all_items = [item.strip() for item in obj_list.split(" and ")]
438
- else:
439
- # 處理純逗號分隔的列表
440
- all_items = [item.strip() for item in obj_list.split(",")]
441
-
442
- # 4. 統計物件出現頻率
443
- # 建立字典來記錄每個物件的出現次數
444
- item_counts = {}
445
-
446
- for item in all_items:
447
- # 清理項目文字並過濾無效內容
448
- item = item.strip()
449
- # 過濾掉連接詞和空白項目
450
- if item and item not in ["and", "with", ""]:
451
- # 移除可能的冠詞前綴以便正確計數
452
- # 例如 "a car" 和 "car" 應該被視為同一項目
453
- clean_item = self._normalize_item_for_counting(item)
454
- if clean_item not in item_counts:
455
- item_counts[clean_item] = 0
456
- item_counts[clean_item] += 1
457
-
458
- # 5. 生成優化後的物件列表
459
- if item_counts:
460
- new_items = []
461
-
462
- for item, count in item_counts.items():
463
- if count > 1:
464
- # 對於重複項目,使用數字加複數形式
465
- plural_item = self._make_plural(item)
466
- new_items.append(f"{count} {plural_item}")
467
- else:
468
- # 單個項目保持原樣
469
- new_items.append(item)
470
-
471
- # 6. 重新格式化物件列表
472
- # 使用標準的英文列表連接格式
473
- if len(new_items) == 1:
474
- new_list = new_items[0]
475
- elif len(new_items) == 2:
476
- new_list = f"{new_items[0]} and {new_items[1]}"
477
- else:
478
- # 使用逗號格式確保清晰度
479
- new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
480
-
481
- # 7. 在原文中替換優化後的列表
482
- # 將原始的多餘列表替換為優化後的簡潔版本
483
- description = description.replace(obj_list, new_list)
484
-
485
- return description
486
-
487
- except Exception as e:
488
- self.logger.warning(f"Error optimizing object description: {str(e)}")
489
- return description
490
-
491
- def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
492
- """
493
- 移除描述中冗餘的空間限定詞
494
-
495
- 這個方法使用模式匹配來識別和移除不必要的空間描述,例如
496
- "bed in the room" 中的 "in the room" 部分通常是多餘的,因為
497
- 床這個物件本身就是室內環境。
498
-
499
- Args:
500
- description: 包含可能多餘空間描述的文本
501
-
502
- Returns:
503
- str: 移除多餘空間限定詞後的文本
504
- """
505
- import re
506
-
507
- # 定義常見的多餘空間表達模式
508
- # 這些模式捕獲「物件 + 不必要的空間限定」的情況
509
- redundant_patterns = [
510
- # 室內物件的多餘房間描述
511
- (r'\b(bed|sofa|couch|chair|table|desk|dresser|nightstand)\s+in\s+the\s+(room|bedroom|living\s+room)', r'\1'),
512
- # 廚房物件的多餘描述
513
- (r'\b(refrigerator|stove|oven|sink|microwave)\s+in\s+the\s+kitchen', r'\1'),
514
- # 浴室物件的多餘描述
515
- (r'\b(toilet|shower|bathtub|sink)\s+in\s+the\s+(bathroom|restroom)', r'\1'),
516
- # 一般性的多餘表達:「在場景中」、「在圖片中」等
517
- (r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
518
- ]
519
-
520
- for pattern, replacement in redundant_patterns:
521
- description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
522
-
523
- return description
524
-
525
-
526
- def _normalize_item_for_counting(self, item: str) -> str:
527
- """
528
- 正規化物件項目以便準確計數
529
-
530
- 移除冠詞和其他可能影響計數準確性的前綴詞彙,
531
- 確保 "a car" 和 "car" 被視為同一物件類型。
532
-
533
- Args:
534
- item: 原始物件項目字串
535
-
536
- Returns:
537
- str: 正規化後的物件項目
538
- """
539
- # 移除常見的英文冠詞
540
- item = re.sub(r'^(a|an|the)\s+', '', item.lower())
541
- return item.strip()
542
-
543
- def _make_plural(self, item: str) -> str:
544
- """
545
- 將單數名詞轉換為複數形式
546
-
547
- Args:
548
- item: 單數形式的名詞
549
-
550
- Returns:
551
- str: 複數形式的名詞
552
- """
553
- # 重用已經實現的複數化邏輯
554
- if item.endswith("y") and len(item) > 1 and item[-2].lower() not in 'aeiou':
555
- return item[:-1] + "ies"
556
- elif item.endswith(("s", "sh", "ch", "x", "z")):
557
- return item + "es"
558
- elif not item.endswith("s"):
559
- return item + "s"
560
- else:
561
- return item
562
 
563
  def generate_dynamic_everyday_description(self,
564
  detected_objects: List[Dict],
@@ -586,6 +189,7 @@ class ObjectDescriptionGenerator:
586
  try:
587
  description_segments = []
588
  image_width, image_height = image_dimensions if image_dimensions else (None, None)
 
589
 
590
  self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
591
  f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
@@ -625,8 +229,6 @@ class ObjectDescriptionGenerator:
625
  else:
626
  description_segments.append("Within this setting, no specific objects were clearly identified.")
627
  else:
628
- objects_by_class: Dict[str, List[Dict]] = {}
629
-
630
  # 使用置信度過濾
631
  confident_objects = [obj for obj in detected_objects
632
  if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
@@ -642,172 +244,29 @@ class ObjectDescriptionGenerator:
642
  else:
643
  description_segments.append(no_confident_obj_msg.lower().capitalize())
644
  else:
645
- if object_statistics:
646
- # 使用預計算的統計信息,採用動態的信心度
647
- for class_name, stats in object_statistics.items():
648
- count = stats.get("count", 0)
649
- avg_confidence = stats.get("avg_confidence", 0)
650
-
651
- # 動態調整置信度閾值
652
- dynamic_threshold = self.confidence_threshold_for_description
653
- if class_name in ["potted plant", "vase", "clock", "book"]:
654
- dynamic_threshold = max(0.15, self.confidence_threshold_for_description * 0.6)
655
- elif count >= 3:
656
- dynamic_threshold = max(0.2, self.confidence_threshold_for_description * 0.8)
657
-
658
- if count > 0 and avg_confidence >= dynamic_threshold:
659
- matching_objects = [obj for obj in confident_objects if obj.get("class_name") == class_name]
660
- if not matching_objects:
661
- matching_objects = [obj for obj in detected_objects
662
- if obj.get("class_name") == class_name and obj.get("confidence", 0) >= dynamic_threshold]
663
-
664
- if matching_objects:
665
- actual_count = min(stats["count"], len(matching_objects))
666
- objects_by_class[class_name] = matching_objects[:actual_count]
667
- else:
668
- # 備用邏輯,同樣使用動態閾值
669
- for obj in confident_objects:
670
- name = obj.get("class_name", "unknown object")
671
- if name == "unknown object" or not name:
672
- continue
673
- if name not in objects_by_class:
674
- objects_by_class[name] = []
675
- objects_by_class[name].append(obj)
676
-
677
- print(f"DEBUG: Before spatial deduplication:")
678
- for class_name in ["car", "traffic light", "person", "handbag"]:
679
- if class_name in objects_by_class:
680
- print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects before dedup")
681
 
682
  if not objects_by_class:
683
  description_segments.append("No common objects were confidently identified for detailed description.")
684
  else:
685
- # 物件組排序函數
686
- def sort_key_object_groups(item_tuple: Tuple[str, List[Dict]]):
687
- class_name_key, obj_group_list = item_tuple
688
- priority = 3
689
- count = len(obj_group_list)
690
-
691
- # 確保類別名稱已標準化
692
- normalized_class_name = self._normalize_object_class_name(class_name_key)
693
-
694
- # 動態優先級
695
- if normalized_class_name == "person":
696
- priority = 0
697
- elif normalized_class_name in ["dining table", "chair", "sofa", "bed"]:
698
- priority = 1
699
- elif normalized_class_name in ["car", "bus", "truck", "traffic light"]:
700
- priority = 2
701
- elif count >= 3:
702
- priority = max(1, priority - 1)
703
- elif normalized_class_name in ["potted plant", "vase", "clock", "book"] and count >= 2:
704
- priority = 2
705
-
706
- avg_area = sum(o.get("normalized_area", 0.0) for o in obj_group_list) / len(obj_group_list) if obj_group_list else 0
707
- quantity_bonus = min(count / 5.0, 1.0)
708
-
709
- return (priority, -len(obj_group_list), -avg_area, -quantity_bonus)
710
-
711
- # remove duplicate
712
- deduplicated_objects_by_class = {}
713
- processed_positions = []
714
-
715
- for class_name, group_of_objects in objects_by_class.items():
716
- unique_objects = []
717
-
718
- for obj in group_of_objects:
719
- obj_position = obj.get("normalized_center", [0.5, 0.5])
720
- is_duplicate = False
721
-
722
- for processed_pos in processed_positions:
723
- position_distance = abs(obj_position[0] - processed_pos[0]) + abs(obj_position[1] - processed_pos[1])
724
- if position_distance < 0.15:
725
- is_duplicate = True
726
- break
727
-
728
- if not is_duplicate:
729
- unique_objects.append(obj)
730
- processed_positions.append(obj_position)
731
-
732
- if unique_objects:
733
- deduplicated_objects_by_class[class_name] = unique_objects
734
-
735
- objects_by_class = deduplicated_objects_by_class
736
- print(f"DEBUG: After spatial deduplication:")
737
- for class_name in ["car", "traffic light", "person", "handbag"]:
738
- if class_name in objects_by_class:
739
- print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects after dedup")
740
-
741
- sorted_object_groups = sorted(objects_by_class.items(), key=sort_key_object_groups)
742
-
743
- object_clauses = []
744
-
745
- for class_name, group_of_objects in sorted_object_groups:
746
- count = len(group_of_objects)
747
- if class_name in ["car", "traffic light", "person", "handbag"]:
748
- print(f"DEBUG: Final count for {class_name}: {count}")
749
- if count == 0:
750
- continue
751
-
752
- # 標準化class name
753
- normalized_class_name = self._normalize_object_class_name(class_name)
754
-
755
- # 使用統計信息確保準確的數量描述
756
- if object_statistics and class_name in object_statistics:
757
- actual_count = object_statistics[class_name]["count"]
758
- formatted_name_with_exact_count = self._format_object_count_description(
759
- normalized_class_name,
760
- actual_count,
761
- scene_type=scene_type
762
- )
763
- else:
764
- formatted_name_with_exact_count = self._format_object_count_description(
765
- normalized_class_name,
766
- count,
767
- scene_type=scene_type
768
- )
769
-
770
- if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
771
- continue
772
-
773
- # 確定群組的集體位置
774
- location_description_suffix = ""
775
- if count == 1:
776
- spatial_desc = self.get_spatial_description(group_of_objects[0], image_width, image_height, self.region_analyzer)
777
- if spatial_desc:
778
- location_description_suffix = f"is {spatial_desc}"
779
- else:
780
- distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
781
- valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
782
- if not valid_regions:
783
- location_description_suffix = "is positioned in the scene"
784
- elif len(valid_regions) == 1:
785
- spatial_desc = self.get_spatial_description_phrase(valid_regions[0])
786
- location_description_suffix = f"is primarily {spatial_desc}" if spatial_desc else "is positioned in the scene"
787
- elif len(valid_regions) == 2:
788
- clean_region1 = valid_regions[0].replace('_', ' ')
789
- clean_region2 = valid_regions[1].replace('_', ' ')
790
- location_description_suffix = f"is mainly across the {clean_region1} and {clean_region2} areas"
791
- else:
792
- location_description_suffix = "is distributed in various parts of the scene"
793
- else:
794
- distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
795
- valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
796
- if not valid_regions:
797
- location_description_suffix = "are visible in the scene"
798
- elif len(valid_regions) == 1:
799
- clean_region = valid_regions[0].replace('_', ' ')
800
- location_description_suffix = f"are primarily in the {clean_region} area"
801
- elif len(valid_regions) == 2:
802
- clean_region1 = valid_regions[0].replace('_', ' ')
803
- clean_region2 = valid_regions[1].replace('_', ' ')
804
- location_description_suffix = f"are mainly across the {clean_region1} and {clean_region2} areas"
805
- else:
806
- location_description_suffix = "are distributed in various parts of the scene"
807
-
808
- # 首字母大寫
809
- formatted_name_capitalized = formatted_name_with_exact_count[0].upper() + formatted_name_with_exact_count[1:]
810
- object_clauses.append(f"{formatted_name_capitalized} {location_description_suffix}")
811
 
812
  if object_clauses:
813
  if not description_segments:
@@ -845,7 +304,7 @@ class ObjectDescriptionGenerator:
845
  raw_description += "."
846
 
847
  # 移除重複性和不適當的描述詞彙
848
- raw_description = self._remove_repetitive_descriptors(raw_description)
849
 
850
  if not raw_description or len(raw_description.strip()) < 20:
851
  if 'confident_objects' in locals() and confident_objects:
@@ -860,586 +319,6 @@ class ObjectDescriptionGenerator:
860
  self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
861
  raise ObjectDescriptionError(error_msg) from e
862
 
863
- def _remove_repetitive_descriptors(self, description: str) -> str:
864
- """
865
- 移除描述中的重複性和不適當的描述詞彙,特別是 "identical" 等詞彙
866
-
867
- Args:
868
- description: 原始描述文本
869
-
870
- Returns:
871
- str: 清理後的描述文本
872
- """
873
- try:
874
- import re
875
-
876
- # 定義需要移除或替換的模式
877
- cleanup_patterns = [
878
- # 移除 "identical" 描述模式
879
- (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
880
- (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
881
- (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
882
-
883
- # 改善 "comprehensive arrangement" 等過於技術性的表達
884
- (r'\bcomprehensive arrangement of\b', 'arrangement of'),
885
- (r'\bcomprehensive view featuring\b', 'scene featuring'),
886
- (r'\bcomprehensive display of\b', 'display of'),
887
-
888
- # 簡化過度描述性的短語
889
- (r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
890
- (r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
891
- ]
892
-
893
- processed_description = description
894
- for pattern, replacement in cleanup_patterns:
895
- processed_description = re.sub(pattern, replacement, processed_description, flags=re.IGNORECASE)
896
-
897
- # 進一步清理可能的多餘空格
898
- processed_description = re.sub(r'\s+', ' ', processed_description).strip()
899
-
900
- self.logger.debug(f"Cleaned description: removed repetitive descriptors")
901
- return processed_description
902
-
903
- except Exception as e:
904
- self.logger.warning(f"Error removing repetitive descriptors: {str(e)}")
905
- return description
906
-
907
- def _format_object_count_description(self, class_name: str, count: int,
908
- scene_type: Optional[str] = None,
909
- detected_objects: Optional[List[Dict]] = None,
910
- avg_confidence: float = 0.0) -> str:
911
- """
912
- 格式化物件數量描述的核心方法,整合空間排列、材質推斷和場景語境
913
-
914
- 這個方法是整個物件描述系統的核心,它將多個子功能整合在一起:
915
- 1. 數字到文字的轉換(避免阿拉伯數字)
916
- 2. 基於場景的材質推斷
917
- 3. 空間排列模式的描述
918
- 4. 語境化的物件描述
919
-
920
- Args:
921
- class_name: 標準化後的類別名稱
922
- count: 物件數量
923
- scene_type: 場景類型,用於語境化描述
924
- detected_objects: 該類型的所有檢測物件,用於空間分析
925
- avg_confidence: 平均檢測置信度,影響材質推斷的可信度
926
-
927
- Returns:
928
- str: 完整的格式化數量描述
929
- """
930
- try:
931
- if count <= 0:
932
- return ""
933
-
934
- # 獲取基礎的複數形式
935
- plural_form = self._get_plural_form(class_name)
936
-
937
- # 單數情況的處理
938
- if count == 1:
939
- return self._format_single_object_description(class_name, scene_type,
940
- detected_objects, avg_confidence)
941
-
942
- # 複數情況的處理
943
- return self._format_multiple_objects_description(class_name, count, plural_form,
944
- scene_type, detected_objects, avg_confidence)
945
-
946
- except Exception as e:
947
- self.logger.warning(f"Error formatting object count for '{class_name}': {str(e)}")
948
- return f"{count} {class_name}s" if count > 1 else class_name
949
-
950
- def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
951
- detected_objects: Optional[List[Dict]],
952
- avg_confidence: float) -> str:
953
- """
954
- 處理單個物件的描述生成
955
-
956
- 對於單個物件,我們重點在於通過材質推斷和位置描述來豐富描述內容,
957
- 避免簡單的 "a chair" 這樣的描述,而是生成 "a wooden dining chair" 這樣的表達
958
-
959
- Args:
960
- class_name: 物件類別名稱
961
- scene_type: 場景類型
962
- detected_objects: 檢測物件列表
963
- avg_confidence: 平均置信度
964
-
965
- Returns:
966
- str: 單個物件的完整描述
967
- """
968
- article = "an" if class_name[0].lower() in 'aeiou' else "a"
969
-
970
- # 獲取材質描述符
971
- material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
972
-
973
- # 獲取位置或特徵描述符
974
- feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)
975
-
976
- # 組合描述
977
- descriptors = []
978
- if material_descriptor:
979
- descriptors.append(material_descriptor)
980
- if feature_descriptor:
981
- descriptors.append(feature_descriptor)
982
-
983
- if descriptors:
984
- return f"{article} {' '.join(descriptors)} {class_name}"
985
- else:
986
- return f"{article} {class_name}"
987
-
988
- def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
989
- scene_type: Optional[str], detected_objects: Optional[List[Dict]],
990
- avg_confidence: float) -> str:
991
- """
992
- 處理多個物件的描述生成
993
-
994
- 對於多個物件,我們的重點是:
995
- 1. 將數字轉換為文字表達
996
- 2. 分析空間排列模式
997
- 3. 添加適當的材質或功能描述
998
- 4. 生成自然流暢的描述
999
-
1000
- Args:
1001
- class_name: 物件類別名稱
1002
- count: 物件數量
1003
- plural_form: 複數形式
1004
- scene_type: 場景類型
1005
- detected_objects: 檢測物件列表
1006
- avg_confidence: 平均置信度
1007
-
1008
- Returns:
1009
- str: 多個物件的完整描述
1010
- """
1011
- # 數字到文字的轉換映射
1012
- number_words = {
1013
- 2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
1014
- 7: "seven", 8: "eight", 9: "nine", 10: "ten",
1015
- 11: "eleven", 12: "twelve"
1016
- }
1017
-
1018
- # 確定基礎數量表達
1019
- if count in number_words:
1020
- count_expression = number_words[count]
1021
- elif count <= 20:
1022
- count_expression = "several"
1023
- else:
1024
- count_expression = "numerous"
1025
-
1026
- # 獲取材質或功能描述符
1027
- material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
1028
-
1029
- # 獲取空間排列描述
1030
- spatial_descriptor = self._get_spatial_arrangement_descriptor(class_name, scene_type,
1031
- detected_objects, count)
1032
-
1033
- # 組合最終描述
1034
- descriptors = []
1035
- if material_descriptor:
1036
- descriptors.append(material_descriptor)
1037
-
1038
- # 構建基礎描述
1039
- base_description = f"{count_expression} {' '.join(descriptors)} {plural_form}".strip()
1040
-
1041
- # 添加空間排列信息
1042
- if spatial_descriptor:
1043
- return f"{base_description} {spatial_descriptor}"
1044
- else:
1045
- return base_description
1046
-
1047
- def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
1048
- avg_confidence: float) -> Optional[str]:
1049
- """
1050
- 基於場景語境和置信度進行材質推斷
1051
-
1052
- 這個方法實現了智能的材質推斷,它不依賴複雜的圖像分析,
1053
- 而是基於常識和場景邏輯來推斷最可能的材質描述
1054
-
1055
- Args:
1056
- class_name: 物件類別名稱
1057
- scene_type: 場景類型
1058
- avg_confidence: 檢測置信度,影響推斷的保守程度
1059
-
1060
- Returns:
1061
- Optional[str]: 材質描述符,如果無法推斷則返回None
1062
- """
1063
- # 只有在置信度足夠高時才進行材質推斷
1064
- if avg_confidence < 0.5:
1065
- return None
1066
-
1067
- # 餐廳和用餐相關場景
1068
- if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
1069
- material_mapping = {
1070
- "chair": "wooden" if avg_confidence > 0.7 else None,
1071
- "dining table": "wooden",
1072
- "couch": "upholstered",
1073
- "vase": "decorative"
1074
- }
1075
- return material_mapping.get(class_name)
1076
-
1077
- # 辦公場景
1078
- elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
1079
- material_mapping = {
1080
- "chair": "office",
1081
- "dining table": "conference", # 在辦公環境中,餐桌通常是會議桌
1082
- "laptop": "modern",
1083
- "book": "reference"
1084
- }
1085
- return material_mapping.get(class_name)
1086
-
1087
- # 客廳場景
1088
- elif scene_type and scene_type in ["living_room"]:
1089
- material_mapping = {
1090
- "couch": "comfortable",
1091
- "chair": "accent",
1092
- "tv": "large",
1093
- "vase": "decorative"
1094
- }
1095
- return material_mapping.get(class_name)
1096
-
1097
- # 室外場景
1098
- elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
1099
- material_mapping = {
1100
- "car": "parked",
1101
- "person": "walking",
1102
- "bicycle": "stationed"
1103
- }
1104
- return material_mapping.get(class_name)
1105
-
1106
- # 如果沒有特定的場景映射,返回通用描述符
1107
- generic_mapping = {
1108
- "chair": "comfortable",
1109
- "dining table": "sturdy",
1110
- "car": "parked",
1111
- "person": "present"
1112
- }
1113
-
1114
- return generic_mapping.get(class_name)
1115
-
1116
- def _get_spatial_arrangement_descriptor(self, class_name: str, scene_type: Optional[str],
1117
- detected_objects: Optional[List[Dict]],
1118
- count: int) -> Optional[str]:
1119
- """
1120
- 分析物件的空間排列模式並生成相應描述
1121
-
1122
- 這個方法通過分析物件的位置分布來判斷排列模式,
1123
- 然後根據物件類型和場景生成適當的空間描述
1124
-
1125
- Args:
1126
- class_name: 物件類別名稱
1127
- scene_type: 場景類型
1128
- detected_objects: 該類型的所有檢測物件
1129
- count: 物件數量
1130
-
1131
- Returns:
1132
- Optional[str]: 空間排列描述,如果無法分析則返回None
1133
- """
1134
- if not detected_objects or len(detected_objects) < 2:
1135
- return None
1136
-
1137
- try:
1138
- # 提取物件的標準化位置
1139
- positions = []
1140
- for obj in detected_objects:
1141
- center = obj.get("normalized_center", [0.5, 0.5])
1142
- if isinstance(center, (list, tuple)) and len(center) >= 2:
1143
- positions.append(center)
1144
-
1145
- if len(positions) < 2:
1146
- return None
1147
-
1148
- # 分析排列模式
1149
- arrangement_pattern = self._analyze_arrangement_pattern(positions)
1150
-
1151
- # 根據物件類型和場景生成描述
1152
- return self._generate_arrangement_description(class_name, scene_type,
1153
- arrangement_pattern, count)
1154
-
1155
- except Exception as e:
1156
- self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
1157
- return None
1158
-
1159
- def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
1160
- """
1161
- 分析位置點的排列模式
1162
-
1163
- 這個方法使用簡單的幾何分析來判斷物件的排列類型,
1164
- 幫助我們理解物件在空間中的組織方式
1165
-
1166
- Args:
1167
- positions: 標準化的位置座標列表
1168
-
1169
- Returns:
1170
- str: 排列模式類型(linear, clustered, scattered, circular等)
1171
- """
1172
- import numpy as np
1173
-
1174
- if len(positions) < 2:
1175
- return "single"
1176
-
1177
- # 轉換為numpy陣列便於計算
1178
- pos_array = np.array(positions)
1179
-
1180
- # 計算位置的分布特徵
1181
- x_coords = pos_array[:, 0]
1182
- y_coords = pos_array[:, 1]
1183
-
1184
- # 分析x和y方向的變異程度
1185
- x_variance = np.var(x_coords)
1186
- y_variance = np.var(y_coords)
1187
-
1188
- # 計算物件間的平均距離
1189
- distances = []
1190
- for i in range(len(positions)):
1191
- for j in range(i + 1, len(positions)):
1192
- dist = np.sqrt((positions[i][0] - positions[j][0])**2 +
1193
- (positions[i][1] - positions[j][1])**2)
1194
- distances.append(dist)
1195
-
1196
- avg_distance = np.mean(distances) if distances else 0
1197
- distance_variance = np.var(distances) if distances else 0
1198
-
1199
- # 判斷排列模式
1200
- if len(positions) >= 4 and self._is_circular_pattern(positions):
1201
- return "circular"
1202
- elif x_variance < 0.05 or y_variance < 0.05: # 一個方向變異很小
1203
- return "linear"
1204
- elif avg_distance < 0.3 and distance_variance < 0.02: # 物件聚集且距離相近
1205
- return "clustered"
1206
- elif avg_distance > 0.6: # 物件分散
1207
- return "scattered"
1208
- elif distance_variance < 0.03: # 距離一致,可能是規則排列
1209
- return "regular"
1210
- else:
1211
- return "distributed"
1212
-
1213
- def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
1214
- """
1215
- 檢查位置是否形成圓形或環形排列
1216
-
1217
- Args:
1218
- positions: 位置座標列表
1219
-
1220
- Returns:
1221
- bool: 是否為圓形排列
1222
- """
1223
- import numpy as np
1224
-
1225
- if len(positions) < 4:
1226
- return False
1227
-
1228
- try:
1229
- pos_array = np.array(positions)
1230
-
1231
- # 計算中心點
1232
- center_x = np.mean(pos_array[:, 0])
1233
- center_y = np.mean(pos_array[:, 1])
1234
-
1235
- # 計算每個點到中心的距離
1236
- distances_to_center = []
1237
- for pos in positions:
1238
- dist = np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
1239
- distances_to_center.append(dist)
1240
-
1241
- # 如果所有距離都相近,可能是圓形排列
1242
- distance_variance = np.var(distances_to_center)
1243
- return distance_variance < 0.05 and np.mean(distances_to_center) > 0.2
1244
-
1245
- except:
1246
- return False
1247
-
1248
- def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
1249
- arrangement_pattern: str, count: int) -> Optional[str]:
1250
- """
1251
- 根據物件類型、場景和排列模式生成空間描述
1252
-
1253
- 這個方法將抽象的排列模式轉換為自然語言描述,
1254
- 並根據具體的物件類型和場景語境進行定制
1255
-
1256
- Args:
1257
- class_name: 物件類別名稱
1258
- scene_type: 場景類型
1259
- arrangement_pattern: 排列模式
1260
- count: 物件數量
1261
-
1262
- Returns:
1263
- Optional[str]: 生成的空間排列描述
1264
- """
1265
- # 基於物件類型的描述模板
1266
- arrangement_templates = {
1267
- "chair": {
1268
- "linear": "arranged in a row",
1269
- "clustered": "grouped together for conversation",
1270
- "circular": "arranged around the table",
1271
- "scattered": "positioned throughout the space",
1272
- "regular": "evenly spaced",
1273
- "distributed": "thoughtfully positioned"
1274
- },
1275
- "dining table": {
1276
- "linear": "aligned to create a unified dining space",
1277
- "clustered": "grouped to form intimate dining areas",
1278
- "scattered": "distributed to optimize space flow",
1279
- "regular": "systematically positioned",
1280
- "distributed": "strategically placed"
1281
- },
1282
- "car": {
1283
- "linear": "parked in sequence",
1284
- "clustered": "grouped in the parking area",
1285
- "scattered": "distributed throughout the lot",
1286
- "regular": "neatly parked",
1287
- "distributed": "positioned across the area"
1288
- },
1289
- "person": {
1290
- "linear": "moving in a line",
1291
- "clustered": "gathered together",
1292
- "circular": "forming a circle",
1293
- "scattered": "spread across the area",
1294
- "distributed": "positioned throughout the scene"
1295
- }
1296
- }
1297
-
1298
- # 獲取對應的描述模板
1299
- if class_name in arrangement_templates:
1300
- template_dict = arrangement_templates[class_name]
1301
- base_description = template_dict.get(arrangement_pattern, "positioned in the scene")
1302
- else:
1303
- # 通用的排列描述
1304
- generic_templates = {
1305
- "linear": "arranged in a line",
1306
- "clustered": "grouped together",
1307
- "circular": "arranged in a circular pattern",
1308
- "scattered": "distributed across the space",
1309
- "regular": "evenly positioned",
1310
- "distributed": "thoughtfully placed"
1311
- }
1312
- base_description = generic_templates.get(arrangement_pattern, "positioned in the scene")
1313
-
1314
- return base_description
1315
-
1316
- def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
1317
- detected_objects: Optional[List[Dict]]) -> Optional[str]:
1318
- """
1319
- 為單個物件生成特徵描述符
1320
-
1321
- 當只有一個物件時,我們可以提供更具體的位置或功能描述
1322
-
1323
- Args:
1324
- class_name: 物件類別名稱
1325
- scene_type: 場景類型
1326
- detected_objects: 檢測物件(單個)
1327
-
1328
- Returns:
1329
- Optional[str]: 特徵描述符
1330
- """
1331
- if not detected_objects or len(detected_objects) != 1:
1332
- return None
1333
-
1334
- obj = detected_objects[0]
1335
- region = obj.get("region", "").lower()
1336
-
1337
- # 基於位置的描述
1338
- if "center" in region:
1339
- if class_name == "dining table":
1340
- return "central"
1341
- elif class_name == "chair":
1342
- return "centrally placed"
1343
- elif "corner" in region or "left" in region or "right" in region:
1344
- return "positioned"
1345
-
1346
- # 基於場景的功能描述
1347
- if scene_type and scene_type in ["dining_area", "restaurant"]:
1348
- if class_name == "chair":
1349
- return "dining"
1350
- elif class_name == "vase":
1351
- return "decorative"
1352
-
1353
- return None
1354
-
1355
- def _get_plural_form(self, word: str) -> str:
1356
- """
1357
- 獲取詞彙的複數形式
1358
-
1359
- Args:
1360
- word: 單數詞彙
1361
-
1362
- Returns:
1363
- str: 複數形式
1364
- """
1365
- try:
1366
- # 特殊複數形式
1367
- irregular_plurals = {
1368
- 'person': 'people',
1369
- 'child': 'children',
1370
- 'foot': 'feet',
1371
- 'tooth': 'teeth',
1372
- 'mouse': 'mice',
1373
- 'man': 'men',
1374
- 'woman': 'women'
1375
- }
1376
-
1377
- if word.lower() in irregular_plurals:
1378
- return irregular_plurals[word.lower()]
1379
-
1380
- # 規則複數形式
1381
- if word.endswith(('s', 'sh', 'ch', 'x', 'z')):
1382
- return word + 'es'
1383
- elif word.endswith('y') and word[-2] not in 'aeiou':
1384
- return word[:-1] + 'ies'
1385
- elif word.endswith('f'):
1386
- return word[:-1] + 'ves'
1387
- elif word.endswith('fe'):
1388
- return word[:-2] + 'ves'
1389
- else:
1390
- return word + 's'
1391
-
1392
- except Exception as e:
1393
- self.logger.warning(f"Error getting plural form for '{word}': {str(e)}")
1394
- return word + 's'
1395
-
1396
- def _normalize_object_class_name(self, class_name: str) -> str:
1397
- """
1398
- 標準化物件類別名稱,確保輸出自然語言格式
1399
-
1400
- Args:
1401
- class_name: 原始類別名稱
1402
-
1403
- Returns:
1404
- str: 標準化後的類別名稱
1405
- """
1406
- try:
1407
- if not class_name or not isinstance(class_name, str):
1408
- return "object"
1409
-
1410
- # 移除可能的技術性前綴或後綴
1411
- import re
1412
- normalized = re.sub(r'^(class_|id_|type_)', '', class_name.lower())
1413
- normalized = re.sub(r'(_class|_id|_type)$', '', normalized)
1414
-
1415
- # 將下劃線和連字符替換為空格
1416
- normalized = normalized.replace('_', ' ').replace('-', ' ')
1417
-
1418
- # 移除多餘空格
1419
- normalized = ' '.join(normalized.split())
1420
-
1421
- # 特殊類別名稱的標準化映射
1422
- class_name_mapping = {
1423
- 'traffic light': 'traffic light',
1424
- 'stop sign': 'stop sign',
1425
- 'fire hydrant': 'fire hydrant',
1426
- 'dining table': 'dining table',
1427
- 'potted plant': 'potted plant',
1428
- 'tv monitor': 'television',
1429
- 'cell phone': 'mobile phone',
1430
- 'wine glass': 'wine glass',
1431
- 'hot dog': 'hot dog',
1432
- 'teddy bear': 'teddy bear',
1433
- 'hair drier': 'hair dryer',
1434
- 'toothbrush': 'toothbrush'
1435
- }
1436
-
1437
- return class_name_mapping.get(normalized, normalized)
1438
-
1439
- except Exception as e:
1440
- self.logger.warning(f"Error normalizing class name '{class_name}': {str(e)}")
1441
- return class_name if isinstance(class_name, str) else "object"
1442
-
1443
  def generate_basic_details(self, scene_type: str, detected_objects: List[Dict]) -> str:
1444
  """
1445
  當模板不可用時生成基本詳細��息
@@ -1588,7 +467,7 @@ class ObjectDescriptionGenerator:
1588
  furniture_names = []
1589
  for obj in furniture_objects[:3]:
1590
  raw_name = obj.get("class_name", "furniture")
1591
- normalized_name = self._normalize_object_class_name(raw_name)
1592
  furniture_names.append(normalized_name)
1593
 
1594
  unique_names = list(set(furniture_names))
@@ -1786,7 +665,6 @@ class ObjectDescriptionGenerator:
1786
  return "functional area"
1787
 
1788
  # 移除數字後綴(如 crossing_zone_1 -> crossing_zone)
1789
- import re
1790
  base_name = re.sub(r'_\d+$', '', zone_name)
1791
 
1792
  # 將下劃線替換為空格
@@ -1851,9 +729,16 @@ class ObjectDescriptionGenerator:
1851
  old_value = getattr(self, key)
1852
  setattr(self, key, value)
1853
  self.logger.info(f"Updated {key}: {old_value} -> {value}")
 
 
 
 
 
 
 
1854
  else:
1855
  self.logger.warning(f"Unknown configuration parameter: {key}")
1856
 
1857
  except Exception as e:
1858
  self.logger.error(f"Error updating configuration: {str(e)}")
1859
- raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e
 
4
  from typing import Dict, List, Tuple, Optional, Any
5
  import numpy as np
6
 
7
+ from prominence_calculator import ProminenceCalculator
8
+ from spatial_location_handler import SpatialLocationHandler
9
+ from text_optimizer import TextOptimizer
10
+ from object_group_processor import ObjectGroupProcessor
11
+
12
class ObjectDescriptionError(Exception):
    """Custom exception raised while generating object descriptions."""
 
17
  class ObjectDescriptionGenerator:
18
  """
19
  物件描述生成器 - 負責將檢測到的物件轉換為自然語言描述
20
+ 匯總於EnhancedSceneDescriber
21
 
22
  該類別處理物件相關的所有描述生成邏輯,包括重要物件的辨識、
23
  空間位置描述、物件列表格式化以及描述文本的優化。
24
+
25
+ 作為 Facade 模式的實現,協調四個專門的子組件來完成複雜的描述生成任務。
26
  """
27
 
28
  def __init__(self,
 
39
  max_categories_to_return: 返回的物件類別最大數量
40
  max_total_objects: 返回的物件總數上限
41
  confidence_threshold_for_description: 用於描述的置信度閾值
42
+ region_analyzer: 可選的RegionAnalyzer實例
43
  """
44
  self.logger = logging.getLogger(self.__class__.__name__)
45
 
 
49
  self.confidence_threshold_for_description = confidence_threshold_for_description
50
  self.region_analyzer = region_analyzer
51
 
52
+ # 初始化子組件
53
+ self.prominence_calculator = ProminenceCalculator(
54
+ min_prominence_score=self.min_prominence_score
55
+ )
56
+
57
+ self.spatial_handler = SpatialLocationHandler(
58
+ region_analyzer=self.region_analyzer
59
+ )
60
+
61
+ self.text_optimizer = TextOptimizer()
62
+
63
+ self.object_group_processor = ObjectGroupProcessor(
64
+ confidence_threshold_for_description=self.confidence_threshold_for_description,
65
+ spatial_handler=self.spatial_handler,
66
+ text_optimizer=self.text_optimizer
67
+ )
68
+
69
  self.logger.info("ObjectDescriptionGenerator initialized with prominence_score=%.2f, "
70
  "max_categories=%d, max_objects=%d, confidence_threshold=%.2f",
71
  min_prominence_score, max_categories_to_return,
 
85
  Returns:
86
  List[Dict]: 按重要性排序的物件列表
87
  """
88
+ return self.prominence_calculator.filter_prominent_objects(
89
+ detected_objects=detected_objects,
90
+ min_prominence_score=min_prominence_score,
91
+ max_categories_to_return=max_categories_to_return
92
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  def set_region_analyzer(self, region_analyzer: Any) -> None:
95
  """
 
100
  """
101
  try:
102
  self.region_analyzer = region_analyzer
103
+ self.spatial_handler.set_region_analyzer(region_analyzer)
104
  self.logger.info("RegionAnalyzer instance set for ObjectDescriptionGenerator")
105
  except Exception as e:
106
  self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  def format_object_list_for_description(self,
109
  objects: List[Dict],
110
  use_indefinite_article_for_one: bool = False,
 
122
  Returns:
123
  str: 格式化的物件描述字符串
124
  """
125
+ return self.text_optimizer.format_object_list_for_description(
126
+ objects=objects,
127
+ use_indefinite_article_for_one=use_indefinite_article_for_one,
128
+ count_threshold_for_generalization=count_threshold_for_generalization,
129
+ max_types_to_list=max_types_to_list
130
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  def get_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
133
  image_height: Optional[int] = None,
 
144
  Returns:
145
  str: 空間描述字符串,空值region時返回空字串
146
  """
147
+ return self.spatial_handler.generate_spatial_description(
148
+ obj=obj,
149
+ image_width=image_width,
150
+ image_height=image_height,
151
+ region_analyzer=region_analyzer
152
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  def optimize_object_description(self, description: str) -> str:
155
  """
156
+ 優化物件描述文本,消除多餘重複並改善表達流暢度
 
 
 
 
157
 
158
  Args:
159
  description: 原始的場景描述文本,可能包含重複或冗餘的表達
 
161
  Returns:
162
  str: 經過優化清理的描述文本,如果處理失敗則返回原始文本
163
  """
164
+ return self.text_optimizer.optimize_object_description(description)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  def generate_dynamic_everyday_description(self,
167
  detected_objects: List[Dict],
 
189
  try:
190
  description_segments = []
191
  image_width, image_height = image_dimensions if image_dimensions else (None, None)
192
+ scene_type = places365_info.get("scene", "") if places365_info else ""
193
 
194
  self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
195
  f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
 
229
  else:
230
  description_segments.append("Within this setting, no specific objects were clearly identified.")
231
  else:
 
 
232
  # 使用置信度過濾
233
  confident_objects = [obj for obj in detected_objects
234
  if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
 
244
  else:
245
  description_segments.append(no_confident_obj_msg.lower().capitalize())
246
  else:
247
+ # 使用 ObjectGroupProcessor 處理物件分組和排序
248
+ objects_by_class = self.object_group_processor.group_objects_by_class(
249
+ confident_objects, object_statistics
250
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
  if not objects_by_class:
253
  description_segments.append("No common objects were confidently identified for detailed description.")
254
  else:
255
+ # 移除重複物件
256
+ deduplicated_objects_by_class = self.object_group_processor.remove_duplicate_objects(
257
+ objects_by_class
258
+ )
259
+
260
+ # 排序物件組
261
+ sorted_object_groups = self.object_group_processor.sort_object_groups(
262
+ deduplicated_objects_by_class
263
+ )
264
+
265
+ # 生成物件描述子句
266
+ object_clauses = self.object_group_processor.generate_object_clauses(
267
+ sorted_object_groups, object_statistics, scene_type,
268
+ image_width, image_height, self.region_analyzer
269
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
  if object_clauses:
272
  if not description_segments:
 
304
  raw_description += "."
305
 
306
  # 移除重複性和不適當的描述詞彙
307
+ raw_description = self.text_optimizer.remove_repetitive_descriptors(raw_description)
308
 
309
  if not raw_description or len(raw_description.strip()) < 20:
310
  if 'confident_objects' in locals() and confident_objects:
 
319
  self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
320
  raise ObjectDescriptionError(error_msg) from e
321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  def generate_basic_details(self, scene_type: str, detected_objects: List[Dict]) -> str:
323
  """
324
  當模板不可用時生成基本詳細��息
 
467
  furniture_names = []
468
  for obj in furniture_objects[:3]:
469
  raw_name = obj.get("class_name", "furniture")
470
+ normalized_name = self.text_optimizer.normalize_object_class_name(raw_name)
471
  furniture_names.append(normalized_name)
472
 
473
  unique_names = list(set(furniture_names))
 
665
  return "functional area"
666
 
667
  # 移除數字後綴(如 crossing_zone_1 -> crossing_zone)
 
668
  base_name = re.sub(r'_\d+$', '', zone_name)
669
 
670
  # 將下劃線替換為空格
 
729
  old_value = getattr(self, key)
730
  setattr(self, key, value)
731
  self.logger.info(f"Updated {key}: {old_value} -> {value}")
732
+
733
+ # 同步更新子組件的配置
734
+ if key == "min_prominence_score" and hasattr(self, 'prominence_calculator'):
735
+ self.prominence_calculator.min_prominence_score = value
736
+ elif key == "confidence_threshold_for_description" and hasattr(self, 'object_group_processor'):
737
+ self.object_group_processor.confidence_threshold_for_description = value
738
+
739
  else:
740
  self.logger.warning(f"Unknown configuration parameter: {key}")
741
 
742
  except Exception as e:
743
  self.logger.error(f"Error updating configuration: {str(e)}")
744
+ raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e
object_group_processor.py ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Dict, List, Tuple, Optional, Any
3
+
4
+ class ObjectGroupProcessor:
5
+ """
6
+ 物件組處理器 - 專門處理物件分組、排序和子句生成的邏輯
7
+ 負責物件按類別分組、重複物件檢測移除、物件組優先級排序以及描述子句的生成
8
+ """
9
+
10
+ def __init__(self, confidence_threshold_for_description: float = 0.25,
11
+ spatial_handler: Optional[Any] = None,
12
+ text_optimizer: Optional[Any] = None):
13
+ """
14
+ 初始化物件組處理器
15
+
16
+ Args:
17
+ confidence_threshold_for_description: 用於描述的置信度閾值
18
+ spatial_handler: 空間位置處理器實例
19
+ text_optimizer: 文本優化器實例
20
+ """
21
+ self.logger = logging.getLogger(self.__class__.__name__)
22
+ self.confidence_threshold_for_description = confidence_threshold_for_description
23
+ self.spatial_handler = spatial_handler
24
+ self.text_optimizer = text_optimizer
25
+
26
+ def group_objects_by_class(self, confident_objects: List[Dict],
27
+ object_statistics: Optional[Dict]) -> Dict[str, List[Dict]]:
28
+ """
29
+ 按類別分組物件
30
+
31
+ Args:
32
+ confident_objects: 置信度過濾後的物件
33
+ object_statistics: 物件統計信息
34
+
35
+ Returns:
36
+ Dict[str, List[Dict]]: 按類別分組的物件
37
+ """
38
+ objects_by_class = {}
39
+
40
+ if object_statistics:
41
+ # 使用預計算的統計信息,採用動態的信心度
42
+ for class_name, stats in object_statistics.items():
43
+ count = stats.get("count", 0)
44
+ avg_confidence = stats.get("avg_confidence", 0)
45
+
46
+ # 動態調整置信度閾值
47
+ dynamic_threshold = self.confidence_threshold_for_description
48
+ if class_name in ["potted plant", "vase", "clock", "book"]:
49
+ dynamic_threshold = max(0.15, self.confidence_threshold_for_description * 0.6)
50
+ elif count >= 3:
51
+ dynamic_threshold = max(0.2, self.confidence_threshold_for_description * 0.8)
52
+
53
+ if count > 0 and avg_confidence >= dynamic_threshold:
54
+ matching_objects = [obj for obj in confident_objects if obj.get("class_name") == class_name]
55
+ if not matching_objects:
56
+ matching_objects = [obj for obj in confident_objects
57
+ if obj.get("class_name") == class_name and obj.get("confidence", 0) >= dynamic_threshold]
58
+
59
+ if matching_objects:
60
+ actual_count = min(stats["count"], len(matching_objects))
61
+ objects_by_class[class_name] = matching_objects[:actual_count]
62
+
63
+ # Debug logging for specific classes
64
+ if class_name in ["car", "traffic light", "person", "handbag"]:
65
+ print(f"DEBUG: Before spatial deduplication:")
66
+ print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects before dedup")
67
+ else:
68
+ # 備用邏輯,同樣使用動態閾值
69
+ for obj in confident_objects:
70
+ name = obj.get("class_name", "unknown object")
71
+ if name == "unknown object" or not name:
72
+ continue
73
+ if name not in objects_by_class:
74
+ objects_by_class[name] = []
75
+ objects_by_class[name].append(obj)
76
+
77
+ return objects_by_class
78
+
79
def remove_duplicate_objects(self, objects_by_class: Dict[str, List[Dict]]) -> Dict[str, List[Dict]]:
    """
    Remove spatially duplicated detections.

    An object is treated as a duplicate when the Manhattan distance between
    its ``normalized_center`` and the center of any previously kept object
    is below 0.15. The list of kept centers is shared across all classes,
    so a detection can be suppressed by an object of a different class.

    Objects that carry no usable ``normalized_center`` cannot be compared
    spatially; they are kept as-is instead of being collapsed onto a
    default position (which previously dropped all but the first of them).

    Args:
        objects_by_class: Detected objects grouped by class name.

    Returns:
        Dict[str, List[Dict]]: The same grouping with duplicates removed.
        Classes whose objects were all removed are omitted.
    """
    # Local import: the file-level import block is not visible from this method.
    import logging

    debug_log = logging.getLogger(__name__)
    duplicate_distance = 0.15

    deduplicated_objects_by_class: Dict[str, List[Dict]] = {}
    processed_positions: List[Any] = []

    for class_name, group_of_objects in objects_by_class.items():
        unique_objects = []

        for obj in group_of_objects:
            obj_position = obj.get("normalized_center")

            # No position data: nothing to compare against, keep the object.
            if obj_position is None or len(obj_position) < 2:
                unique_objects.append(obj)
                continue

            is_duplicate = any(
                abs(obj_position[0] - seen[0]) + abs(obj_position[1] - seen[1]) < duplicate_distance
                for seen in processed_positions
            )
            if not is_duplicate:
                unique_objects.append(obj)
                processed_positions.append(obj_position)

        if unique_objects:
            deduplicated_objects_by_class[class_name] = unique_objects

    # Diagnostic output for the classes that are watched during debugging.
    for class_name in ("car", "traffic light", "person", "handbag"):
        if class_name in deduplicated_objects_by_class:
            debug_log.debug(
                "After spatial deduplication: %s: %d objects after dedup",
                class_name,
                len(deduplicated_objects_by_class[class_name]),
            )

    return deduplicated_objects_by_class
119
+
120
def sort_object_groups(self, objects_by_class: Dict[str, List[Dict]]) -> List[Tuple[str, List[Dict]]]:
    """
    Order the object groups for description.

    Groups are ranked by (priority, larger group first, larger average
    ``normalized_area`` first, larger quantity bonus first). Priority is 0
    for people, 1 for large furniture, 2 for vehicles / traffic lights,
    2 for any other class with at least three objects or for small decor
    items with at least two, and 3 otherwise.

    Args:
        objects_by_class: Detected objects grouped by class name.

    Returns:
        List[Tuple[str, List[Dict]]]: ``(class_name, objects)`` pairs in
        description order.
    """
    named_priorities = (
        (("person",), 0),
        (("dining table", "chair", "sofa", "bed"), 1),
        (("car", "bus", "truck", "traffic light"), 2),
    )
    small_decor = ("potted plant", "vase", "clock", "book")

    def ranking(entry: Tuple[str, List[Dict]]):
        raw_name, members = entry
        size = len(members)

        # Class names are normalized before they are matched against the tables.
        label = self._normalize_object_class_name(raw_name)

        rank = None
        for labels, value in named_priorities:
            if label in labels:
                rank = value
                break
        if rank is None:
            if size >= 3:
                rank = 2
            elif label in small_decor and size >= 2:
                rank = 2
            else:
                rank = 3

        if members:
            mean_area = sum(member.get("normalized_area", 0.0) for member in members) / size
        else:
            mean_area = 0
        bonus = min(size / 5.0, 1.0)

        return (rank, -size, -mean_area, -bonus)

    return sorted(objects_by_class.items(), key=ranking)
156
+
157
def generate_object_clauses(self, sorted_object_groups: List[Tuple[str, List[Dict]]],
                            object_statistics: Optional[Dict],
                            scene_type: str,
                            image_width: Optional[int],
                            image_height: Optional[int],
                            region_analyzer: Optional[Any] = None) -> List[str]:
    """
    Build one descriptive clause per object group.

    The count used in the clause comes from ``object_statistics`` when an
    entry for the class exists, otherwise from the size of the group. The
    same count is used for both the noun phrase and the location phrase so
    that the verb agrees with the noun (previously a statistics count of 3
    with a single remaining object produced "Three cars is ...").

    Args:
        sorted_object_groups: ``(class_name, objects)`` pairs in output order.
        object_statistics: Optional per-class statistics; a ``"count"`` entry
            is read when present.
        scene_type: Scene type forwarded to the count formatter.
        image_width: Image width forwarded to the location description.
        image_height: Image height forwarded to the location description.
        region_analyzer: Optional region analyzer forwarded to the location
            description.

    Returns:
        List[str]: Capitalized clauses such as "Two chairs are ...".
    """
    # Local import: the file-level import block is not visible from this method.
    import logging

    debug_log = logging.getLogger(__name__)
    watched_classes = ("car", "traffic light", "person", "handbag")
    object_clauses = []

    for class_name, group_of_objects in sorted_object_groups:
        count = len(group_of_objects)

        if class_name in watched_classes:
            debug_log.debug("Final count for %s: %d", class_name, count)

        if count == 0:
            continue

        normalized_class_name = self._normalize_object_class_name(class_name)

        # Prefer the pre-computed statistics for the reported quantity; fall
        # back to the group size when the entry has no "count".
        described_count = count
        if object_statistics and class_name in object_statistics:
            described_count = object_statistics[class_name].get("count", count)

        formatted_name_with_exact_count = self._format_object_count_description(
            normalized_class_name,
            described_count,
            scene_type=scene_type
        )

        if not formatted_name_with_exact_count or \
                formatted_name_with_exact_count == "no specific objects clearly identified":
            continue

        # Collective location of the group; uses the described count so the
        # verb ("is" / "are") matches the noun phrase above.
        location_description_suffix = self._generate_location_description(
            group_of_objects, described_count, image_width, image_height, region_analyzer
        )

        # Capitalize only the first character, leaving the rest untouched.
        formatted_name_capitalized = (
            formatted_name_with_exact_count[0].upper() + formatted_name_with_exact_count[1:]
        )
        object_clauses.append(f"{formatted_name_capitalized} {location_description_suffix}")

    return object_clauses
220
+
221
def format_object_clauses(self, object_clauses: List[str]) -> str:
    """
    Join object clauses into the final description text.

    The first clause becomes its own sentence; any further clauses follow
    after "The scene features:" and are separated by ". ". The input list
    is left unmodified (the previous implementation popped its first
    element, silently altering the caller's list).

    Args:
        object_clauses: Clauses produced for each object group.

    Returns:
        str: The formatted description, or a fixed fallback sentence when
        there are no clauses.
    """
    if not object_clauses:
        return "No common objects were confidently identified for detailed description."

    first_clause, *remaining_clauses = object_clauses
    result = first_clause + "."

    if remaining_clauses:
        joined_object_clauses = ". ".join(remaining_clauses)
        # Terminate the last sentence unless the clause already did.
        if joined_object_clauses and not joined_object_clauses.endswith("."):
            joined_object_clauses += "."
        result += " The scene features: " + joined_object_clauses

    return result
247
+
248
+ def _generate_location_description(self, group_of_objects: List[Dict], count: int,
249
+ image_width: Optional[int], image_height: Optional[int],
250
+ region_analyzer: Optional[Any] = None) -> str:
251
+ """
252
+ 生成位置描述
253
+
254
+ Args:
255
+ group_of_objects: 物件組
256
+ count: 物件數量
257
+ image_width: 圖像寬度
258
+ image_height: 圖像高度
259
+ region_analyzer: 區域分析器實例
260
+
261
+ Returns:
262
+ str: 位置描述
263
+ """
264
+ if count == 1:
265
+ if self.spatial_handler:
266
+ spatial_desc = self.spatial_handler.generate_spatial_description(
267
+ group_of_objects[0], image_width, image_height, region_analyzer
268
+ )
269
+ else:
270
+ spatial_desc = self._get_spatial_description_phrase(group_of_objects[0].get("region", ""))
271
+
272
+ if spatial_desc:
273
+ return f"is {spatial_desc}"
274
+ else:
275
+ distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
276
+ valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
277
+ if not valid_regions:
278
+ return "is positioned in the scene"
279
+ elif len(valid_regions) == 1:
280
+ spatial_desc = self._get_spatial_description_phrase(valid_regions[0])
281
+ return f"is primarily {spatial_desc}" if spatial_desc else "is positioned in the scene"
282
+ elif len(valid_regions) == 2:
283
+ clean_region1 = valid_regions[0].replace('_', ' ')
284
+ clean_region2 = valid_regions[1].replace('_', ' ')
285
+ return f"is mainly across the {clean_region1} and {clean_region2} areas"
286
+ else:
287
+ return "is distributed in various parts of the scene"
288
+ else:
289
+ distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
290
+ valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
291
+ if not valid_regions:
292
+ return "are visible in the scene"
293
+ elif len(valid_regions) == 1:
294
+ clean_region = valid_regions[0].replace('_', ' ')
295
+ return f"are primarily in the {clean_region} area"
296
+ elif len(valid_regions) == 2:
297
+ clean_region1 = valid_regions[0].replace('_', ' ')
298
+ clean_region2 = valid_regions[1].replace('_', ' ')
299
+ return f"are mainly across the {clean_region1} and {clean_region2} areas"
300
+ else:
301
+ return "are distributed in various parts of the scene"
302
+
303
+ def _get_spatial_description_phrase(self, region: str) -> str:
304
+ """
305
+ 獲取空間描述短語的備用方法
306
+
307
+ Args:
308
+ region: 區域字符串
309
+
310
+ Returns:
311
+ str: 空間描述短語
312
+ """
313
+ if not region or region == "unknown":
314
+ return ""
315
+
316
+ clean_region = region.replace('_', ' ').strip().lower()
317
+
318
+ region_map = {
319
+ "top left": "in the upper left area",
320
+ "top center": "in the upper area",
321
+ "top right": "in the upper right area",
322
+ "middle left": "on the left side",
323
+ "middle center": "in the center",
324
+ "center": "in the center",
325
+ "middle right": "on the right side",
326
+ "bottom left": "in the lower left area",
327
+ "bottom center": "in the lower area",
328
+ "bottom right": "in the lower right area"
329
+ }
330
+
331
+ return region_map.get(clean_region, "")
332
+
333
+ def _normalize_object_class_name(self, class_name: str) -> str:
334
+ """
335
+ 標準化物件類別名稱
336
+
337
+ Args:
338
+ class_name: 原始類別名稱
339
+
340
+ Returns:
341
+ str: 標準化後的類別名稱
342
+ """
343
+ if self.text_optimizer:
344
+ return self.text_optimizer.normalize_object_class_name(class_name)
345
+ else:
346
+ # 備用標準化邏輯
347
+ if not class_name or not isinstance(class_name, str):
348
+ return "object"
349
+
350
+ # 簡單的標準化處理
351
+ normalized = class_name.replace('_', ' ').strip().lower()
352
+ return normalized
353
+
354
+ def _format_object_count_description(self, class_name: str, count: int,
355
+ scene_type: Optional[str] = None,
356
+ detected_objects: Optional[List[Dict]] = None,
357
+ avg_confidence: float = 0.0) -> str:
358
+ """
359
+ 格式化物件數量描述
360
+
361
+ Args:
362
+ class_name: 標準化後的類別名稱
363
+ count: 物件數量
364
+ scene_type: 場景類型
365
+ detected_objects: 該類型的所有檢測物件
366
+ avg_confidence: 平均檢測置信度
367
+
368
+ Returns:
369
+ str: 完整的格式化數量描述
370
+ """
371
+ if self.text_optimizer:
372
+ return self.text_optimizer.format_object_count_description(
373
+ class_name, count, scene_type, detected_objects, avg_confidence
374
+ )
375
+ else:
376
+ # 備用格式化邏輯
377
+ if count <= 0:
378
+ return ""
379
+ elif count == 1:
380
+ article = "an" if class_name[0].lower() in 'aeiou' else "a"
381
+ return f"{article} {class_name}"
382
+ else:
383
+ # 簡單的複數處理
384
+ plural_form = class_name + "s" if not class_name.endswith("s") else class_name
385
+
386
+ number_words = {
387
+ 2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
388
+ 7: "seven", 8: "eight", 9: "nine", 10: "ten",
389
+ 11: "eleven", 12: "twelve"
390
+ }
391
+
392
+ if count in number_words:
393
+ return f"{number_words[count]} {plural_form}"
394
+ elif count <= 20:
395
+ return f"several {plural_form}"
396
+ else:
397
+ return f"numerous {plural_form}"
pattern_analyzer.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import logging
3
+ import traceback
4
+ import numpy as np
5
+ from typing import Dict, List, Any, Optional
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class PatternAnalyzer:
    """
    Pattern analysis for traffic flow, pedestrian crossings and vehicle distribution.

    Every public method takes detected-object dictionaries and returns a
    mapping of zone name to a dict with the keys ``"region"``, ``"objects"``
    and ``"description"``. Any exception raised during analysis is logged
    and an empty dict is returned instead.
    """

    def __init__(self):
        """Initialize the pattern analyzer (no instance state is kept)."""
        logger.info("PatternAnalyzer initialized successfully")

    def analyze_crossing_patterns(self, pedestrians: List[Dict], traffic_lights: List[Dict]) -> Dict:
        """
        Identify pedestrian crossing zones.

        Pedestrians are grouped by ``"region"``; the two regions with the
        most pedestrians each become a crossing zone. Traffic lights that
        share the region are listed in the zone's objects as well.

        Args:
            pedestrians: Pedestrian objects; ``"region"`` is read from each.
            traffic_lights: Traffic-light objects; ``"region"`` is read from each.

        Returns:
            Dict: Zone name -> ``{"region", "objects", "description"}``.
            Empty when there are no pedestrians or on error.
        """
        try:
            crossing_zones = {}

            # Without pedestrians there is no crossing zone to report.
            if not pedestrians:
                return crossing_zones

            pedestrian_regions = {}
            for pedestrian in pedestrians:
                pedestrian_regions.setdefault(pedestrian["region"], []).append(pedestrian)

            # Busiest regions first; the sort is stable, so ties keep
            # first-seen order.
            ranked_regions = sorted(
                pedestrian_regions.items(),
                key=lambda item: len(item[1]),
                reverse=True
            )

            for idx, (region, peds) in enumerate(ranked_regions[:2]):
                tls = [t for t in traffic_lights if t["region"] == region]
                has_nearby_signals = len(tls) > 0

                # Name the zone after its direction; central regions fall
                # back to a main/secondary label based on rank.
                direction = self._get_directional_description_local(region)
                if direction and direction != "central":
                    zone_name = f"{direction} crossing area"
                else:
                    zone_name = "main crossing area" if idx == 0 else "secondary crossing area"

                description = f"Pedestrian crossing area with {len(peds)} "
                description += "person" if len(peds) == 1 else "people"
                if direction:
                    description += f" in {direction} direction"
                if has_nearby_signals:
                    description += " near traffic signals"

                crossing_zones[zone_name] = {
                    "region": region,
                    "objects": ["pedestrian"] * len(peds) + ["traffic light"] * len(tls),
                    "description": description
                }

            return crossing_zones

        except Exception as e:
            logger.error(f"Error in analyze_crossing_patterns: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def analyze_traffic_zones(self, vehicles: List[Dict]) -> Dict:
        """
        Identify direction-aware traffic zones from the vehicle distribution.

        The region with the most vehicles becomes ``"vehicle_zone"``; when
        vehicles occupy more than one region, the runner-up becomes
        ``"secondary_vehicle_zone"``. Vehicle types named in a description
        appear in first-seen order, so the text is stable between runs.

        Args:
            vehicles: Vehicle objects; ``"region"`` and ``"class_name"`` are read.

        Returns:
            Dict: Zone name -> ``{"region", "objects", "description"}``.
            Empty when there are no vehicles or on error.
        """
        try:
            traffic_zones = {}

            if not vehicles:
                return traffic_zones

            vehicle_regions = {}
            for vehicle in vehicles:
                vehicle_regions.setdefault(vehicle["region"], []).append(vehicle)

            # Stable descending sort: index 0 is the first region holding
            # the maximum number of vehicles, index 1 the runner-up.
            ranked_regions = sorted(vehicle_regions.items(), key=lambda item: len(item[1]), reverse=True)

            region, vehicles_in_region = ranked_regions[0]
            if region is not None:
                vehicle_types = [v["class_name"] for v in vehicles_in_region]
                # Order-preserving de-duplication (a set would make the
                # description depend on hash ordering).
                unique_types = list(dict.fromkeys(vehicle_types))
                direction = self._get_directional_description_local(region)

                traffic_zones["vehicle_zone"] = {
                    "region": region,
                    "objects": vehicle_types,
                    "description": f"Vehicle traffic area with {', '.join(unique_types[:3])}" +
                                   (f" in {direction} area" if direction else "")
                }

            if len(ranked_regions) > 1:
                second_region, second_vehicles = ranked_regions[1]
                direction = self._get_directional_description_local(second_region)
                vehicle_types = [v["class_name"] for v in second_vehicles]
                unique_types = list(dict.fromkeys(vehicle_types))

                traffic_zones["secondary_vehicle_zone"] = {
                    "region": second_region,
                    "objects": vehicle_types,
                    "description": f"Secondary traffic area with {', '.join(unique_types[:2])}" +
                                   (f" in {direction} direction" if direction else "")
                }

            return traffic_zones

        except Exception as e:
            logger.error(f"Error analyzing traffic zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def analyze_aerial_traffic_patterns(self, vehicle_objs: List[Dict]) -> Dict:
        """
        Classify the vehicle layout seen from an aerial viewpoint.

        Compares the spread of the ``"normalized_center"`` coordinates: a
        much smaller x-spread is reported as north-south flow, a much
        smaller y-spread as east-west flow, anything else as intersection
        traffic. At least two vehicles are required.

        Args:
            vehicle_objs: Vehicle objects; ``"normalized_center"`` and
                ``"class_name"`` are read.

        Returns:
            Dict: Zone name -> ``{"region", "objects", "description"}``.
            Empty with fewer than two vehicles or on error.
        """
        try:
            zones = {}

            if not vehicle_objs:
                return zones

            positions = np.array([obj["normalized_center"] for obj in vehicle_objs])

            if len(positions) >= 2:
                x_std = np.std(positions[:, 0])
                y_std = np.std(positions[:, 1])
                # At most five vehicles are listed per zone.
                sample_types = [obj["class_name"] for obj in vehicle_objs[:5]]

                if x_std < y_std * 0.5:
                    # Vehicles line up vertically.
                    zones["vertical_traffic_flow"] = {
                        "region": "central_vertical",
                        "objects": sample_types,
                        "description": "North-south traffic flow visible from aerial view"
                    }
                elif y_std < x_std * 0.5:
                    # Vehicles line up horizontally.
                    zones["horizontal_traffic_flow"] = {
                        "region": "central_horizontal",
                        "objects": sample_types,
                        "description": "East-west traffic flow visible from aerial view"
                    }
                else:
                    # No dominant axis.
                    zones["intersection_traffic"] = {
                        "region": "central",
                        "objects": sample_types,
                        "description": "Multi-directional traffic at intersection visible from aerial view"
                    }

            return zones

        except Exception as e:
            logger.error(f"Error analyzing aerial traffic patterns: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def identify_park_recreational_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify the recreational zone of a park scene.

        Objects whose ``"class_id"`` is 32, 33, 34, 35 or 38 (per the
        original comment: sports ball, kite, baseball bat, glove, tennis
        racket) count as recreational items. The region holding most of
        them becomes ``"recreational_zone"``; item names are listed once
        each, in first-seen order.

        Args:
            detected_objects: Detected objects; ``"class_id"``, ``"region"``
                and ``"class_name"`` are read.

        Returns:
            Dict: Zone name -> ``{"region", "objects", "description"}``.
            Empty when no recreational item is present or on error.
        """
        try:
            zones = {}

            rec_items = []
            rec_regions = {}
            for obj in detected_objects:
                if obj["class_id"] in [32, 33, 34, 35, 38]:
                    rec_regions.setdefault(obj["region"], []).append(obj)
                    rec_items.append(obj["class_name"])

            if rec_items:
                main_region, _ = max(rec_regions.items(), key=lambda item: len(item[1]))

                if main_region is not None:
                    # Order-preserving de-duplication keeps the output stable.
                    unique_items = list(dict.fromkeys(rec_items))
                    zones["recreational_zone"] = {
                        "region": main_region,
                        "objects": unique_items,
                        "description": f"Recreational area with {', '.join(unique_items)}"
                    }

            return zones

        except Exception as e:
            logger.error(f"Error identifying park recreational zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def identify_parking_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify parking zones in a parking-lot scene.

        Requires at least three objects with ``"class_id"`` 2 (cars). Cars
        are grouped into rows by the y coordinate of ``"normalized_center"``
        (within 0.1 of the row's first car). A row with two or more cars
        yields ``"parking_row"``, otherwise ``"parking_area"``.

        Args:
            detected_objects: Detected objects; ``"class_id"`` and
                ``"normalized_center"`` are read.

        Returns:
            Dict: Zone name -> ``{"region", "objects", "description"}``.
            Empty with fewer than three cars or on error.
        """
        try:
            zones = {}

            car_objs = [obj for obj in detected_objects if obj["class_id"] == 2]

            if len(car_objs) >= 3:
                # Row anchor (y of the first car in the row) -> cars in the row.
                row_sizes = {}
                for car in car_objs:
                    y = car["normalized_center"][1]
                    for anchor_y in row_sizes:
                        if abs(y - anchor_y) < 0.1:
                            row_sizes[anchor_y] += 1
                            break
                    else:
                        row_sizes[y] = 1

                if max(row_sizes.values()) >= 2:
                    zones["parking_row"] = {
                        "region": "central",
                        "objects": ["car"] * len(car_objs),
                        "description": "Organized parking area with vehicles arranged in rows"
                    }
                else:
                    zones["parking_area"] = {
                        "region": "wide",
                        "objects": ["car"] * len(car_objs),
                        "description": f"Parking area with {len(car_objs)} vehicles"
                    }

            return zones

        except Exception as e:
            logger.error(f"Error identifying parking zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def _get_directional_description_local(self, region: str) -> str:
        """
        Convert a region name into a compass-style direction.

        "top" maps to north, "bottom" to south, "left" to west and "right"
        to east; combinations give the diagonal directions.

        Args:
            region: Region name such as ``"top_left"``.

        Returns:
            str: The direction, or "central" when no keyword matches or the
            region cannot be processed.
        """
        try:
            region_lower = region.lower()

            if "top" in region_lower:
                vertical = "north"
            elif "bottom" in region_lower:
                vertical = "south"
            else:
                vertical = ""

            if "left" in region_lower:
                horizontal = "west"
            elif "right" in region_lower:
                horizontal = "east"
            else:
                horizontal = ""

            return (vertical + horizontal) or "central"

        except Exception as e:
            logger.error(f"Error getting directional description for region '{region}': {str(e)}")
            return "central"
prominence_calculator.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import numpy as np
3
+ from typing import Dict, List, Optional, Any
4
+
5
class ProminenceCalculator:
    """
    Prominence calculator for detected objects.

    Computes a prominence score per object, a per-class importance
    coefficient, and filters / ranks objects by prominence.
    """

    # Class-name tiers used by get_class_importance (people, vehicles, buildings).
    _HIGH_IMPORTANCE = ("person", "car", "truck", "bus", "motorcycle", "bicycle", "building")
    # Furniture and appliances.
    _MEDIUM_IMPORTANCE = ("chair", "couch", "tv", "laptop", "refrigerator", "dining table", "bed")
    # Small items and accessories.
    _LOW_IMPORTANCE = ("handbag", "backpack", "umbrella", "cell phone", "remote", "mouse")

    def __init__(self, min_prominence_score: float = 0.1):
        """
        Initialize the prominence calculator.

        Args:
            min_prominence_score: Minimum prominence threshold stored on the
                instance. Note that ``filter_prominent_objects`` uses its own
                ``min_prominence_score`` argument and does not read this value.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.min_prominence_score = min_prominence_score

    def calculate_prominence_score(self, obj: Dict) -> float:
        """
        Compute the prominence score of one object.

        The score is a weighted sum: confidence 40%, size 30%, closeness to
        the image center 20%, class importance 10%.

        Args:
            obj: Object dictionary; ``"confidence"``, ``"normalized_area"``,
                ``"normalized_center"`` and ``"class_name"`` are read, each
                with a default when missing.

        Returns:
            float: Score clamped to 0.0-1.0; 0.5 when the computation fails.
        """
        try:
            confidence_score = obj.get("confidence", 0.5) * 0.4

            # Logarithmic scaling keeps very large objects from dominating.
            # A negative area would make the logarithm undefined, so it is
            # treated as zero.
            normalized_area = max(obj.get("normalized_area", 0.1), 0.0)
            size_score = min(np.log(normalized_area * 10 + 1) / np.log(11), 1.0) * 0.3

            # Objects nearer the image center score higher.
            center_x, center_y = obj.get("normalized_center", [0.5, 0.5])
            distance_from_center = np.sqrt((center_x - 0.5) ** 2 + (center_y - 0.5) ** 2)
            position_score = (1 - min(distance_from_center * 2, 1.0)) * 0.2

            class_score = self.get_class_importance(obj.get("class_name", "unknown")) * 0.1

            total_score = confidence_score + size_score + position_score + class_score
            return float(max(0.0, min(1.0, total_score)))

        except Exception as e:
            self.logger.warning(f"Error calculating prominence score for object: {str(e)}")
            return 0.5  # neutral fallback score

    def get_class_importance(self, class_name: str) -> float:
        """
        Return the importance coefficient for an object class.

        A tier matches when one of its labels occurs in the class name as a
        whole word or phrase (case-insensitive, underscores treated as
        spaces). Whole-word matching prevents unrelated names that merely
        contain a label, such as "carrot" containing "car", from being
        promoted.

        Args:
            class_name: Object class name.

        Returns:
            float: 1.0 (high), 0.7 (medium), 0.4 (low), or 0.6 when no tier
            matches or the name is not a string.
        """
        default_importance = 0.6
        if not isinstance(class_name, str):
            return default_importance

        # Pad with spaces so that " car " only matches a complete word.
        padded_name = " " + " ".join(class_name.lower().replace("_", " ").split()) + " "

        tiers = (
            (1.0, self._HIGH_IMPORTANCE),
            (0.7, self._MEDIUM_IMPORTANCE),
            (0.4, self._LOW_IMPORTANCE),
        )
        for importance, labels in tiers:
            if any(f" {label} " in padded_name for label in labels):
                return importance
        return default_importance

    def filter_prominent_objects(self, detected_objects: List[Dict],
                                 min_prominence_score: float = 0.5,
                                 max_categories_to_return: Optional[int] = None) -> List[Dict]:
        """
        Select the most prominent objects.

        Each retained object is a shallow copy of the input with a
        ``"prominence_score"`` entry added; the input dictionaries are not
        modified.

        Args:
            detected_objects: Detected objects.
            min_prominence_score: Minimum score (0.0-1.0) an object needs to
                be kept.
            max_categories_to_return: Optional cap on the number of distinct
                classes; objects of classes beyond the cap are dropped.

        Returns:
            List[Dict]: Objects sorted by descending prominence; empty on error.
        """
        try:
            if not detected_objects:
                return []

            prominent_objects = []
            for obj in detected_objects:
                prominence_score = self.calculate_prominence_score(obj)
                if prominence_score >= min_prominence_score:
                    obj_copy = obj.copy()
                    obj_copy['prominence_score'] = prominence_score
                    prominent_objects.append(obj_copy)

            prominent_objects.sort(key=lambda item: item.get('prominence_score', 0), reverse=True)

            if max_categories_to_return is None or max_categories_to_return <= 0:
                return prominent_objects

            # Admit classes in order of their best-scoring object until the
            # cap is reached; every object of an admitted class is kept.
            categories_seen = set()
            filtered_objects = []
            for obj in prominent_objects:
                class_name = obj.get("class_name", "unknown")
                if class_name not in categories_seen:
                    if len(categories_seen) >= max_categories_to_return:
                        continue
                    categories_seen.add(class_name)
                filtered_objects.append(obj)

            return filtered_objects

        except Exception as e:
            self.logger.error(f"Error calculating prominent objects: {str(e)}")
            return []
scene_zone_identifier.py CHANGED
@@ -3,6 +3,9 @@ import logging
3
  import traceback
4
  import numpy as np
5
  from typing import Dict, List, Any, Optional
 
 
 
6
 
7
  logger = logging.getLogger(__name__)
8
 
@@ -10,11 +13,17 @@ class SceneZoneIdentifier:
10
  """
11
  負責不同場景類型的區域識別邏輯
12
  專注於根據場景類型執行相應的功能區域識別策略
 
13
  """
14
 
15
  def __init__(self):
16
  """初始化場景區域辨識器"""
17
  try:
 
 
 
 
 
18
  logger.info("SceneZoneIdentifier initialized successfully")
19
 
20
  except Exception as e:
@@ -39,18 +48,18 @@ class SceneZoneIdentifier:
39
  zones = {}
40
 
41
  # 主要功能區域(基於物件關聯性而非場景類型)
42
- primary_zone = self._identify_primary_functional_area(detected_objects)
43
  if primary_zone:
44
  # 基於區域內容生成描述性鍵名
45
- descriptive_key = self._generate_descriptive_zone_key_from_data(primary_zone, "primary")
46
  zones[descriptive_key] = primary_zone
47
 
48
  # 只有明確證據且物件數量足夠時創建次要功能區域
49
  if len(zones) >= 1 and len(detected_objects) >= 6:
50
- secondary_zone = self._identify_secondary_functional_area(detected_objects, zones)
51
  if secondary_zone:
52
  # 基於區域內容生成描述性鍵名
53
- descriptive_key = self._generate_descriptive_zone_key_from_data(secondary_zone, "secondary")
54
  zones[descriptive_key] = secondary_zone
55
 
56
  logger.info(f"Identified {len(zones)} indoor zones for scene type '{scene_type}'")
@@ -61,92 +70,9 @@ class SceneZoneIdentifier:
61
  logger.error(traceback.format_exc())
62
  return {}
63
 
64
- def _generate_descriptive_zone_key_from_data(self, zone_data: Dict, priority_level: str) -> str:
65
- """
66
- 基於區域數據生成描述性鍵名
67
-
68
- Args:
69
- zone_data: 區域數據字典
70
- priority_level: 優先級別(primary/secondary)
71
-
72
- Returns:
73
- str: 描述性區域鍵名
74
- """
75
- try:
76
- objects = zone_data.get("objects", [])
77
- region = zone_data.get("region", "")
78
- description = zone_data.get("description", "")
79
-
80
- # 基於物件內容確定功能類型
81
- if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
82
- base_name = "dining area"
83
- elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
84
- base_name = "seating area"
85
- elif any("bed" in obj.lower() for obj in objects):
86
- base_name = "sleeping area"
87
- elif any("laptop" in obj.lower() or "keyboard" in obj.lower() for obj in objects):
88
- base_name = "workspace area"
89
- elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
90
- base_name = "decorative area"
91
- elif any("refrigerator" in obj.lower() or "microwave" in obj.lower() for obj in objects):
92
- base_name = "kitchen area"
93
- else:
94
- # 基於描述內容推斷
95
- if "dining" in description.lower():
96
- base_name = "dining area"
97
- elif "seating" in description.lower() or "relaxation" in description.lower():
98
- base_name = "seating area"
99
- elif "work" in description.lower():
100
- base_name = "workspace area"
101
- elif "decorative" in description.lower():
102
- base_name = "decorative area"
103
- else:
104
- base_name = "functional area"
105
-
106
- # 為次要區域添加位置標識以區分
107
- if priority_level == "secondary" and region:
108
- spatial_context = self._get_spatial_context_description(region)
109
- if spatial_context:
110
- return f"{spatial_context} {base_name}"
111
-
112
- return base_name
113
-
114
- except Exception as e:
115
- logger.warning(f"Error generating descriptive zone key: {str(e)}")
116
- return "activity area"
117
-
118
- def _get_spatial_context_description(self, region: str) -> str:
119
- """
120
- 獲取空間上下文描述
121
-
122
- Args:
123
- region: 區域位置標識
124
-
125
- Returns:
126
- str: 空間上下文描述
127
- """
128
- try:
129
- spatial_mapping = {
130
- "top_left": "upper left",
131
- "top_center": "upper",
132
- "top_right": "upper right",
133
- "middle_left": "left side",
134
- "middle_center": "central",
135
- "middle_right": "right side",
136
- "bottom_left": "lower left",
137
- "bottom_center": "lower",
138
- "bottom_right": "lower right"
139
- }
140
-
141
- return spatial_mapping.get(region, "")
142
-
143
- except Exception as e:
144
- logger.warning(f"Error getting spatial context for region '{region}': {str(e)}")
145
- return ""
146
-
147
  def identify_outdoor_general_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
148
  """
149
- 識別一般戶外場景的功能區域
150
 
151
  Args:
152
  category_regions: 按類別和區域分組的物件字典
@@ -215,11 +141,11 @@ class SceneZoneIdentifier:
215
 
216
  # 針對公園區域的特殊處理
217
  if scene_type == "park_area":
218
- zones.update(self._identify_park_recreational_zones(detected_objects))
219
 
220
  # 針對停車場的特殊處理
221
  if scene_type == "parking_lot":
222
- zones.update(self._identify_parking_zones(detected_objects))
223
 
224
  logger.info(f"Identified {len(zones)} outdoor zones for scene type '{scene_type}'")
225
  return zones
@@ -232,7 +158,7 @@ class SceneZoneIdentifier:
232
  def identify_intersection_zones(self, category_regions: Dict, detected_objects: List[Dict], viewpoint: str) -> Dict:
233
  """
234
  辨識城市十字路口的功能區域,無論是否有行人,只要偵測到紅綠燈就一定顯示 Traffic Control Area;
235
- 若有行人,則額外建立 Crossing Zone 並把行人 + 同 region 的紅綠燈歸在一起。
236
 
237
  Args:
238
  category_regions: 按類別和 region 分組的物件字典
@@ -251,7 +177,7 @@ class SceneZoneIdentifier:
251
  traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
252
 
253
  # 2. Step A: 無條件建立 Traffic Control Area
254
- # 把每個 region 下的紅綠燈都先分群,生成對應 zone,確保「只要偵測到紅綠燈就一定顯示」
255
  signal_regions_all = {}
256
  for t in traffic_light_objs:
257
  region = t["region"]
@@ -285,8 +211,8 @@ class SceneZoneIdentifier:
285
 
286
  # 3. Step B: 如果有行人,就建立 Crossing Zone,並移除已被打包的紅綠燈
287
  if pedestrian_objs:
288
- # 先呼叫 _analyze_crossing_patterns,讓它回傳「行人 + 同 region 的紅綠燈」區
289
- crossing_zones = self._analyze_crossing_patterns(pedestrian_objs, traffic_light_objs)
290
 
291
  # 把 Crossing Zone 加到最終 zones,並同時記錄已使用掉的紅綠燈數量
292
  for zone_key, zone_info in crossing_zones.items():
@@ -323,8 +249,8 @@ class SceneZoneIdentifier:
323
 
324
  # 5. Step D: 分析車輛交通區域(Vehicle Zones)
325
  if vehicle_objs:
326
- traffic_zones = self._analyze_traffic_zones(vehicle_objs)
327
- # _analyze_traffic_zones 內部已用英文 debug,直接更新
328
  for zone_key, zone_info in traffic_zones.items():
329
  if zone_key in zones:
330
  suffix = 1
@@ -396,15 +322,15 @@ class SceneZoneIdentifier:
396
  # 識別車輛模式進行交通分析
397
  vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
398
  if vehicle_objs:
399
- zones.update(self._analyze_aerial_traffic_patterns(vehicle_objs))
400
 
401
  # 針對十字路口特定空中視角的處理
402
  if "intersection" in scene_type:
403
- zones.update(self._identify_aerial_intersection_features(detected_objects))
404
 
405
  # 針對廣場空中視角的處理
406
  if "plaza" in scene_type:
407
- zones.update(self._identify_aerial_plaza_features(people_objs))
408
 
409
  logger.info(f"Identified {len(zones)} aerial view zones")
410
  return zones
@@ -460,11 +386,11 @@ class SceneZoneIdentifier:
460
  "description": f"Asian commercial storefront with pedestrian activity"
461
  }
462
 
463
- # 辨識行人通道
464
- zones.update(self._identify_asian_pedestrian_pathway(detected_objects))
465
 
466
  # 辨識攤販區域(小攤/商店 - 從情境推斷)
467
- zones.update(self._identify_vendor_zones(detected_objects))
468
 
469
  # 針對夜市的特殊處理
470
  if scene_type == "asian_night_market":
@@ -521,13 +447,13 @@ class SceneZoneIdentifier:
521
  }
522
 
523
  # 識別裝飾區域,增強檢測
524
- zones.update(self._identify_upscale_decorative_zones(detected_objects))
525
 
526
  # 識別座位安排區域
527
- zones.update(self._identify_dining_seating_zones(detected_objects))
528
 
529
  # 識別服務區域(如果與餐飲區域不同)
530
- zones.update(self._identify_serving_zones(detected_objects, zones))
531
 
532
  logger.info(f"Identified {len(zones)} upscale dining zones")
533
  return zones
@@ -576,10 +502,10 @@ class SceneZoneIdentifier:
576
  }
577
 
578
  # 側邊建築區域(從場景情境推斷)
579
- zones.update(self._identify_building_zones(detected_objects))
580
 
581
  # 行人區域
582
- zones.update(self._identify_financial_pedestrian_zones(detected_objects))
583
 
584
  logger.info(f"Identified {len(zones)} financial district zones")
585
  return zones
@@ -666,7 +592,7 @@ class SceneZoneIdentifier:
666
  }
667
 
668
  # 創建相關輔助功能區,如攝影區、紀念品販賣區
669
- auxiliary_zones = self._create_landmark_auxiliary_zones(landmark, 0)
670
  if auxiliary_zones:
671
  landmark_zones.update(auxiliary_zones)
672
 
@@ -678,357 +604,10 @@ class SceneZoneIdentifier:
678
  logger.error(traceback.format_exc())
679
  return {}
680
 
681
-
682
def _identify_primary_functional_area(self, detected_objects: List[Dict]) -> Dict:
    """
    Find the dominant functional area of an indoor scene.

    Candidate areas are tried in a fixed priority order (dining, then
    seating/relaxation, then workspace); the first candidate for which
    ``_detect_functional_combination`` returns a truthy result wins.

    Args:
        detected_objects: Detected object dictionaries.

    Returns:
        The zone dictionary of the first matching candidate, or None when
        no candidate matches or an error occurs.
    """
    # Each row: (primary class ids, supporting class ids,
    #            minimum supporting objects, zone description)
    candidates = (
        # dining table + chair / wine glass / cup / fork / knife
        ([60], [56, 40, 41, 42, 43], 2,
         "Dining area with table and seating arrangement"),
        # sofa or bed + tv / potted plant / chair
        ([57, 59], [62, 58, 56], 1,
         "Seating and relaxation area"),
        # laptop or keyboard + dining table / chair / mouse
        ([63, 66], [60, 56, 64], 2,
         "Workspace area with electronics and furniture"),
    )

    try:
        for primary_ids, supporting_ids, needed, text in candidates:
            area = self._detect_functional_combination(
                detected_objects,
                primary_objects=primary_ids,
                supporting_objects=supporting_ids,
                min_supporting=needed,
                description_template=text,
            )
            if area:
                return area
        return None

    except Exception as e:
        logger.error(f"Error identifying primary functional area: {str(e)}")
        logger.error(traceback.format_exc())
        return None
733
-
734
def _identify_secondary_functional_area(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
    """
    Find a secondary functional area that does not reuse an occupied region.

    Regions already claimed by ``existing_zones`` are excluded, then the
    candidates are tried in order (decorative plant cluster, then kitchen
    appliance area) through ``_detect_functional_combination``.

    Args:
        detected_objects: Detected object dictionaries.
        existing_zones: Zones identified so far; each zone's "region" value
            is treated as already in use.

    Returns:
        The zone dictionary of the first matching candidate, or None when
        no candidate matches or an error occurs.
    """
    # Each row: (primary class ids, supporting class ids,
    #            minimum primary objects, zone description)
    candidates = (
        # potted plants (at least three) optionally accompanied by vases
        ([58], [75], 3,
         "Decorative area with plants and ornamental items"),
        # refrigerator / microwave / oven (at least two), optionally a sink
        ([72, 68, 69], [71], 2,
         "Kitchen appliance and storage area"),
    )

    try:
        occupied = {zone.get("region") for zone in existing_zones.values()}

        for primary_ids, supporting_ids, needed_primary, text in candidates:
            area = self._detect_functional_combination(
                detected_objects,
                primary_objects=primary_ids,
                supporting_objects=supporting_ids,
                min_supporting=0,
                min_primary=needed_primary,
                description_template=text,
                exclude_regions=occupied,
            )
            if area:
                return area
        return None

    except Exception as e:
        logger.error(f"Error identifying secondary functional area: {str(e)}")
        logger.error(traceback.format_exc())
        return None
781
-
782
- def _detect_functional_combination(self, detected_objects: List[Dict], primary_objects: List[int],
783
- supporting_objects: List[int], min_supporting: int,
784
- description_template: str, min_primary: int = 1,
785
- exclude_regions: set = None) -> Dict:
786
- """
787
- 通用的功能組合檢測方法
788
- 基於主要物件和支持物件的組合判斷功能區域
789
-
790
- Args:
791
- detected_objects: 檢測到的物件列表
792
- primary_objects: 主要物件的class_id列表
793
- supporting_objects: 支持物件的class_id列表
794
- min_supporting: 最少需要的支持物件數量
795
- description_template: 描述模板
796
- min_primary: 最少需要的主要物件數量
797
- exclude_regions: 需要排除的區域集合
798
-
799
- Returns:
800
- 功能區域資訊字典,如果不符合條件則返回None
801
- """
802
- try:
803
- if exclude_regions is None:
804
- exclude_regions = set()
805
-
806
- # 收集主要物件
807
- primary_objs = [obj for obj in detected_objects
808
- if obj.get("class_id") in primary_objects and obj.get("confidence", 0) >= 0.4]
809
-
810
- # 收集支持物件
811
- supporting_objs = [obj for obj in detected_objects
812
- if obj.get("class_id") in supporting_objects and obj.get("confidence", 0) >= 0.4]
813
-
814
- # 檢查是否滿足最少數量要求
815
- if len(primary_objs) < min_primary or len(supporting_objs) < min_supporting:
816
- return None
817
-
818
- # 按區域組織物件
819
- region_combinations = {}
820
- all_relevant_objs = primary_objs + supporting_objs
821
-
822
- for obj in all_relevant_objs:
823
- region = obj.get("region")
824
-
825
- # 排除指定區域
826
- if region in exclude_regions:
827
- continue
828
-
829
- if region not in region_combinations:
830
- region_combinations[region] = {"primary": [], "supporting": [], "all": []}
831
-
832
- region_combinations[region]["all"].append(obj)
833
-
834
- if obj.get("class_id") in primary_objects:
835
- region_combinations[region]["primary"].append(obj)
836
- else:
837
- region_combinations[region]["supporting"].append(obj)
838
-
839
- # 找到最佳區域組合
840
- best_region = None
841
- best_score = 0
842
-
843
- for region, objs in region_combinations.items():
844
- # 計算該區域的評分
845
- primary_count = len(objs["primary"])
846
- supporting_count = len(objs["supporting"])
847
-
848
- # 必須滿足最低要求
849
- if primary_count < min_primary or supporting_count < min_supporting:
850
- continue
851
-
852
- # 計算組合評分(主要物件權重較高)
853
- score = primary_count * 2 + supporting_count
854
-
855
- if score > best_score:
856
- best_score = score
857
- best_region = region
858
-
859
- if best_region is None:
860
- return None
861
-
862
- best_combination = region_combinations[best_region]
863
- all_objects = [obj["class_name"] for obj in best_combination["all"]]
864
-
865
- return {
866
- "region": best_region,
867
- "objects": all_objects,
868
- "description": description_template
869
- }
870
-
871
- except Exception as e:
872
- logger.error(f"Error detecting functional combination: {str(e)}")
873
- logger.error(traceback.format_exc())
874
- return None
875
-
876
- def _analyze_crossing_patterns(self, pedestrians: List[Dict], traffic_lights: List[Dict]) -> Dict:
877
- """
878
- Analyze pedestrian crossing patterns to identify crossing zones.
879
- 若同一 region 中同時有行人與紅綠燈,則將兩者都放入該區域的 objects。
880
-
881
- Args:
882
- pedestrians: 行人物件列表(每個 obj 應包含 'class_id', 'region', 'confidence' 等)
883
- traffic_lights: 紅綠燈物件列表(每個 obj 應包含 'class_id', 'region', 'confidence' 等)
884
-
885
- Returns:
886
- crossing_zones: 字典,key 為 zone 名稱,value 包含 'region', 'objects', 'description'
887
- """
888
- try:
889
- crossing_zones = {}
890
-
891
- # 如果沒有任何行人,就不辨識任何 crossing zone
892
- if not pedestrians:
893
- return crossing_zones
894
-
895
- # (1) 按照 region 分組行人
896
- pedestrian_regions = {}
897
- for p in pedestrians:
898
- region = p["region"]
899
- pedestrian_regions.setdefault(region, []).append(p)
900
-
901
- # (2) 針對每個 region,看是否同時有紅綠燈
902
- # 建立一個 mapping: region -> { "pedestrians": [...], "traffic_lights": [...] }
903
- combined_regions = {}
904
- for region, peds in pedestrian_regions.items():
905
- # 取得該 region 下所有紅綠燈
906
- tls_in_region = [t for t in traffic_lights if t["region"] == region]
907
- combined_regions[region] = {
908
- "pedestrians": peds,
909
- "traffic_lights": tls_in_region
910
- }
911
-
912
- # (3) 按照行人數量排序,找出前兩個需要建立 crossing zone 的 region
913
- sorted_regions = sorted(
914
- combined_regions.items(),
915
- key=lambda x: len(x[1]["pedestrians"]),
916
- reverse=True
917
- )
918
-
919
- # (4) 將前兩個 region 建立 Crossing Zone,objects 同時包含行人與紅綠燈
920
- for idx, (region, group) in enumerate(sorted_regions[:2]):
921
- peds = group["pedestrians"]
922
- tls = group["traffic_lights"]
923
- has_nearby_signals = len(tls) > 0
924
-
925
- # 生成 zone_name(基於 region 方向 + idx 決定主/次 crossing)
926
- direction = self._get_directional_description(region)
927
- if direction and direction != "central":
928
- zone_name = f"{direction} crossing area"
929
- else:
930
- zone_name = "main crossing area" if idx == 0 else "secondary crossing area"
931
-
932
- # 組合 description
933
- description = f"Pedestrian crossing area with {len(peds)} "
934
- description += "person" if len(peds) == 1 else "people"
935
- if direction:
936
- description += f" in {direction} direction"
937
- if has_nearby_signals:
938
- description += " near traffic signals"
939
-
940
- # ======= 將行人 + 同區紅綠燈一併放入 objects =======
941
- obj_list = ["pedestrian"] * len(peds)
942
- if has_nearby_signals:
943
- obj_list += ["traffic light"] * len(tls)
944
-
945
- crossing_zones[zone_name] = {
946
- "region": region,
947
- "objects": obj_list,
948
- "description": description
949
- }
950
-
951
- return crossing_zones
952
-
953
- except Exception as e:
954
- logger.error(f"Error in _analyze_crossing_patterns: {str(e)}")
955
- logger.error(traceback.format_exc())
956
- return {}
957
-
958
-
959
- def _analyze_traffic_zones(self, vehicles: List[Dict]) -> Dict:
960
- """
961
- 分析車輛分布以識別具有方向感知的交通區域
962
-
963
- Args:
964
- vehicles: 車輛物件列表
965
-
966
- Returns:
967
- 識別出的交通區域字典
968
- """
969
- try:
970
- traffic_zones = {}
971
-
972
- if not vehicles:
973
- return traffic_zones
974
-
975
- # 按區域分組車輛
976
- vehicle_regions = {}
977
- for v in vehicles:
978
- region = v["region"]
979
- if region not in vehicle_regions:
980
- vehicle_regions[region] = []
981
- vehicle_regions[region].append(v)
982
-
983
- # 為有車輛的區域創建交通區域
984
- main_traffic_region = max(vehicle_regions.items(), key=lambda x: len(x[1]), default=(None, []))
985
-
986
- if main_traffic_region[0] is not None:
987
- region = main_traffic_region[0]
988
- vehicles_in_region = main_traffic_region[1]
989
-
990
- # 獲取車輛類型列表用於描述
991
- vehicle_types = [v["class_name"] for v in vehicles_in_region]
992
- unique_types = list(set(vehicle_types))
993
-
994
- # 獲取方向描述
995
- direction = self._get_directional_description(region)
996
-
997
- # 創建描述性區域
998
- traffic_zones["vehicle_zone"] = {
999
- "region": region,
1000
- "objects": vehicle_types,
1001
- "description": f"Vehicle traffic area with {', '.join(unique_types[:3])}" +
1002
- (f" in {direction} area" if direction else "")
1003
- }
1004
-
1005
- # 如果車輛分布在多個區域,創建次要區域
1006
- if len(vehicle_regions) > 1:
1007
- # 獲取第二大車輛聚集區域
1008
- sorted_regions = sorted(vehicle_regions.items(), key=lambda x: len(x[1]), reverse=True)
1009
- if len(sorted_regions) > 1:
1010
- second_region, second_vehicles = sorted_regions[1]
1011
- direction = self._get_directional_description(second_region)
1012
- vehicle_types = [v["class_name"] for v in second_vehicles]
1013
- unique_types = list(set(vehicle_types))
1014
-
1015
- traffic_zones["secondary_vehicle_zone"] = {
1016
- "region": second_region,
1017
- "objects": vehicle_types,
1018
- "description": f"Secondary traffic area with {', '.join(unique_types[:2])}" +
1019
- (f" in {direction} direction" if direction else "")
1020
- }
1021
-
1022
- return traffic_zones
1023
-
1024
- except Exception as e:
1025
- logger.error(f"Error analyzing traffic zones: {str(e)}")
1026
- logger.error(traceback.format_exc())
1027
- return {}
1028
-
1029
  def _get_directional_description(self, region: str) -> str:
1030
  """
1031
  將區域名稱轉換為方位描述(東西南北)
 
1032
 
1033
  Args:
1034
  region: 區域名稱
@@ -1061,668 +640,3 @@ class SceneZoneIdentifier:
1061
  except Exception as e:
1062
  logger.error(f"Error getting directional description for region '{region}': {str(e)}")
1063
  return "central"
1064
-
1065
- def _identify_park_recreational_zones(self, detected_objects: List[Dict]) -> Dict:
1066
- """
1067
- 識別公園的休閒活動區域
1068
-
1069
- Args:
1070
- detected_objects: 檢測到的物件列表
1071
-
1072
- Returns:
1073
- 休閒區域字典
1074
- """
1075
- try:
1076
- zones = {}
1077
-
1078
- # 尋找休閒物件(運動球、風箏等)
1079
- rec_items = []
1080
- rec_regions = {}
1081
-
1082
- for obj in detected_objects:
1083
- if obj["class_id"] in [32, 33, 34, 35, 38]: # sports ball, kite, baseball bat, glove, tennis racket
1084
- region = obj["region"]
1085
- if region not in rec_regions:
1086
- rec_regions[region] = []
1087
- rec_regions[region].append(obj)
1088
- rec_items.append(obj["class_name"])
1089
-
1090
- if rec_items:
1091
- main_rec_region = max(rec_regions.items(),
1092
- key=lambda x: len(x[1]),
1093
- default=(None, []))
1094
-
1095
- if main_rec_region[0] is not None:
1096
- zones["recreational_zone"] = {
1097
- "region": main_rec_region[0],
1098
- "objects": list(set(rec_items)),
1099
- "description": f"Recreational area with {', '.join(list(set(rec_items)))}"
1100
- }
1101
-
1102
- return zones
1103
-
1104
- except Exception as e:
1105
- logger.error(f"Error identifying park recreational zones: {str(e)}")
1106
- logger.error(traceback.format_exc())
1107
- return {}
1108
-
1109
- def _identify_parking_zones(self, detected_objects: List[Dict]) -> Dict:
1110
- """
1111
- 停車場的停車區域
1112
-
1113
- Args:
1114
- detected_objects: 檢測到的物件列表
1115
-
1116
- Returns:
1117
- 停車區域字典
1118
- """
1119
- try:
1120
- zones = {}
1121
-
1122
- # 尋找停放的汽車
1123
- car_objs = [obj for obj in detected_objects if obj["class_id"] == 2] # cars
1124
-
1125
- if len(car_objs) >= 3:
1126
- # 檢查汽車是否按模式排列(簡化)
1127
- car_positions = [obj["normalized_center"] for obj in car_objs]
1128
-
1129
- # 通過分析垂直位置檢查行模式
1130
- y_coords = [pos[1] for pos in car_positions]
1131
- y_clusters = {}
1132
-
1133
- # 簡化聚類 - 按相似y坐標分組汽車
1134
- for i, y in enumerate(y_coords):
1135
- assigned = False
1136
- for cluster_y in y_clusters.keys():
1137
- if abs(y - cluster_y) < 0.1: # 圖像高度的10%內
1138
- y_clusters[cluster_y].append(i)
1139
- assigned = True
1140
- break
1141
-
1142
- if not assigned:
1143
- y_clusters[y] = [i]
1144
-
1145
- # 如果有行模式
1146
- if max(len(indices) for indices in y_clusters.values()) >= 2:
1147
- zones["parking_row"] = {
1148
- "region": "central",
1149
- "objects": ["car"] * len(car_objs),
1150
- "description": f"Organized parking area with vehicles arranged in rows"
1151
- }
1152
- else:
1153
- zones["parking_area"] = {
1154
- "region": "wide",
1155
- "objects": ["car"] * len(car_objs),
1156
- "description": f"Parking area with {len(car_objs)} vehicles"
1157
- }
1158
-
1159
- return zones
1160
-
1161
- except Exception as e:
1162
- logger.error(f"Error identifying parking zones: {str(e)}")
1163
- logger.error(traceback.format_exc())
1164
- return {}
1165
-
1166
- def _analyze_aerial_traffic_patterns(self, vehicle_objs: List[Dict]) -> Dict:
1167
- """
1168
- 分析空中視角的車輛交通模式
1169
-
1170
- Args:
1171
- vehicle_objs: 車輛物件列表
1172
-
1173
- Returns:
1174
- 交通模式區域字典
1175
- """
1176
- try:
1177
- zones = {}
1178
-
1179
- if not vehicle_objs:
1180
- return zones
1181
-
1182
- # 將位置轉換為數組進行模式分析
1183
- positions = np.array([obj["normalized_center"] for obj in vehicle_objs])
1184
-
1185
- if len(positions) >= 2:
1186
- # 計算分布指標
1187
- x_coords = positions[:, 0]
1188
- y_coords = positions[:, 1]
1189
-
1190
- x_mean = np.mean(x_coords)
1191
- y_mean = np.mean(y_coords)
1192
- x_std = np.std(x_coords)
1193
- y_std = np.std(y_coords)
1194
-
1195
- # 判斷車輛是否組織成車道
1196
- if x_std < y_std * 0.5:
1197
- # 車輛垂直對齊 - 表示南北交通
1198
- zones["vertical_traffic_flow"] = {
1199
- "region": "central_vertical",
1200
- "objects": [obj["class_name"] for obj in vehicle_objs[:5]],
1201
- "description": "North-south traffic flow visible from aerial view"
1202
- }
1203
- elif y_std < x_std * 0.5:
1204
- # 車輛水平對齊 - 表示東西交通
1205
- zones["horizontal_traffic_flow"] = {
1206
- "region": "central_horizontal",
1207
- "objects": [obj["class_name"] for obj in vehicle_objs[:5]],
1208
- "description": "East-west traffic flow visible from aerial view"
1209
- }
1210
- else:
1211
- # 車輛多方向 - 表示十字路口
1212
- zones["intersection_traffic"] = {
1213
- "region": "central",
1214
- "objects": [obj["class_name"] for obj in vehicle_objs[:5]],
1215
- "description": "Multi-directional traffic at intersection visible from aerial view"
1216
- }
1217
-
1218
- return zones
1219
-
1220
- except Exception as e:
1221
- logger.error(f"Error analyzing aerial traffic patterns: {str(e)}")
1222
- logger.error(traceback.format_exc())
1223
- return {}
1224
-
1225
- def _identify_aerial_intersection_features(self, detected_objects: List[Dict]) -> Dict:
1226
- """
1227
- 空中視角十字路口特徵
1228
-
1229
- Args:
1230
- detected_objects: 檢測到的物件列表
1231
-
1232
- Returns:
1233
- 十字路口特徵區域字典
1234
- """
1235
- try:
1236
- zones = {}
1237
-
1238
- # 檢查交通信號
1239
- traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
1240
- if traffic_light_objs:
1241
- zones["traffic_control_pattern"] = {
1242
- "region": "intersection",
1243
- "objects": ["traffic light"] * len(traffic_light_objs),
1244
- "description": f"Intersection traffic control with {len(traffic_light_objs)} signals visible from above"
1245
- }
1246
-
1247
- # 人行道從空中視角的情境推斷
1248
- zones["crossing_pattern"] = {
1249
- "region": "central",
1250
- "objects": ["inferred crosswalk"],
1251
- "description": "Crossing pattern visible from aerial perspective"
1252
- }
1253
-
1254
- return zones
1255
-
1256
- except Exception as e:
1257
- logger.error(f"Error identifying aerial intersection features: {str(e)}")
1258
- logger.error(traceback.format_exc())
1259
- return {}
1260
-
1261
- def _identify_aerial_plaza_features(self, people_objs: List[Dict]) -> Dict:
1262
- """
1263
- 識別空中視角廣場特徵
1264
-
1265
- Args:
1266
- people_objs: 行人物件列表
1267
-
1268
- Returns:
1269
- 廣場特徵區域字典
1270
- """
1271
- try:
1272
- zones = {}
1273
-
1274
- if people_objs:
1275
- # 檢查人群是否聚集在中央區域
1276
- central_people = [obj for obj in people_objs
1277
- if "middle" in obj["region"]]
1278
-
1279
- if central_people:
1280
- zones["central_gathering"] = {
1281
- "region": "middle_center",
1282
- "objects": ["person"] * len(central_people),
1283
- "description": f"Central plaza gathering area with {len(central_people)} people viewed from above"
1284
- }
1285
-
1286
- return zones
1287
-
1288
- except Exception as e:
1289
- logger.error(f"Error identifying aerial plaza features: {str(e)}")
1290
- logger.error(traceback.format_exc())
1291
- return {}
1292
-
1293
- def _identify_asian_pedestrian_pathway(self, detected_objects: List[Dict]) -> Dict:
1294
- """
1295
- 亞洲文化場景中的行人通道
1296
-
1297
- Args:
1298
- detected_objects: 檢測到的物件列表
1299
-
1300
- Returns:
1301
- 行人通道區域字典
1302
- """
1303
- try:
1304
- zones = {}
1305
-
1306
- pathway_items = []
1307
- pathway_regions = {}
1308
-
1309
- # 提取人群用於通道分析
1310
- people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
1311
-
1312
- # 分析人群是否形成線形(商業街的特徵)
1313
- people_positions = [obj["normalized_center"] for obj in people_objs]
1314
-
1315
- structured_path = False
1316
- path_direction = "meandering"
1317
-
1318
- if len(people_positions) >= 3:
1319
- # 檢查人群是否沿相似y坐標排列(水平路徑)
1320
- y_coords = [pos[1] for pos in people_positions]
1321
- y_mean = sum(y_coords) / len(y_coords)
1322
- y_variance = sum((y - y_mean)**2 for y in y_coords) / len(y_coords)
1323
-
1324
- horizontal_path = y_variance < 0.05 # 低變異表示水平對齊
1325
-
1326
- # 檢查人群是否沿相似x坐標排列(垂直路徑)
1327
- x_coords = [pos[0] for pos in people_positions]
1328
- x_mean = sum(x_coords) / len(x_coords)
1329
- x_variance = sum((x - x_mean)**2 for x in x_coords) / len(x_coords)
1330
-
1331
- vertical_path = x_variance < 0.05 # 低變異表示垂直對齊
1332
-
1333
- structured_path = horizontal_path or vertical_path
1334
- path_direction = "horizontal" if horizontal_path else "vertical" if vertical_path else "meandering"
1335
-
1336
- # 收集通道物件(人、自行車、摩托車在中間區域)
1337
- for obj in detected_objects:
1338
- if obj["class_id"] in [0, 1, 3]: # Person, bicycle, motorcycle
1339
- y_pos = obj["normalized_center"][1]
1340
- # 按垂直位置分組(圖像中間可能是通道)
1341
- if 0.25 <= y_pos <= 0.75:
1342
- region = obj["region"]
1343
- if region not in pathway_regions:
1344
- pathway_regions[region] = []
1345
- pathway_regions[region].append(obj)
1346
- pathway_items.append(obj["class_name"])
1347
-
1348
- if pathway_items:
1349
- path_desc = "Pedestrian walkway with people moving through the commercial area"
1350
- if structured_path:
1351
- path_desc = f"{path_direction.capitalize()} pedestrian walkway with organized foot traffic"
1352
-
1353
- zones["pedestrian_pathway"] = {
1354
- "region": "middle_center", # 假設:通道通常在中間
1355
- "objects": list(set(pathway_items)),
1356
- "description": path_desc
1357
- }
1358
-
1359
- return zones
1360
-
1361
- except Exception as e:
1362
- logger.error(f"Error identifying Asian pedestrian pathway: {str(e)}")
1363
- logger.error(traceback.format_exc())
1364
- return {}
1365
-
1366
- def _identify_vendor_zones(self, detected_objects: List[Dict]) -> Dict:
1367
- """
1368
- 識別攤販區域
1369
-
1370
- Args:
1371
- detected_objects: 檢測到的物件列表
1372
-
1373
- Returns:
1374
- 攤販區域字典
1375
- """
1376
- try:
1377
- zones = {}
1378
-
1379
- # 識別攤販區域(小攤/商店 - 從情境推斷)
1380
- has_small_objects = any(obj["class_id"] in [24, 26, 39, 41] for obj in detected_objects) # bags, bottles, cups
1381
- has_people = any(obj["class_id"] == 0 for obj in detected_objects)
1382
-
1383
- if has_small_objects and has_people:
1384
- # 可能的攤販區域是人群和小物件聚集的地方
1385
- small_obj_regions = {}
1386
-
1387
- for obj in detected_objects:
1388
- if obj["class_id"] in [24, 26, 39, 41, 67]: # bags, bottles, cups, phones
1389
- region = obj["region"]
1390
- if region not in small_obj_regions:
1391
- small_obj_regions[region] = []
1392
- small_obj_regions[region].append(obj)
1393
-
1394
- if small_obj_regions:
1395
- main_vendor_region = max(small_obj_regions.items(),
1396
- key=lambda x: len(x[1]),
1397
- default=(None, []))
1398
-
1399
- if main_vendor_region[0] is not None:
1400
- vendor_items = [obj["class_name"] for obj in main_vendor_region[1]]
1401
- zones["vendor_zone"] = {
1402
- "region": main_vendor_region[0],
1403
- "objects": list(set(vendor_items)),
1404
- "description": "Vendor or market stall area with small merchandise"
1405
- }
1406
-
1407
- return zones
1408
-
1409
- except Exception as e:
1410
- logger.error(f"Error identifying vendor zones: {str(e)}")
1411
- logger.error(traceback.format_exc())
1412
- return {}
1413
-
1414
- def _identify_upscale_decorative_zones(self, detected_objects: List[Dict]) -> Dict:
1415
- """
1416
- 識別高級餐飲的裝飾區域
1417
-
1418
- Args:
1419
- detected_objects: 檢測到的物件列表
1420
-
1421
- Returns:
1422
- 裝飾區域字典
1423
- """
1424
- try:
1425
- zones = {}
1426
-
1427
- decor_items = []
1428
- decor_regions = {}
1429
-
1430
- # 尋找裝飾元素(花瓶、酒杯、未使用的餐具)
1431
- for obj in detected_objects:
1432
- if obj["class_id"] in [75, 40]: # Vase, wine glass
1433
- region = obj["region"]
1434
- if region not in decor_regions:
1435
- decor_regions[region] = []
1436
- decor_regions[region].append(obj)
1437
- decor_items.append(obj["class_name"])
1438
-
1439
- if decor_items:
1440
- main_decor_region = max(decor_regions.items(),
1441
- key=lambda x: len(x[1]),
1442
- default=(None, []))
1443
-
1444
- if main_decor_region[0] is not None:
1445
- zones["decorative_zone"] = {
1446
- "region": main_decor_region[0],
1447
- "objects": list(set(decor_items)),
1448
- "description": f"Decorative area with {', '.join(list(set(decor_items)))}"
1449
- }
1450
-
1451
- return zones
1452
-
1453
- except Exception as e:
1454
- logger.error(f"Error identifying upscale decorative zones: {str(e)}")
1455
- logger.error(traceback.format_exc())
1456
- return {}
1457
-
1458
- def _identify_dining_seating_zones(self, detected_objects: List[Dict]) -> Dict:
1459
- """
1460
- 識別餐廳座位安排區域
1461
-
1462
- Args:
1463
- detected_objects: 檢測到的物件列表
1464
-
1465
- Returns:
1466
- 座位區域字典
1467
- """
1468
- try:
1469
- zones = {}
1470
-
1471
- # 識別座位安排區域
1472
- chairs = [obj for obj in detected_objects if obj["class_id"] == 56] # chairs
1473
- if len(chairs) >= 2:
1474
- chair_regions = {}
1475
- for obj in chairs:
1476
- region = obj["region"]
1477
- if region not in chair_regions:
1478
- chair_regions[region] = []
1479
- chair_regions[region].append(obj)
1480
-
1481
- if chair_regions:
1482
- main_seating_region = max(chair_regions.items(),
1483
- key=lambda x: len(x[1]),
1484
- default=(None, []))
1485
-
1486
- if main_seating_region[0] is not None:
1487
- zones["dining_seating_zone"] = {
1488
- "region": main_seating_region[0],
1489
- "objects": ["chair"] * len(main_seating_region[1]),
1490
- "description": f"Formal dining seating arrangement with {len(main_seating_region[1])} chairs"
1491
- }
1492
-
1493
- return zones
1494
-
1495
- except Exception as e:
1496
- logger.error(f"Error identifying dining seating zones: {str(e)}")
1497
- logger.error(traceback.format_exc())
1498
- return {}
1499
-
1500
- def _identify_serving_zones(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
1501
- """
1502
- 識別服務區域
1503
-
1504
- Args:
1505
- detected_objects: 檢測到的物件列表
1506
- existing_zones: 已存在的功能區域
1507
-
1508
- Returns:
1509
- 服務區域字典
1510
- """
1511
- try:
1512
- zones = {}
1513
-
1514
- serving_items = []
1515
- serving_regions = {}
1516
-
1517
- # 服務區域可能有瓶子、碗、容器
1518
- for obj in detected_objects:
1519
- if obj["class_id"] in [39, 45]: # Bottle, bowl
1520
- # 檢查是否在與主餐桌不同的區域
1521
- if "formal_dining_zone" in existing_zones and obj["region"] != existing_zones["formal_dining_zone"]["region"]:
1522
- region = obj["region"]
1523
- if region not in serving_regions:
1524
- serving_regions[region] = []
1525
- serving_regions[region].append(obj)
1526
- serving_items.append(obj["class_name"])
1527
-
1528
- if serving_items:
1529
- main_serving_region = max(serving_regions.items(),
1530
- key=lambda x: len(x[1]),
1531
- default=(None, []))
1532
-
1533
- if main_serving_region[0] is not None:
1534
- zones["serving_zone"] = {
1535
- "region": main_serving_region[0],
1536
- "objects": list(set(serving_items)),
1537
- "description": f"Serving or sideboard area with {', '.join(list(set(serving_items)))}"
1538
- }
1539
-
1540
- return zones
1541
-
1542
- except Exception as e:
1543
- logger.error(f"Error identifying serving zones: {str(e)}")
1544
- logger.error(traceback.format_exc())
1545
- return {}
1546
-
1547
- def _identify_building_zones(self, detected_objects: List[Dict]) -> Dict:
1548
- """
1549
- 識別建築區域(從場景情境推斷)
1550
-
1551
- Args:
1552
- detected_objects: 檢測到的物件列表
1553
-
1554
- Returns:
1555
- 建築區域字典
1556
- """
1557
- try:
1558
- zones = {}
1559
-
1560
- # 側邊建築區域(從場景情境推斷)
1561
- # 檢查是否有實際可能包含建築物的區域
1562
- left_side_regions = ["top_left", "middle_left", "bottom_left"]
1563
- right_side_regions = ["top_right", "middle_right", "bottom_right"]
1564
-
1565
- # 檢查左側
1566
- left_building_evidence = True
1567
- for region in left_side_regions:
1568
- # 如果此區域有很多車輛或人群,不太可能是建築物
1569
- vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
1570
- for obj in detected_objects)
1571
- people_in_region = any(obj["region"] == region and obj["class_id"] == 0
1572
- for obj in detected_objects)
1573
-
1574
- if vehicle_in_region or people_in_region:
1575
- left_building_evidence = False
1576
- break
1577
-
1578
- # 檢查右側
1579
- right_building_evidence = True
1580
- for region in right_side_regions:
1581
- # 如果此區域有很多車輛或人群,不太可能是建築物
1582
- vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
1583
- for obj in detected_objects)
1584
- people_in_region = any(obj["region"] == region and obj["class_id"] == 0
1585
- for obj in detected_objects)
1586
-
1587
- if vehicle_in_region or people_in_region:
1588
- right_building_evidence = False
1589
- break
1590
-
1591
- # 如果證據支持,添加建築區域
1592
- if left_building_evidence:
1593
- zones["building_zone_left"] = {
1594
- "region": "middle_left",
1595
- "objects": ["building"], # 推斷
1596
- "description": "Tall buildings line the left side of the street"
1597
- }
1598
-
1599
- if right_building_evidence:
1600
- zones["building_zone_right"] = {
1601
- "region": "middle_right",
1602
- "objects": ["building"], # 推斷
1603
- "description": "Tall buildings line the right side of the street"
1604
- }
1605
-
1606
- return zones
1607
-
1608
- except Exception as e:
1609
- logger.error(f"Error identifying building zones: {str(e)}")
1610
- logger.error(traceback.format_exc())
1611
- return {}
1612
-
1613
- def _identify_financial_pedestrian_zones(self, detected_objects: List[Dict]) -> Dict:
1614
- """
1615
- 識別金融區的行人區域
1616
-
1617
- Args:
1618
- detected_objects: 檢測到的物件列表
1619
-
1620
- Returns:
1621
- 行人區域字典
1622
- """
1623
- try:
1624
- zones = {}
1625
-
1626
- # 識別行人區域(如果有人群)
1627
- people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
1628
- if people_objs:
1629
- people_regions = {}
1630
- for obj in people_objs:
1631
- region = obj["region"]
1632
- if region not in people_regions:
1633
- people_regions[region] = []
1634
- people_regions[region].append(obj)
1635
-
1636
- if people_regions:
1637
- main_pedestrian_region = max(people_regions.items(),
1638
- key=lambda x: len(x[1]),
1639
- default=(None, []))
1640
-
1641
- if main_pedestrian_region[0] is not None:
1642
- zones["pedestrian_zone"] = {
1643
- "region": main_pedestrian_region[0],
1644
- "objects": ["person"] * len(main_pedestrian_region[1]),
1645
- "description": f"Pedestrian area with {len(main_pedestrian_region[1])} people navigating the financial district"
1646
- }
1647
-
1648
- return zones
1649
-
1650
- except Exception as e:
1651
- logger.error(f"Error identifying financial pedestrian zones: {str(e)}")
1652
- logger.error(traceback.format_exc())
1653
- return {}
1654
-
1655
- def _create_landmark_auxiliary_zones(self, landmark: Dict, index: int) -> Dict:
1656
- """
1657
- 創建地標相關的輔助區域(攝影區、紀念品區等)
1658
-
1659
- Args:
1660
- landmark: 地標物件字典
1661
- index: 地標索引
1662
-
1663
- Returns:
1664
- 輔助區域字典
1665
- """
1666
- try:
1667
- auxiliary_zones = {}
1668
- landmark_region = landmark.get("region", "middle_center")
1669
- landmark_name = landmark.get("class_name", "Landmark")
1670
-
1671
- # 創建攝影區
1672
- # 根據地標位置調整攝影區位置(地標前方通常是攝影區)
1673
- region_mapping = {
1674
- "top_left": "bottom_right",
1675
- "top_center": "bottom_center",
1676
- "top_right": "bottom_left",
1677
- "middle_left": "middle_right",
1678
- "middle_center": "bottom_center",
1679
- "middle_right": "middle_left",
1680
- "bottom_left": "top_right",
1681
- "bottom_center": "top_center",
1682
- "bottom_right": "top_left"
1683
- }
1684
-
1685
- photo_region = region_mapping.get(landmark_region, landmark_region)
1686
-
1687
- photo_key = f"{landmark_name.lower().replace(' ', '_')}_photography_spot"
1688
- auxiliary_zones[photo_key] = {
1689
- "name": f"{landmark_name} Photography Spot",
1690
- "description": f"Popular position for photographing {landmark_name} with optimal viewing angle.",
1691
- "objects": ["camera", "person", "cell phone"],
1692
- "region": photo_region,
1693
- "primary_function": "Tourist photography"
1694
- }
1695
-
1696
- # 如果是著名地標,可能有紀念品販售區
1697
- if landmark.get("confidence", 0) > 0.7: # 高置信度地標更可能有紀念品區
1698
- # 根據地標位置找到適合的紀念品區位置(通常在地標附近但不直接在地標上)
1699
- adjacent_regions = {
1700
- "top_left": ["top_center", "middle_left"],
1701
- "top_center": ["top_left", "top_right"],
1702
- "top_right": ["top_center", "middle_right"],
1703
- "middle_left": ["top_left", "bottom_left"],
1704
- "middle_center": ["middle_left", "middle_right"],
1705
- "middle_right": ["top_right", "bottom_right"],
1706
- "bottom_left": ["middle_left", "bottom_center"],
1707
- "bottom_center": ["bottom_left", "bottom_right"],
1708
- "bottom_right": ["bottom_center", "middle_right"]
1709
- }
1710
-
1711
- if landmark_region in adjacent_regions:
1712
- souvenir_region = adjacent_regions[landmark_region][0] # 選擇第一個相鄰區域
1713
-
1714
- souvenir_key = f"{landmark_name.lower().replace(' ', '_')}_souvenir_area"
1715
- auxiliary_zones[souvenir_key] = {
1716
- "name": f"{landmark_name} Souvenir Area",
1717
- "description": f"Area where visitors can purchase souvenirs and memorabilia related to {landmark_name}.",
1718
- "objects": ["person", "handbag", "backpack"],
1719
- "region": souvenir_region,
1720
- "primary_function": "Tourism commerce"
1721
- }
1722
-
1723
- return auxiliary_zones
1724
-
1725
- except Exception as e:
1726
- logger.error(f"Error creating landmark auxiliary zones: {str(e)}")
1727
- logger.error(traceback.format_exc())
1728
- return {}
 
3
  import traceback
4
  import numpy as np
5
  from typing import Dict, List, Any, Optional
6
+ from functional_zone_detector import FunctionalZoneDetector
7
+ from pattern_analyzer import PatternAnalyzer
8
+ from specialized_scene_processor import SpecializedSceneProcessor
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
13
  """
14
  負責不同場景類型的區域識別邏輯
15
  專注於根據場景類型執行相應的功能區域識別策略
16
+ 整合所有專門的區域辨識組件,主要須整合至SpatialAnalyzer
17
  """
18
 
19
  def __init__(self):
20
  """初始化場景區域辨識器"""
21
  try:
22
+ # 初始化各個專門組件
23
+ self.functional_detector = FunctionalZoneDetector()
24
+ self.pattern_analyzer = PatternAnalyzer()
25
+ self.scene_processor = SpecializedSceneProcessor()
26
+
27
  logger.info("SceneZoneIdentifier initialized successfully")
28
 
29
  except Exception as e:
 
48
  zones = {}
49
 
50
  # 主要功能區域(基於物件關聯性而非場景類型)
51
+ primary_zone = self.functional_detector.identify_primary_functional_area(detected_objects)
52
  if primary_zone:
53
  # 基於區域內容生成描述性鍵名
54
+ descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(primary_zone, "primary")
55
  zones[descriptive_key] = primary_zone
56
 
57
  # 只有明確證據且物件數量足夠時創建次要功能區域
58
  if len(zones) >= 1 and len(detected_objects) >= 6:
59
+ secondary_zone = self.functional_detector.identify_secondary_functional_area(detected_objects, zones)
60
  if secondary_zone:
61
  # 基於區域內容生成描述性鍵名
62
+ descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(secondary_zone, "secondary")
63
  zones[descriptive_key] = secondary_zone
64
 
65
  logger.info(f"Identified {len(zones)} indoor zones for scene type '{scene_type}'")
 
70
  logger.error(traceback.format_exc())
71
  return {}
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def identify_outdoor_general_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
74
  """
75
+ 辨識一般戶外場景的功能區域
76
 
77
  Args:
78
  category_regions: 按類別和區域分組的物件字典
 
141
 
142
  # 針對公園區域的特殊處理
143
  if scene_type == "park_area":
144
+ zones.update(self.pattern_analyzer.identify_park_recreational_zones(detected_objects))
145
 
146
  # 針對停車場的特殊處理
147
  if scene_type == "parking_lot":
148
+ zones.update(self.pattern_analyzer.identify_parking_zones(detected_objects))
149
 
150
  logger.info(f"Identified {len(zones)} outdoor zones for scene type '{scene_type}'")
151
  return zones
 
158
  def identify_intersection_zones(self, category_regions: Dict, detected_objects: List[Dict], viewpoint: str) -> Dict:
159
  """
160
  辨識城市十字路口的功能區域,無論是否有行人,只要偵測到紅綠燈就一定顯示 Traffic Control Area;
161
+ 如果有行人,則額外建立 Crossing Zone 並把行人 + 同 region 的紅綠燈歸在一起。
162
 
163
  Args:
164
  category_regions: 按類別和 region 分組的物件字典
 
177
  traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
178
 
179
  # 2. Step A: 無條件建立 Traffic Control Area
180
+ # 把每個 region 下的紅綠燈都先分群,生成對應 zone,確保"只要偵測到紅綠燈就一定顯示"
181
  signal_regions_all = {}
182
  for t in traffic_light_objs:
183
  region = t["region"]
 
211
 
212
  # 3. Step B: 如果有行人,就建立 Crossing Zone,並移除已被打包的紅綠燈
213
  if pedestrian_objs:
214
+ # 先呼叫 analyze_crossing_patterns,讓它回傳「行人 + 同 region 的紅綠燈」區
215
+ crossing_zones = self.pattern_analyzer.analyze_crossing_patterns(pedestrian_objs, traffic_light_objs)
216
 
217
  # 把 Crossing Zone 加到最終 zones,並同時記錄已使用掉的紅綠燈數量
218
  for zone_key, zone_info in crossing_zones.items():
 
249
 
250
  # 5. Step D: 分析車輛交通區域(Vehicle Zones)
251
  if vehicle_objs:
252
+ traffic_zones = self.pattern_analyzer.analyze_traffic_zones(vehicle_objs)
253
+ # analyze_traffic_zones 內部已用英文 debug,直接更新
254
  for zone_key, zone_info in traffic_zones.items():
255
  if zone_key in zones:
256
  suffix = 1
 
322
  # 識別車輛模式進行交通分析
323
  vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
324
  if vehicle_objs:
325
+ zones.update(self.pattern_analyzer.analyze_aerial_traffic_patterns(vehicle_objs))
326
 
327
  # 針對十字路口特定空中視角的處理
328
  if "intersection" in scene_type:
329
+ zones.update(self.scene_processor.identify_aerial_intersection_features(detected_objects))
330
 
331
  # 針對廣場空中視角的處理
332
  if "plaza" in scene_type:
333
+ zones.update(self.scene_processor.identify_aerial_plaza_features(people_objs))
334
 
335
  logger.info(f"Identified {len(zones)} aerial view zones")
336
  return zones
 
386
  "description": f"Asian commercial storefront with pedestrian activity"
387
  }
388
 
389
+ # 辨識行人通道
390
+ zones.update(self.scene_processor.identify_asian_pedestrian_pathway(detected_objects))
391
 
392
  # 辨識攤販區域(小攤/商店 - 從情境推斷)
393
+ zones.update(self.scene_processor.identify_vendor_zones(detected_objects))
394
 
395
  # 針對夜市的特殊處理
396
  if scene_type == "asian_night_market":
 
447
  }
448
 
449
  # 識別裝飾區域,增強檢測
450
+ zones.update(self.scene_processor.identify_upscale_decorative_zones(detected_objects))
451
 
452
  # 識別座位安排區域
453
+ zones.update(self.scene_processor.identify_dining_seating_zones(detected_objects))
454
 
455
  # 識別服務區域(如果與餐飲區域不同)
456
+ zones.update(self.scene_processor.identify_serving_zones(detected_objects, zones))
457
 
458
  logger.info(f"Identified {len(zones)} upscale dining zones")
459
  return zones
 
502
  }
503
 
504
  # 側邊建築區域(從場景情境推斷)
505
+ zones.update(self.scene_processor.identify_building_zones(detected_objects))
506
 
507
  # 行人區域
508
+ zones.update(self.scene_processor.identify_financial_pedestrian_zones(detected_objects))
509
 
510
  logger.info(f"Identified {len(zones)} financial district zones")
511
  return zones
 
592
  }
593
 
594
  # 創建相關輔助功能區,如攝影區、紀念品販賣區
595
+ auxiliary_zones = self.scene_processor.create_landmark_auxiliary_zones(landmark, 0)
596
  if auxiliary_zones:
597
  landmark_zones.update(auxiliary_zones)
598
 
 
604
  logger.error(traceback.format_exc())
605
  return {}
606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  def _get_directional_description(self, region: str) -> str:
608
  """
609
  將區域名稱轉換為方位描述(東西南北)
610
+ 這是核心工具方法,供所有組件使用
611
 
612
  Args:
613
  region: 區域名稱
 
640
  except Exception as e:
641
  logger.error(f"Error getting directional description for region '{region}': {str(e)}")
642
  return "central"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
spatial_location_handler.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import traceback
3
+ import numpy as np
4
+ from typing import Dict, List, Optional, Any, Tuple
5
+
6
class SpatialLocationHandler:
    """
    Spatial location handler.

    Generates short English location phrases for detected objects and
    classifies how a group of objects is arranged (linear, clustered,
    circular, ...). Phrase generation can be delegated to an optional
    RegionAnalyzer-like object when one is supplied.
    """

    # Built-in phrases for the grid regions; keys are lower-case, space-separated
    _REGION_PHRASES = {
        "top left": "in the upper left area",
        "top center": "in the upper area",
        "top right": "in the upper right area",
        "middle left": "on the left side",
        "middle center": "in the center",
        "center": "in the center",
        "middle right": "on the right side",
        "bottom left": "in the lower left area",
        "bottom center": "in the lower area",
        "bottom right": "in the lower right area"
    }

    # Ordered keyword rules for region names that are not exact matches.
    # The first rule whose keywords all occur in the region name wins, so the
    # two-keyword rules must stay ahead of the single-keyword ones.
    _FUZZY_REGION_RULES = (
        (("top", "left"), "in the upper left area"),
        (("top", "right"), "in the upper right area"),
        (("bottom", "left"), "in the lower left area"),
        (("bottom", "right"), "in the lower right area"),
        (("top",), "in the upper area"),
        (("bottom",), "in the lower area"),
        (("left",), "on the left side"),
        (("right",), "on the right side"),
    )

    # Arrangement phrases for specific object classes
    _ARRANGEMENT_TEMPLATES = {
        "chair": {
            "linear": "arranged in a row",
            "clustered": "grouped together for conversation",
            "circular": "arranged around the table",
            "scattered": "positioned throughout the space",
            "regular": "evenly spaced",
            "distributed": "thoughtfully positioned"
        },
        "dining table": {
            "linear": "aligned to create a unified dining space",
            "clustered": "grouped to form intimate dining areas",
            "scattered": "distributed to optimize space flow",
            "regular": "systematically positioned",
            "distributed": "strategically placed"
        },
        "car": {
            "linear": "parked in sequence",
            "clustered": "grouped in the parking area",
            "scattered": "distributed throughout the lot",
            "regular": "neatly parked",
            "distributed": "positioned across the area"
        },
        "person": {
            "linear": "moving in a line",
            "clustered": "gathered together",
            "circular": "forming a circle",
            "scattered": "spread across the area",
            "distributed": "positioned throughout the scene"
        }
    }

    # Arrangement phrases used for every other object class
    _GENERIC_ARRANGEMENT_TEMPLATES = {
        "linear": "arranged in a line",
        "clustered": "grouped together",
        "circular": "arranged in a circular pattern",
        "scattered": "distributed across the space",
        "regular": "evenly positioned",
        "distributed": "thoughtfully placed"
    }

    def __init__(self, region_analyzer: Optional[Any] = None):
        """
        Initialize the spatial location handler.

        Args:
            region_analyzer: Optional RegionAnalyzer instance stored on the
                handler and used by get_standardized_spatial_description.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.region_analyzer = region_analyzer

    def set_region_analyzer(self, region_analyzer: Any) -> None:
        """
        Store the RegionAnalyzer used for standardized spatial descriptions.

        Args:
            region_analyzer: RegionAnalyzer instance.
        """
        try:
            self.region_analyzer = region_analyzer
            self.logger.info("RegionAnalyzer instance set for SpatialLocationHandler")
        except Exception as e:
            self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")

    def generate_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
                                     image_height: Optional[int] = None,
                                     region_analyzer: Optional[Any] = None) -> str:
        """
        Generate a spatial location phrase for one object.

        Resolution order:
          1. Missing, blank or "unknown" region: a generic phrase chosen from
             the object's class name.
          2. The region_analyzer argument, if given and it returns a
             non-empty phrase.
          3. The built-in region table, then keyword matching on the region.
          4. The object's "normalized_center", only when both image
             dimensions are supplied.

        Args:
            obj: Object dict; reads "region", "class_name" and
                "normalized_center".
            image_width: Optional image width.
            image_height: Optional image height.
            region_analyzer: Optional RegionAnalyzer instance. Only this
                argument is consulted here, not self.region_analyzer.

        Returns:
            str: The spatial phrase. Empty string when nothing matches or an
            error occurs (the error is logged as a warning).
        """
        try:
            region = obj.get("region") or ""
            object_type = obj.get("class_name", "")

            # No usable region: fall back to a class-dependent generic phrase
            if not region.strip() or region == "unknown":
                kind = object_type.lower() if object_type else ""
                if any(vehicle in kind for vehicle in ("car", "truck", "bus")):
                    return "positioned in the scene"
                if "person" in kind:
                    return "present in the area"
                return "located in the scene"

            # Prefer the analyzer's standardized phrasing when it is available
            if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'):
                if hasattr(region_analyzer, 'get_contextual_spatial_description'):
                    spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type)
                else:
                    spatial_desc = region_analyzer.get_spatial_description_phrase(region)

                if spatial_desc:
                    return spatial_desc

            # Built-in fallback: exact table lookup first
            clean_region = region.replace('_', ' ').strip().lower()
            exact_phrase = self._REGION_PHRASES.get(clean_region)
            if exact_phrase is not None:
                return exact_phrase

            # Then loose keyword matching
            for keywords, fuzzy_phrase in self._FUZZY_REGION_RULES:
                if all(word in clean_region for word in keywords):
                    return fuzzy_phrase
            if "center" in clean_region or "middle" in clean_region:
                return "in the center"

            # Unrecognized region: derive a phrase from the normalized center.
            # NOTE(review): the image dimensions gate this branch but are not
            # used in the computation -- confirm whether the gate is intended.
            norm_center = obj.get("normalized_center")
            if norm_center and image_width and image_height:
                x_norm, y_norm = norm_center
                h_pos = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else "center"
                v_pos = "upper" if y_norm < 0.4 else "lower" if y_norm > 0.6 else "center"

                if h_pos == "center" and v_pos == "center":
                    return "in the center"
                return f"in the {v_pos} {h_pos} area"

            # Every strategy failed
            return ""

        except Exception as e:
            self.logger.warning(f"Error generating spatial description: {str(e)}")
            return ""

    def get_standardized_spatial_description(self, obj: Dict) -> str:
        """
        Generate a standardized spatial description via self.region_analyzer.

        Args:
            obj: Object dict; reads "region" and "class_name".

        Returns:
            str: Whatever the analyzer returns. Empty string when no analyzer
            is set or it exposes neither description method. If the analyzer
            raises, a generic phrase is returned instead ("visible in the
            scene" when the object has a class name, otherwise "present in
            the view").
        """
        try:
            analyzer = getattr(self, 'region_analyzer', None)
            if analyzer:
                region = obj.get("region", "")
                object_type = obj.get("class_name", "")

                # The contextual variant takes precedence when both exist
                if hasattr(analyzer, 'get_contextual_spatial_description'):
                    return analyzer.get_contextual_spatial_description(region, object_type)
                elif hasattr(analyzer, 'get_spatial_description_phrase'):
                    return analyzer.get_spatial_description_phrase(region)

            return ""

        except Exception as e:
            self.logger.warning(f"Error getting standardized spatial description: {str(e)}")
            object_type = obj.get("class_name", "")
            if object_type:
                return "visible in the scene"
            return "present in the view"

    def analyze_spatial_arrangement(self, class_name: str, scene_type: Optional[str],
                                    detected_objects: Optional[List[Dict]],
                                    count: int) -> Optional[str]:
        """
        Analyze how objects of one class are arranged and describe it.

        Args:
            class_name: Object class name.
            scene_type: Scene type (passed through to the phrase generator).
            detected_objects: All detected objects of this class.
            count: Number of objects (passed through to the phrase generator).

        Returns:
            Optional[str]: Arrangement phrase, or None when fewer than two
            usable positions exist or an error occurs.
        """
        if not detected_objects or len(detected_objects) < 2:
            return None

        try:
            # Collect normalized centers; objects without one count as centered
            positions = []
            for obj in detected_objects:
                center = obj.get("normalized_center", [0.5, 0.5])
                if isinstance(center, (list, tuple)) and len(center) >= 2:
                    positions.append(center)

            if len(positions) < 2:
                return None

            arrangement_pattern = self._analyze_arrangement_pattern(positions)
            return self._generate_arrangement_description(class_name, scene_type,
                                                          arrangement_pattern, count)

        except Exception as e:
            self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
            return None

    def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
        """
        Classify the arrangement pattern of a set of points.

        Args:
            positions: List of normalized [x, y] coordinates.

        Returns:
            str: One of "single", "circular", "linear", "clustered",
            "scattered", "regular" or "distributed".
        """
        if len(positions) < 2:
            return "single"

        pos_array = np.array(positions)

        # Spread along each axis
        x_variance = np.var(pos_array[:, 0])
        y_variance = np.var(pos_array[:, 1])

        # Pairwise Euclidean distances between all points
        distances = []
        for i in range(len(positions)):
            for j in range(i + 1, len(positions)):
                dist = np.sqrt((positions[i][0] - positions[j][0])**2 +
                               (positions[i][1] - positions[j][1])**2)
                distances.append(dist)

        avg_distance = np.mean(distances) if distances else 0
        distance_variance = np.var(distances) if distances else 0

        # The order of these checks matters: earlier patterns take precedence
        if len(positions) >= 4 and self._is_circular_pattern(positions):
            return "circular"
        if x_variance < 0.05 or y_variance < 0.05:  # little spread along one axis
            return "linear"
        if avg_distance < 0.3 and distance_variance < 0.02:  # close together, similar gaps
            return "clustered"
        if avg_distance > 0.6:  # far apart
            return "scattered"
        if distance_variance < 0.03:  # consistent gaps suggest a regular layout
            return "regular"
        return "distributed"

    def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
        """
        Check whether the points form a circular or ring-like arrangement.

        Args:
            positions: List of [x, y] coordinates.

        Returns:
            bool: True when at least four points lie at similar distances
            (variance below 0.05) from their centroid and the mean of those
            distances exceeds 0.2. False on any computation error.
        """
        if len(positions) < 4:
            return False

        try:
            pos_array = np.array(positions)

            # Centroid of all points
            center_x = np.mean(pos_array[:, 0])
            center_y = np.mean(pos_array[:, 1])

            distances_to_center = [
                np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
                for pos in positions
            ]

            distance_variance = np.var(distances_to_center)
            return bool(distance_variance < 0.05 and np.mean(distances_to_center) > 0.2)

        except Exception:
            # Malformed coordinates simply mean "not circular". A bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            return False

    def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
                                          arrangement_pattern: str, count: int) -> Optional[str]:
        """
        Pick the arrangement phrase for an object class and pattern.

        Args:
            class_name: Object class name.
            scene_type: Scene type (currently not used in the selection).
            arrangement_pattern: Pattern name from _analyze_arrangement_pattern.
            count: Number of objects (currently not used in the selection).

        Returns:
            Optional[str]: The matching phrase. Classes with their own template
            use it; other classes use the generic templates. A pattern missing
            from the selected template yields "positioned in the scene".
        """
        if class_name in self._ARRANGEMENT_TEMPLATES:
            template_dict = self._ARRANGEMENT_TEMPLATES[class_name]
        else:
            template_dict = self._GENERIC_ARRANGEMENT_TEMPLATES

        return template_dict.get(arrangement_pattern, "positioned in the scene")
specialized_scene_processor.py ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import logging
3
+ import traceback
4
+ import numpy as np
5
+ from typing import Dict, List, Any, Optional
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ class SpecializedSceneProcessor:
10
+ """
11
+ 負責處理特殊場景類型和地標識別
12
+ 包含亞洲文化場景、高級餐飲、金融區、空中視角等專門處理邏輯
13
+ """
14
+
15
+ def __init__(self):
16
+ """初始化特殊場景處理器"""
17
+ try:
18
+ logger.info("SpecializedSceneProcessor initialized successfully")
19
+ except Exception as e:
20
+ logger.error(f"Failed to initialize SpecializedSceneProcessor: {str(e)}")
21
+ logger.error(traceback.format_exc())
22
+ raise
23
+
24
+ def identify_aerial_intersection_features(self, detected_objects: List[Dict]) -> Dict:
25
+ """
26
+ 空中視角十字路口特徵
27
+
28
+ Args:
29
+ detected_objects: 檢測到的物件列表
30
+
31
+ Returns:
32
+ 十字路口特徵區域字典
33
+ """
34
+ try:
35
+ zones = {}
36
+
37
+ # 檢查交通信號
38
+ traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
39
+ if traffic_light_objs:
40
+ zones["traffic_control_pattern"] = {
41
+ "region": "intersection",
42
+ "objects": ["traffic light"] * len(traffic_light_objs),
43
+ "description": f"Intersection traffic control with {len(traffic_light_objs)} signals visible from above"
44
+ }
45
+
46
+ # 人行道從空中視角的情境推斷
47
+ zones["crossing_pattern"] = {
48
+ "region": "central",
49
+ "objects": ["inferred crosswalk"],
50
+ "description": "Crossing pattern visible from aerial perspective"
51
+ }
52
+
53
+ return zones
54
+
55
+ except Exception as e:
56
+ logger.error(f"Error identifying aerial intersection features: {str(e)}")
57
+ logger.error(traceback.format_exc())
58
+ return {}
59
+
60
+ def identify_aerial_plaza_features(self, people_objs: List[Dict]) -> Dict:
61
+ """
62
+ 識別空中視角廣場特徵
63
+
64
+ Args:
65
+ people_objs: 行人物件列表
66
+
67
+ Returns:
68
+ 廣場特徵區域字典
69
+ """
70
+ try:
71
+ zones = {}
72
+
73
+ if people_objs:
74
+ # 檢查人群是否聚集在中央區域
75
+ central_people = [obj for obj in people_objs
76
+ if "middle" in obj["region"]]
77
+
78
+ if central_people:
79
+ zones["central_gathering"] = {
80
+ "region": "middle_center",
81
+ "objects": ["person"] * len(central_people),
82
+ "description": f"Central plaza gathering area with {len(central_people)} people viewed from above"
83
+ }
84
+
85
+ return zones
86
+
87
+ except Exception as e:
88
+ logger.error(f"Error identifying aerial plaza features: {str(e)}")
89
+ logger.error(traceback.format_exc())
90
+ return {}
91
+
92
+ def identify_asian_pedestrian_pathway(self, detected_objects: List[Dict]) -> Dict:
93
+ """
94
+ 亞洲文化場景中的行人通道
95
+
96
+ Args:
97
+ detected_objects: 檢測到的物件列表
98
+
99
+ Returns:
100
+ 行人通道區域字典
101
+ """
102
+ try:
103
+ zones = {}
104
+
105
+ pathway_items = []
106
+ pathway_regions = {}
107
+
108
+ # 提取人群用於通道分析
109
+ people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
110
+
111
+ # 分析人群是否形成線形(商業街的特徵)
112
+ people_positions = [obj["normalized_center"] for obj in people_objs]
113
+
114
+ structured_path = False
115
+ path_direction = "meandering"
116
+
117
+ if len(people_positions) >= 3:
118
+ # 檢查人群是否沿相似y坐標排列(水平路徑)
119
+ y_coords = [pos[1] for pos in people_positions]
120
+ y_mean = sum(y_coords) / len(y_coords)
121
+ y_variance = sum((y - y_mean)**2 for y in y_coords) / len(y_coords)
122
+
123
+ horizontal_path = y_variance < 0.05 # 低變異表示水平對齊
124
+
125
+ # 檢查人群是否沿相似x坐標排列(垂直路徑)
126
+ x_coords = [pos[0] for pos in people_positions]
127
+ x_mean = sum(x_coords) / len(x_coords)
128
+ x_variance = sum((x - x_mean)**2 for x in x_coords) / len(x_coords)
129
+
130
+ vertical_path = x_variance < 0.05 # 低變異表示垂直對齊
131
+
132
+ structured_path = horizontal_path or vertical_path
133
+ path_direction = "horizontal" if horizontal_path else "vertical" if vertical_path else "meandering"
134
+
135
+ # 收集通道物件(人、自行車、摩托車在中間區域)
136
+ for obj in detected_objects:
137
+ if obj["class_id"] in [0, 1, 3]: # Person, bicycle, motorcycle
138
+ y_pos = obj["normalized_center"][1]
139
+ # 按垂直位置分組(圖像中間可能是通道)
140
+ if 0.25 <= y_pos <= 0.75:
141
+ region = obj["region"]
142
+ if region not in pathway_regions:
143
+ pathway_regions[region] = []
144
+ pathway_regions[region].append(obj)
145
+ pathway_items.append(obj["class_name"])
146
+
147
+ if pathway_items:
148
+ path_desc = "Pedestrian walkway with people moving through the commercial area"
149
+ if structured_path:
150
+ path_desc = f"{path_direction.capitalize()} pedestrian walkway with organized foot traffic"
151
+
152
+ zones["pedestrian_pathway"] = {
153
+ "region": "middle_center", # 通道通常會在中間area
154
+ "objects": list(set(pathway_items)),
155
+ "description": path_desc
156
+ }
157
+
158
+ return zones
159
+
160
+ except Exception as e:
161
+ logger.error(f"Error identifying Asian pedestrian pathway: {str(e)}")
162
+ logger.error(traceback.format_exc())
163
+ return {}
164
+
165
+ def identify_vendor_zones(self, detected_objects: List[Dict]) -> Dict:
166
+ """
167
+ 識別攤販區域
168
+
169
+ Args:
170
+ detected_objects: 檢測到的物件列表
171
+
172
+ Returns:
173
+ 攤販區域字典
174
+ """
175
+ try:
176
+ zones = {}
177
+
178
+ # 識別攤販區域(小攤/商店 - 從情境推斷)
179
+ has_small_objects = any(obj["class_id"] in [24, 26, 39, 41] for obj in detected_objects) # bags, bottles, cups
180
+ has_people = any(obj["class_id"] == 0 for obj in detected_objects)
181
+
182
+ if has_small_objects and has_people:
183
+ # 可能的攤販區域是人群和小物件聚集的地方
184
+ small_obj_regions = {}
185
+
186
+ for obj in detected_objects:
187
+ if obj["class_id"] in [24, 26, 39, 41, 67]: # bags, bottles, cups, phones
188
+ region = obj["region"]
189
+ if region not in small_obj_regions:
190
+ small_obj_regions[region] = []
191
+ small_obj_regions[region].append(obj)
192
+
193
+ if small_obj_regions:
194
+ main_vendor_region = max(small_obj_regions.items(),
195
+ key=lambda x: len(x[1]),
196
+ default=(None, []))
197
+
198
+ if main_vendor_region[0] is not None:
199
+ vendor_items = [obj["class_name"] for obj in main_vendor_region[1]]
200
+ zones["vendor_zone"] = {
201
+ "region": main_vendor_region[0],
202
+ "objects": list(set(vendor_items)),
203
+ "description": "Vendor or market stall area with small merchandise"
204
+ }
205
+
206
+ return zones
207
+
208
+ except Exception as e:
209
+ logger.error(f"Error identifying vendor zones: {str(e)}")
210
+ logger.error(traceback.format_exc())
211
+ return {}
212
+
213
+ def identify_upscale_decorative_zones(self, detected_objects: List[Dict]) -> Dict:
214
+ """
215
+ 識別高級餐飲的裝飾區域
216
+
217
+ Args:
218
+ detected_objects: 檢測到的物件列表
219
+
220
+ Returns:
221
+ 裝飾區域字典
222
+ """
223
+ try:
224
+ zones = {}
225
+
226
+ decor_items = []
227
+ decor_regions = {}
228
+
229
+ # 尋找裝飾元素(花瓶、酒杯、未使用的餐具)
230
+ for obj in detected_objects:
231
+ if obj["class_id"] in [75, 40]: # Vase, wine glass
232
+ region = obj["region"]
233
+ if region not in decor_regions:
234
+ decor_regions[region] = []
235
+ decor_regions[region].append(obj)
236
+ decor_items.append(obj["class_name"])
237
+
238
+ if decor_items:
239
+ main_decor_region = max(decor_regions.items(),
240
+ key=lambda x: len(x[1]),
241
+ default=(None, []))
242
+
243
+ if main_decor_region[0] is not None:
244
+ zones["decorative_zone"] = {
245
+ "region": main_decor_region[0],
246
+ "objects": list(set(decor_items)),
247
+ "description": f"Decorative area with {', '.join(list(set(decor_items)))}"
248
+ }
249
+
250
+ return zones
251
+
252
+ except Exception as e:
253
+ logger.error(f"Error identifying upscale decorative zones: {str(e)}")
254
+ logger.error(traceback.format_exc())
255
+ return {}
256
+
257
def identify_dining_seating_zones(self, detected_objects: List[Dict]) -> Dict:
    """
    Identify the main seating area of a dining scene.

    Chairs (class_id 56) are grouped by their "region" value and the region
    holding the most chairs becomes the "dining_seating_zone". At least two
    chairs must be present for a zone to be reported.

    Args:
        detected_objects: Detected objects; each entry is read through its
            "class_id" and "region" keys.

    Returns:
        A dict with a single "dining_seating_zone" entry, or an empty dict
        when fewer than two chairs exist, the winning region is None, or
        any error occurs.
    """
    try:
        seats = [item for item in detected_objects if item["class_id"] == 56]  # chairs
        if len(seats) < 2:
            return {}

        # Group chairs by region, keeping first-seen order so that ties
        # resolve to the region encountered first.
        seats_by_region = {}
        for seat in seats:
            seats_by_region.setdefault(seat["region"], []).append(seat)

        busiest_region, busiest_seats = max(
            seats_by_region.items(),
            key=lambda entry: len(entry[1]),
        )
        if busiest_region is None:
            return {}

        seat_total = len(busiest_seats)
        return {
            "dining_seating_zone": {
                "region": busiest_region,
                "objects": ["chair"] * seat_total,
                "description": f"Formal dining seating arrangement with {seat_total} chairs"
            }
        }

    except Exception as e:
        logger.error(f"Error identifying dining seating zones: {str(e)}")
        logger.error(traceback.format_exc())
        return {}
298
+
299
def identify_serving_zones(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
    """
    Identify a serving / sideboard area away from the main dining table.

    Bottles (class_id 39) and bowls (class_id 45) that sit in a region other
    than the one recorded for "formal_dining_zone" are grouped by region; the
    region with the most such objects becomes the "serving_zone".

    Args:
        detected_objects: Detected objects; each entry is read through its
            "class_id", "region" and "class_name" keys.
        existing_zones: Zones identified so far. Without a
            "formal_dining_zone" entry no serving zone can be reported.

    Returns:
        A dict with a single "serving_zone" entry, or an empty dict when no
        qualifying object exists or an error occurs. The "objects" list holds
        each class name once, in first-seen order, so the result is
        reproducible between runs.
    """
    try:
        dining_zone = existing_zones.get("formal_dining_zone") if existing_zones else None
        if dining_zone is None:
            # Nothing to be "away from": the original condition can never hold.
            return {}

        serving_items = []
        serving_regions = {}

        for obj in detected_objects:
            if obj["class_id"] not in (39, 45):  # bottle, bowl
                continue
            region = obj["region"]
            if region == dining_zone["region"]:
                continue
            serving_regions.setdefault(region, []).append(obj)
            serving_items.append(obj["class_name"])

        if not serving_items:
            return {}

        main_region, _ = max(serving_regions.items(), key=lambda x: len(x[1]))
        if main_region is None:
            return {}

        # dict.fromkeys de-duplicates while keeping insertion order; a set
        # would yield an order that changes with string hash randomisation.
        unique_items = list(dict.fromkeys(serving_items))
        return {
            "serving_zone": {
                "region": main_region,
                "objects": unique_items,
                "description": f"Serving or sideboard area with {', '.join(unique_items)}"
            }
        }

    except Exception as e:
        logger.error(f"Error identifying serving zones: {str(e)}")
        logger.error(traceback.format_exc())
        return {}
345
+
346
def identify_building_zones(self, detected_objects: List[Dict]) -> Dict:
    """
    Infer building zones along the left and right edges of a street scene.

    Buildings are not detected directly. A side is reported as a building
    zone when none of its three regions (top / middle / bottom) contains a
    vehicle (class_id 1, 2, 3, 5 or 7) or a person (class_id 0).

    Args:
        detected_objects: Detected objects; each entry is read through its
            "region" and "class_id" keys.

    Returns:
        A dict that may contain "building_zone_left" and/or
        "building_zone_right"; an empty dict when an error occurs.
    """
    try:
        side_regions = (
            ("left", ["top_left", "middle_left", "bottom_left"]),
            ("right", ["top_right", "middle_right", "bottom_right"]),
        )

        zones = {}
        for side, regions in side_regions:
            side_is_clear = True
            for region in regions:
                # A single vehicle or person in the region is enough to
                # rule the whole side out as building frontage.
                has_vehicle = any(
                    obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
                    for obj in detected_objects
                )
                has_person = any(
                    obj["region"] == region and obj["class_id"] == 0
                    for obj in detected_objects
                )
                if has_vehicle or has_person:
                    side_is_clear = False
                    break

            if side_is_clear:
                zones[f"building_zone_{side}"] = {
                    "region": f"middle_{side}",
                    "objects": ["building"],  # inferred, not detected
                    "description": f"Tall buildings line the {side} side of the street"
                }

        return zones

    except Exception as e:
        logger.error(f"Error identifying building zones: {str(e)}")
        logger.error(traceback.format_exc())
        return {}
411
+
412
def identify_financial_pedestrian_zones(self, detected_objects: List[Dict]) -> Dict:
    """
    Identify the main pedestrian area of a financial-district scene.

    People (class_id 0) are grouped by their "region" value and the region
    holding the most people becomes the "pedestrian_zone".

    Args:
        detected_objects: Detected objects; each entry is read through its
            "class_id" and "region" keys.

    Returns:
        A dict with a single "pedestrian_zone" entry, or an empty dict when
        no person is present, the winning region is None, or an error occurs.
    """
    try:
        pedestrians = [item for item in detected_objects if item["class_id"] == 0]
        if not pedestrians:
            return {}

        # First-seen order is preserved so a tie goes to the earliest region.
        crowd_by_region = {}
        for pedestrian in pedestrians:
            crowd_by_region.setdefault(pedestrian["region"], []).append(pedestrian)

        region, members = max(crowd_by_region.items(), key=lambda pair: len(pair[1]))
        if region is None:
            return {}

        headcount = len(members)
        return {
            "pedestrian_zone": {
                "region": region,
                "objects": ["person"] * headcount,
                "description": f"Pedestrian area with {headcount} people navigating the financial district"
            }
        }

    except Exception as e:
        logger.error(f"Error identifying financial pedestrian zones: {str(e)}")
        logger.error(traceback.format_exc())
        return {}
453
+
454
def create_landmark_auxiliary_zones(self, landmark: Dict, index: int) -> Dict:
    """
    Build auxiliary zones (photography spot, souvenir area) for a landmark.

    A photography spot is always created, placed in the region opposite the
    landmark (or directly below it for "middle_center"). A souvenir area is
    added in the first listed neighbouring region when the landmark's
    confidence exceeds 0.7.

    Args:
        landmark: Landmark dict; "region" (default "middle_center"),
            "class_name" (default "Landmark") and "confidence" (default 0)
            are read from it.
        index: Landmark index. Accepted for interface compatibility; it is
            not used by this method.

    Returns:
        Zones keyed by "<landmark_slug>_photography_spot" and, optionally,
        "<landmark_slug>_souvenir_area"; an empty dict when an error occurs.
    """
    try:
        landmark_region = landmark.get("region", "middle_center")
        landmark_name = landmark.get("class_name", "Landmark")

        # Where visitors typically stand to photograph a landmark located
        # in the key region.
        opposite_region = dict([
            ("top_left", "bottom_right"),
            ("top_center", "bottom_center"),
            ("top_right", "bottom_left"),
            ("middle_left", "middle_right"),
            ("middle_center", "bottom_center"),
            ("middle_right", "middle_left"),
            ("bottom_left", "top_right"),
            ("bottom_center", "top_center"),
            ("bottom_right", "top_left"),
        ])
        # Unknown regions fall back to the landmark's own region.
        photo_region = opposite_region.get(landmark_region, landmark_region)

        slug = landmark_name.lower().replace(' ', '_')
        auxiliary_zones = {
            f"{slug}_photography_spot": {
                "name": f"{landmark_name} Photography Spot",
                "description": f"Popular position for photographing {landmark_name} with optimal viewing angle.",
                "objects": ["camera", "person", "cell phone"],
                "region": photo_region,
                "primary_function": "Tourist photography"
            }
        }

        # Only confidently recognised landmarks get a souvenir area.
        if not landmark.get("confidence", 0) > 0.7:
            return auxiliary_zones

        # Neighbouring regions: near the landmark but not on top of it.
        neighbours_of = dict([
            ("top_left", ["top_center", "middle_left"]),
            ("top_center", ["top_left", "top_right"]),
            ("top_right", ["top_center", "middle_right"]),
            ("middle_left", ["top_left", "bottom_left"]),
            ("middle_center", ["middle_left", "middle_right"]),
            ("middle_right", ["top_right", "bottom_right"]),
            ("bottom_left", ["middle_left", "bottom_center"]),
            ("bottom_center", ["bottom_left", "bottom_right"]),
            ("bottom_right", ["bottom_center", "middle_right"]),
        ])

        neighbours = neighbours_of.get(landmark_region)
        if neighbours:
            auxiliary_zones[f"{slug}_souvenir_area"] = {
                "name": f"{landmark_name} Souvenir Area",
                "description": f"Area where visitors can purchase souvenirs and memorabilia related to {landmark_name}.",
                "objects": ["person", "handbag", "backpack"],
                "region": neighbours[0],  # first listed neighbour
                "primary_function": "Tourism commerce"
            }

        return auxiliary_zones

    except Exception as e:
        logger.error(f"Error creating landmark auxiliary zones: {str(e)}")
        logger.error(traceback.format_exc())
        return {}
statistics_processor.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Dict, List, Optional, Any
3
+
4
class StatisticsProcessor:
    """
    Statistics processor: converts object statistics and scene data into text.

    The class turns per-class object counts, Places365 classification info,
    functional-zone dictionaries and raw detections into the natural-language
    fragments used to fill scene-description templates.
    """

    # Spelled-out words used for small chair counts; larger counts fall back
    # to "several" / "numerous" wording.
    _NUMBER_WORDS = {
        1: "one", 2: "two", 3: "three", 4: "four",
        5: "five", 6: "six", 7: "seven", 8: "eight",
        9: "nine", 10: "ten", 11: "eleven", 12: "twelve"
    }

    # Plural forms that the regular suffix rules in _pluralize would get wrong.
    _IRREGULAR_PLURALS = {
        "person": "people",
        "knife": "knives",
        "mouse": "mice",
        "sheep": "sheep",
        "skis": "skis",
        "scissors": "scissors",
    }

    _VOWELS = ("a", "e", "i", "o", "u")

    def __init__(self):
        """Create the processor and its class-named logger."""
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.debug("StatisticsProcessor initialized successfully")

    @classmethod
    def _indefinite_article(cls, phrase: str) -> str:
        """Return "an" when *phrase* starts with a vowel letter, else "a"."""
        return "an" if phrase[:1].lower() in cls._VOWELS else "a"

    @classmethod
    def _pluralize(cls, noun: str) -> str:
        """Return the plural of *noun*, inflecting only its last word."""
        head, sep, last = noun.rpartition(" ")
        lowered = last.lower()
        if lowered in cls._IRREGULAR_PLURALS:
            plural = cls._IRREGULAR_PLURALS[lowered]
        elif lowered.endswith(("s", "x", "z", "ch", "sh")):
            plural = last + "es"
        elif lowered.endswith("y") and len(lowered) > 1 and lowered[-2] not in cls._VOWELS:
            plural = last[:-1] + "ies"
        else:
            plural = last + "s"
        return f"{head}{sep}{plural}"

    @staticmethod
    def _positive_count(object_statistics: Dict, class_name: str) -> int:
        """Return the "count" recorded for *class_name*, or 0 if absent or not positive."""
        entry = object_statistics.get(class_name)
        if not entry:
            return 0
        count = entry["count"]
        return count if count > 0 else 0

    def generate_statistics_replacements(self, object_statistics: Optional[Dict]) -> Dict[str, str]:
        """
        Build template replacements from per-class object statistics.

        Args:
            object_statistics: Mapping of class name to a dict holding at
                least a "count" entry. May be None or empty.

        Returns:
            Dict[str, str]: Replacement text keyed by placeholder name.
            Classes that are absent or have a non-positive count contribute
            nothing. If an error occurs, the replacements produced so far
            are returned.
        """
        replacements: Dict[str, str] = {}

        if not object_statistics:
            return replacements

        try:
            # Plants
            plants = self._positive_count(object_statistics, "potted plant")
            if plants == 1:
                replacements["plant_elements"] = "a potted plant"
            elif 1 < plants <= 3:
                replacements["plant_elements"] = f"{plants} potted plants"
            elif plants > 3:
                replacements["plant_elements"] = f"multiple potted plants ({plants} total)"

            # Seating (chairs)
            chairs = self._positive_count(object_statistics, "chair")
            if chairs == 1:
                replacements["seating"] = "a chair"
                replacements["furniture"] = "a chair"
            elif chairs in self._NUMBER_WORDS:
                word_count = self._NUMBER_WORDS[chairs]
                replacements["seating"] = f"{word_count} chairs"
                replacements["furniture"] = f"{word_count} chairs"
            elif 0 < chairs <= 20:
                replacements["seating"] = "several chairs"
                replacements["furniture"] = "several chairs"
            elif chairs > 20:
                replacements["seating"] = f"numerous chairs ({chairs} total)"
                replacements["furniture"] = "numerous chairs"

            # NOTE(review): an earlier revision also tried to emit
            # "<n> chairs and other furniture" for mixed furniture, but that
            # branch could never execute (it required chairs to be present
            # while "furniture" was still unset). It has been removed without
            # changing any output; reinstate deliberately if it is wanted.

            # People
            people = self._positive_count(object_statistics, "person")
            if people == 1:
                replacements["people_and_vehicles"] = "a person"
                replacements["pedestrian_flow"] = "an individual walking"
            elif 1 < people <= 5:
                replacements["people_and_vehicles"] = f"{people} people"
                replacements["pedestrian_flow"] = f"{people} people walking"
            elif people > 5:
                replacements["people_and_vehicles"] = f"many people ({people} individuals)"
                replacements["pedestrian_flow"] = f"a crowd of {people} people"

            # Table setup
            tables = self._positive_count(object_statistics, "dining table")
            if tables == 1:
                replacements["table_setup"] = "a dining table"
                replacements["table_description"] = "a dining surface"
            elif tables > 1:
                replacements["table_setup"] = f"{tables} dining tables"
                replacements["table_description"] = f"{tables} dining surfaces"

            self.logger.debug(f"Generated {len(replacements)} statistics-based replacements")

        except Exception as e:
            self.logger.warning(f"Error generating statistics replacements: {str(e)}")

        return replacements

    def generate_places365_replacements(self, places365_info: Optional[Dict]) -> Dict[str, str]:
        """
        Build template replacements from Places365 classification info.

        Args:
            places365_info: Dict read through its "confidence",
                "scene_label" and "attributes" keys. May be None.

        Returns:
            Dict[str, str]: Always contains "places365_context" and
            "places365_atmosphere". Both are empty strings when the info is
            missing, the confidence is 0.35 or lower (or not set), or an
            error occurs.
        """
        replacements = {"places365_context": "", "places365_atmosphere": ""}

        if not places365_info:
            return replacements

        try:
            # A missing or None confidence is treated as 0 rather than raising.
            confidence = places365_info.get('confidence', 0) or 0
            if confidence <= 0.35:
                return replacements

            scene_label = (places365_info.get('scene_label') or '').replace('_', ' ')
            attributes = places365_info.get('attributes') or []

            # Scene context, with the article matched to the label.
            if scene_label:
                article = self._indefinite_article(scene_label)
                replacements["places365_context"] = f"characteristic of {article} {scene_label}"

            # Atmosphere
            if 'natural_lighting' in attributes:
                replacements["places365_atmosphere"] = "with natural illumination"
            elif 'artificial_lighting' in attributes:
                replacements["places365_atmosphere"] = "under artificial lighting"

            self.logger.debug("Generated Places365-based replacements")

        except Exception as e:
            self.logger.warning(f"Error generating Places365 replacements: {str(e)}")
            replacements["places365_context"] = ""
            replacements["places365_atmosphere"] = ""

        return replacements

    def analyze_scene_composition(self, detected_objects: List[Dict]) -> Dict:
        """
        Summarise the composition of a scene to gauge template complexity.

        Args:
            detected_objects: Detected objects; "class_name" is read from
                each entry (default "unknown").

        Returns:
            Dict: "total_objects", "unique_categories",
            "category_distribution" (class name -> count) and
            "complexity_score" (0.3 * total + 0.7 * unique, capped at 10).
            A zeroed dict with the same keys is returned on error.
        """
        try:
            total_objects = len(detected_objects)

            # Count objects per class
            object_categories: Dict[str, int] = {}
            for obj in detected_objects:
                class_name = obj.get("class_name", "unknown")
                object_categories[class_name] = object_categories.get(class_name, 0) + 1

            unique_categories = len(object_categories)

            return {
                "total_objects": total_objects,
                "unique_categories": unique_categories,
                "category_distribution": object_categories,
                "complexity_score": min(total_objects * 0.3 + unique_categories * 0.7, 10)
            }

        except Exception as e:
            self.logger.warning(f"Error analyzing scene composition: {str(e)}")
            # Same keys as the success path so callers can index safely.
            return {
                "total_objects": 0,
                "unique_categories": 0,
                "category_distribution": {},
                "complexity_score": 0
            }

    def generate_zone_descriptions(self, zone_data: Dict[str, Any], section: Dict[str, Any]) -> List[str]:
        """
        Produce one sentence per functional zone that contains objects.

        Args:
            zone_data: Mapping of zone name to zone info; "objects" and
                "description" are read from each zone info dict.
            section: Template section configuration (not used here).

        Returns:
            List[str]: Zone descriptions ordered by descending object count;
            an empty list when there is no zone data or an error occurs.
        """
        try:
            descriptions: List[str] = []

            if not zone_data:
                return descriptions

            # zone_data itself is the zone dictionary; largest zones first.
            ranked_zones = sorted(zone_data.items(),
                                  key=lambda item: len(item[1].get("objects", [])),
                                  reverse=True)

            for zone_name, zone_info in ranked_zones:
                objects = zone_info.get("objects", [])
                if not objects:
                    continue

                description = zone_info.get("description", "")

                # Reuse the stored description only if it avoids technical wording.
                if description and not any(tech in description.lower() for tech in ['zone', 'area', 'region']):
                    zone_desc = description
                else:
                    clean_zone_name = zone_name.replace('_', ' ').replace(' area', '').replace(' zone', '')
                    object_list = ', '.join(objects[:3])

                    if 'crossing' in zone_name or 'pedestrian' in zone_name:
                        zone_desc = f"In the central crossing area, there are {object_list}."
                    elif 'vehicle' in zone_name or 'traffic' in zone_name:
                        zone_desc = f"The vehicle movement area includes {object_list}."
                    elif 'control' in zone_name:
                        zone_desc = f"Traffic control elements include {object_list}."
                    else:
                        zone_desc = f"The {clean_zone_name} contains {object_list}."

                    if len(objects) > 3:
                        zone_desc += f" Along with {len(objects) - 3} additional elements."

                descriptions.append(zone_desc)

            return descriptions

        except Exception as e:
            self.logger.error(f"Error generating zone descriptions: {str(e)}")
            return []

    def generate_object_summary(self, object_data: List[Dict], section: Dict[str, Any]) -> str:
        """
        Produce a one-sentence summary of the most important object classes.

        Importance is 0.6 * count + 0.4 * average confidence; the five
        highest-ranked classes are listed with correct articles and plurals.

        Args:
            object_data: Detected objects; "class_name" (default "unknown")
                and "confidence" (default 0.5) are read from each entry.
            section: Template section configuration (not used here).

        Returns:
            str: A sentence starting with "The scene features ...", or an
            empty string when there are no objects or an error occurs.
        """
        try:
            if not object_data:
                return ""

            # Aggregate count and confidence per class
            object_stats: Dict[str, Dict[str, float]] = {}
            for obj in object_data:
                class_name = obj.get("class_name") or "unknown"
                confidence = obj.get("confidence", 0.5)

                stats = object_stats.setdefault(class_name, {"count": 0, "total_confidence": 0})
                stats["count"] += 1
                stats["total_confidence"] += confidence

            # Rank by importance (count weighted with average confidence)
            ranked = []
            for class_name, stats in object_stats.items():
                count = stats["count"]
                avg_confidence = stats["total_confidence"] / count
                importance = count * 0.6 + avg_confidence * 0.4
                ranked.append((class_name, count, importance))

            ranked.sort(key=lambda item: item[2], reverse=True)

            # Natural-language fragments for the top five classes
            descriptions = []
            for class_name, count, _ in ranked[:5]:
                clean_name = class_name.replace('_', ' ')
                if count == 1:
                    descriptions.append(f"{self._indefinite_article(clean_name)} {clean_name}")
                else:
                    descriptions.append(f"{count} {self._pluralize(clean_name)}")

            if len(descriptions) == 1:
                return f"The scene features {descriptions[0]}."
            elif len(descriptions) == 2:
                return f"The scene features {descriptions[0]} and {descriptions[1]}."
            else:
                main_items = ", ".join(descriptions[:-1])
                return f"The scene features {main_items}, and {descriptions[-1]}."

        except Exception as e:
            self.logger.error(f"Error generating object summary: {str(e)}")
            return ""

    def generate_conclusion(self, template: Dict[str, Any], zone_data: Dict[str, Any],
                            object_data: List[Dict]) -> str:
        """
        Produce the closing sentence of a structured description.

        Args:
            template: Template configuration; "scene_type" is read from it
                (default "general").
            zone_data: Functional zones; only its length is used.
            object_data: Detected objects; only its length is used.

        Returns:
            str: A sentence chosen by scene type ("indoor", "outdoor" or
            anything else), or an empty string when an error occurs.
        """
        try:
            scene_type = template.get("scene_type", "general")
            zones_count = len(zone_data)
            objects_count = len(object_data)

            if scene_type == "indoor":
                conclusion = f"This indoor environment demonstrates clear functional organization with {zones_count} distinct areas and {objects_count} identified objects."
            elif scene_type == "outdoor":
                conclusion = f"This outdoor scene shows dynamic activity patterns across {zones_count} functional zones with {objects_count} detected elements."
            else:
                conclusion = f"The scene analysis reveals {zones_count} functional areas containing {objects_count} identifiable objects."

            return conclusion

        except Exception as e:
            self.logger.error(f"Error generating conclusion: {str(e)}")
            return ""
template_manager.py CHANGED
The diff for this file is too large to render. See raw diff
 
template_processor.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import traceback
3
+ import re
4
+ from typing import Dict, List, Optional, Union, Any
5
+
6
class TemplateProcessor:
    """
    Template processor: selects, normalises, fills and post-processes templates.

    The class covers the final stage of description generation: choosing a
    template for a scene, normalising its placeholders and wording, repairing
    grammar in filled templates, and rendering structured (sectioned)
    templates into a single description.
    """

    _VOWELS = ("a", "e", "i", "o", "u")

    def __init__(self):
        """Create the processor and its class-named logger."""
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.debug("TemplateProcessor initialized successfully")

    @classmethod
    def _indefinite_article(cls, phrase: str, capitalize: bool = False) -> str:
        """Return "a"/"an" (or "A"/"An") according to the first letter of *phrase*."""
        article = "an" if phrase[:1].lower() in cls._VOWELS else "a"
        return article.capitalize() if capitalize else article

    def preprocess_template(self, template: str) -> str:
        """
        Pre-process a raw template to remove patterns known to cause trouble.

        Two placeholders separated only by a comma are merged into a single
        "{combined_elements}" placeholder, and leading commas/whitespace are
        removed.

        Args:
            template: Raw template string.

        Returns:
            str: The pre-processed template; the input is returned unchanged
            if an error occurs.
        """
        try:
            # Merge "{a}, {b}" into one placeholder
            template = re.sub(r'\{[^}]*\}\s*,\s*\{[^}]*\}', '{combined_elements}', template)

            # The template must not start with a comma
            template = re.sub(r'^[,\s]*', '', template)

            return template.strip()

        except Exception as e:
            self.logger.warning(f"Error preprocessing template: {str(e)}")
            return template

    def postprocess_filled_template(self, filled_template: str) -> str:
        """
        Repair grammar artefacts left behind after placeholder substitution.

        Args:
            filled_template: Template text after filling.

        Returns:
            str: Text with dangling "In ,"/"At ,"/"Within ," repaired, runs of
            commas collapsed, leading commas removed, the first letter
            capitalised and terminal punctuation ensured. The input is
            returned unchanged if an error occurs.
        """
        try:
            # Repair prepositions whose object was replaced by nothing
            text = re.sub(r'\bIn\s*,\s*', 'In this scene, ', filled_template)
            text = re.sub(r'\bAt\s*,\s*', 'At this location, ', text)
            text = re.sub(r'\bWithin\s*,\s*', 'Within this area, ', text)

            # Collapse any run of commas (", , ,") into a single comma
            text = re.sub(r',(?:\s*,)+', ',', text)

            # Drop leading commas/whitespace, and trailing whitespace *before*
            # the punctuation check so "text. " does not become "text. ."
            text = re.sub(r'^[,\s]*', '', text).rstrip()

            # Capitalise the first letter
            if text and not text[0].isupper():
                text = text[0].upper() + text[1:]

            # Ensure terminal punctuation
            if text and not text.endswith(('.', '!', '?')):
                text += '.'

            return text

        except Exception as e:
            self.logger.warning(f"Error postprocessing filled template: {str(e)}")
            return filled_template

    def get_template_by_scene_type(self, scene_type: str, detected_objects: List[Dict],
                                   functional_zones: Dict, template_repository) -> str:
        """
        Select a template for the scene type and normalise its format.

        The variant ("complex", "moderate" or "simple") is chosen from the
        number of functional zones and detected objects. When that variant is
        missing, the "default" variant or else the first available one is
        used. Scene types without templates get a generic template.

        Args:
            scene_type: Scene type key.
            detected_objects: Detected objects.
            functional_zones: Functional zones (may be None or empty).
            template_repository: Object exposing a ``templates`` mapping of
                scene type to variant mapping.

        Returns:
            str: The normalised template, or the fallback template on error.
        """
        try:
            object_stats = self._analyze_scene_composition(detected_objects)
            zone_count = len(functional_zones) if functional_zones else 0
            total_objects = object_stats.get("total_objects", 0)

            templates = template_repository.templates
            if scene_type in templates:
                scene_templates = templates[scene_type]

                # Pick the variant matching the scene's complexity
                if zone_count >= 3 and total_objects >= 10:
                    template_key = "complex"
                elif zone_count >= 2 or total_objects >= 5:
                    template_key = "moderate"
                else:
                    template_key = "simple"

                if template_key in scene_templates:
                    raw_template = scene_templates[template_key]
                elif "default" in scene_templates:
                    raw_template = scene_templates["default"]
                else:
                    # First available variant; None (handled downstream as
                    # "use the fallback template") when there is none.
                    raw_template = next(iter(scene_templates.values()), None)
            else:
                # No scene-specific template: use a generic one
                raw_template = self._get_generic_template(object_stats, zone_count)

            return self._standardize_template_format(raw_template)

        except Exception as e:
            self.logger.error(f"Error selecting template for scene type '{scene_type}': {str(e)}")
            return self._get_fallback_template()

    def _analyze_scene_composition(self, detected_objects: List[Dict]) -> Dict:
        """
        Summarise the composition of a scene to gauge template complexity.

        Args:
            detected_objects: Detected objects; "class_name" is read from
                each entry (default "unknown").

        Returns:
            Dict: "total_objects", "unique_categories",
            "category_distribution" (class name -> count) and
            "complexity_score" (0.3 * total + 0.7 * unique, capped at 10).
            A zeroed dict with the same keys is returned on error.
        """
        try:
            total_objects = len(detected_objects)

            # Count objects per class
            object_categories: Dict[str, int] = {}
            for obj in detected_objects:
                class_name = obj.get("class_name", "unknown")
                object_categories[class_name] = object_categories.get(class_name, 0) + 1

            unique_categories = len(object_categories)

            return {
                "total_objects": total_objects,
                "unique_categories": unique_categories,
                "category_distribution": object_categories,
                "complexity_score": min(total_objects * 0.3 + unique_categories * 0.7, 10)
            }

        except Exception as e:
            self.logger.warning(f"Error analyzing scene composition: {str(e)}")
            # Same keys as the success path so callers can index safely.
            return {
                "total_objects": 0,
                "unique_categories": 0,
                "category_distribution": {},
                "complexity_score": 0
            }

    def _get_generic_template(self, object_stats: Dict, zone_count: int) -> str:
        """
        Return a generic template matching the scene's complexity.

        Args:
            object_stats: Scene composition statistics; "complexity_score"
                is read from it (default 0).
            zone_count: Number of functional zones.

        Returns:
            str: A generic template, or the fallback template on error.
        """
        try:
            complexity_score = object_stats.get("complexity_score", 0)

            if complexity_score >= 7 or zone_count >= 3:
                return "This scene presents a comprehensive view featuring {functional_area} with {primary_objects}. The spatial organization demonstrates {spatial_arrangement} across multiple {activity_areas}, creating a dynamic environment with diverse elements and clear functional zones."
            elif complexity_score >= 4 or zone_count >= 2:
                return "The scene displays {functional_area} containing {primary_objects}. The arrangement shows {spatial_organization} with distinct areas serving different purposes within the overall space."
            else:
                return "A {scene_description} featuring {primary_objects} arranged in {basic_layout} within the visible area."

        except Exception as e:
            self.logger.warning(f"Error getting generic template: {str(e)}")
            return self._get_fallback_template()

    def _get_fallback_template(self) -> str:
        """
        Return the placeholder-free template used when nothing else works.

        Returns:
            str: The fallback template.
        """
        return "A scene featuring various elements and organized areas of activity within the visible space."

    def _standardize_template_format(self, template: str) -> str:
        """
        Normalise placeholders and technical wording in a template.

        Numbered technical placeholders (e.g. "{zone_1}") are mapped to
        natural placeholder names, technical terms are replaced by plain
        wording, and readability fixes are applied.

        Args:
            template: Raw template string.

        Returns:
            str: The normalised template. The fallback template is returned
            for an empty template; on error the input (or the fallback when
            the input is empty) is returned.
        """
        try:
            if not template:
                return self._get_fallback_template()

            standardized = template

            # Replace numbered technical placeholders
            placeholder_mapping = {
                r'\{zone_\d+\}': '{functional_area}',
                r'\{object_group_\d+\}': '{primary_objects}',
                r'\{region_\d+\}': '{spatial_area}',
                r'\{category_\d+\}': '{object_category}',
                r'\{area_\d+\}': '{activity_area}',
                r'\{section_\d+\}': '{scene_section}'
            }

            for pattern, replacement in placeholder_mapping.items():
                standardized = re.sub(pattern, replacement, standardized)

            # Replace common technical terms
            term_replacements = {
                'functional_zones': 'areas of activity',
                'object_detection': 'visible elements',
                'category_regions': 'organized sections',
                'spatial_distribution': 'arrangement throughout the space',
                'viewpoint_analysis': 'perspective view'
            }

            for tech_term, natural_term in term_replacements.items():
                standardized = standardized.replace(tech_term, natural_term)

            return self._improve_template_readability(standardized)

        except Exception as e:
            self.logger.warning(f"Error standardizing template format: {str(e)}")
            return template if template else self._get_fallback_template()

    def _improve_template_readability(self, template: str) -> str:
        """
        Tidy whitespace, punctuation spacing and duplicated words.

        Args:
            template: Template string.

        Returns:
            str: The tidied template; the input is returned unchanged if an
            error occurs.
        """
        try:
            # Collapse whitespace and line breaks
            improved = re.sub(r'\s+', ' ', template).strip()

            # Remove stray spaces before punctuation
            improved = improved.replace(' . ', '. ')
            improved = improved.replace(' , ', ', ')
            improved = improved.replace(' ; ', '; ')

            # Ensure terminal punctuation
            if improved and not improved.endswith(('.', '!', '?')):
                improved += '.'

            # Collapse accidentally repeated function words ("the the").
            # The first occurrence is kept as written so a sentence-initial
            # capital survives.
            improved = re.sub(
                r'\b(the|a|an|with|in|of|and)(?:\s+\1\b)+',
                r'\1',
                improved,
                flags=re.IGNORECASE
            )

            return improved

        except Exception as e:
            self.logger.warning(f"Error improving template readability: {str(e)}")
            return template

    def process_structured_template(self, template: Dict[str, Any], scene_data: Dict[str, Any],
                                    statistics_processor) -> str:
        """
        Render a structured (sectioned) template into a description.

        Sections are processed in order; supported types are "opening",
        "zone_analysis", "object_summary" and "conclusion". Other types are
        ignored.

        Args:
            template: Structured template; its "structure" list is read.
            scene_data: Scene analysis data; "functional_zones" (or "zones")
                and "detected_objects" are read.
            statistics_processor: Object providing
                ``generate_zone_descriptions``, ``generate_object_summary``
                and ``generate_conclusion``.

        Returns:
            str: The generated description, or a fallback description when
            the template has no structure or an error occurs.
        """
        try:
            zone_data = scene_data.get("functional_zones", scene_data.get("zones", {}))
            object_data = scene_data.get("detected_objects", [])

            structure = template.get("structure", [])
            if not structure:
                self.logger.warning("Template has no structure defined")
                return self._generate_fallback_scene_description(scene_data)

            description_parts = []

            for section in structure:
                section_type = section.get("type", "")

                if section_type == "opening":
                    content = section.get("content", "")
                    if content:
                        description_parts.append(content)

                elif section_type == "zone_analysis":
                    zone_descriptions = statistics_processor.generate_zone_descriptions(zone_data, section)
                    if zone_descriptions:
                        description_parts.extend(zone_descriptions)

                elif section_type == "object_summary":
                    object_summary = statistics_processor.generate_object_summary(object_data, section)
                    if object_summary:
                        description_parts.append(object_summary)

                elif section_type == "conclusion":
                    conclusion = statistics_processor.generate_conclusion(template, zone_data, object_data)
                    if conclusion:
                        description_parts.append(conclusion)

            # Join the parts and normalise the result
            final_description = self._standardize_final_description(" ".join(description_parts))
            self.logger.info("Successfully applied structured template")
            return final_description

        except Exception as e:
            self.logger.error(f"Error processing structured template: {str(e)}")
            return self._generate_fallback_scene_description(scene_data)

    def _generate_fallback_scene_description(self, scene_data: Dict[str, Any]) -> str:
        """
        Produce a minimal description from raw scene data.

        Args:
            scene_data: Scene analysis data; "detected_objects",
                "functional_zones" (or "zones") and "scene_type" are read.

        Returns:
            str: A sentence stating the zone/object counts, or a fixed
            sentence when an error occurs.
        """
        try:
            detected_objects = scene_data.get("detected_objects", [])
            zones = scene_data.get("functional_zones", scene_data.get("zones", {}))
            scene_type = scene_data.get("scene_type", "general")

            object_count = len(detected_objects)
            zone_count = len(zones)

            if zone_count > 0 and object_count > 0:
                return f"Scene analysis completed with {zone_count} functional areas containing {object_count} identified objects."
            elif object_count > 0:
                return f"Scene analysis identified {object_count} objects in this {scene_type.replace('_', ' ')} environment."
            else:
                return f"Scene analysis completed for this {scene_type.replace('_', ' ')} environment."

        except Exception as e:
            self.logger.warning(f"Error generating fallback description: {str(e)}")
            return "Scene analysis completed with detected objects and functional areas."

    def _standardize_final_description(self, description: str) -> str:
        """
        Normalise the final description text.

        Whitespace is collapsed, leftover technical identifiers
        ("zone_1", "_area", ...) are removed, and the gaps they leave behind
        are closed.

        Args:
            description: Raw description text.

        Returns:
            str: The normalised description; the input is returned unchanged
            if an error occurs.
        """
        try:
            # Collapse whitespace
            description = " ".join(description.split())

            # Remove leftover technical identifiers
            technical_patterns = [
                r'zone_\d+', r'area_\d+', r'region_\d+',
                r'_zone', r'_area', r'_region'
            ]

            for pattern in technical_patterns:
                description = re.sub(pattern, '', description, flags=re.IGNORECASE)

            # Removing an identifier can leave a double space or a space
            # before punctuation; close those gaps.
            description = " ".join(description.split())
            description = re.sub(r'\s+([.,;!?])', r'\1', description)

            return description.strip()

        except Exception as e:
            self.logger.error(f"Error standardizing final description: {str(e)}")
            return description

    def generate_fallback_description(self, scene_type: str, detected_objects: List[Dict]) -> str:
        """
        Produce a last-resort description when template filling fails.

        Args:
            scene_type: Scene type; underscores are shown as spaces.
            detected_objects: Detected objects; only the count is used.

        Returns:
            str: "A/An <scene type> scene ..." with the element count, or a
            fixed sentence when an error occurs.
        """
        try:
            object_count = len(detected_objects)
            scene_name = scene_type.replace('_', ' ')
            article = self._indefinite_article(scene_name, capitalize=True)

            if object_count == 0:
                return f"{article} {scene_name} scene."
            elif object_count == 1:
                return f"{article} {scene_name} scene with one visible element."
            else:
                return f"{article} {scene_name} scene with {object_count} visible elements."

        except Exception as e:
            self.logger.warning(f"Error generating fallback description: {str(e)}")
            return "A scene with various elements."
template_repository.py ADDED
@@ -0,0 +1,834 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import traceback
3
+ from typing import Dict, List, Optional, Any
4
+
5
+ from scene_detail_templates import SCENE_DETAIL_TEMPLATES
6
+ from object_template_fillers import OBJECT_TEMPLATE_FILLERS
7
+ from viewpoint_templates import VIEWPOINT_TEMPLATES
8
+ from cultural_templates import CULTURAL_TEMPLATES
9
+ from lighting_conditions import LIGHTING_CONDITIONS
10
+ from confidence_templates import CONFIDENCE_TEMPLATES
11
+
12
+ class TemplateRepository:
13
+ """
14
+ 模板資料的管理器 - 負責模板的載入、儲存、檢索和驗證
15
+
16
+ 此類別專門處理模板資源的管理,包括從各種來源載入模板、
17
+ 驗證模板完整性,以及提供統一的模板檢索介面。
18
+ """
19
+
20
+ def __init__(self, custom_templates_db: Optional[Dict] = None):
21
+ """
22
+ 初始化模板庫管理器
23
+
24
+ Args:
25
+ custom_templates_db: 可選的自定義模板數據庫,如果提供則會與默認模板合併
26
+ """
27
+ self.logger = logging.getLogger(self.__class__.__name__)
28
+ self.templates = {}
29
+ self.template_registry = {}
30
+
31
+ try:
32
+ # 載入模板數據庫
33
+ self.templates = self._load_templates()
34
+
35
+ # 初始化模板註冊表
36
+ self.template_registry = self._initialize_template_registry()
37
+
38
+ # 如果提供了自定義模板,則進行合併
39
+ if custom_templates_db:
40
+ self._merge_custom_templates(custom_templates_db)
41
+
42
+ # 驗證模板完整性
43
+ self._validate_templates()
44
+
45
+ self.logger.info("TemplateRepository initialized successfully with %d template categories",
46
+ len(self.templates))
47
+
48
+ except Exception as e:
49
+ error_msg = f"Failed to initialize TemplateRepository: {str(e)}"
50
+ self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
51
+ # 初始化基本的空模板
52
+ self.templates = self._initialize_fallback_templates()
53
+
54
+ def _load_templates(self) -> Dict:
55
+ """
56
+ 載入所有描述模板
57
+
58
+ Returns:
59
+ Dict: 包含所有模板類別的字典
60
+ """
61
+ try:
62
+ templates = {}
63
+
64
+ # 載入場景詳細描述模板
65
+ self.logger.debug("Loading scene detail templates")
66
+ try:
67
+ templates["scene_detail_templates"] = SCENE_DETAIL_TEMPLATES
68
+ except NameError:
69
+ self.logger.warning("SCENE_DETAIL_TEMPLATES not defined, using empty dict")
70
+ templates["scene_detail_templates"] = {}
71
+
72
+ # 載入物體模板填充器
73
+ self.logger.debug("Loading object template fillers")
74
+ try:
75
+ templates["object_template_fillers"] = OBJECT_TEMPLATE_FILLERS
76
+ except NameError:
77
+ self.logger.warning("OBJECT_TEMPLATE_FILLERS not defined, using empty dict")
78
+ templates["object_template_fillers"] = {}
79
+
80
+ # 載入視角模板
81
+ self.logger.debug("Loading viewpoint templates")
82
+ try:
83
+ templates["viewpoint_templates"] = VIEWPOINT_TEMPLATES
84
+ except NameError:
85
+ self.logger.warning("VIEWPOINT_TEMPLATES not defined, using empty dict")
86
+ templates["viewpoint_templates"] = {}
87
+
88
+ # 載入文化模板
89
+ self.logger.debug("Loading cultural templates")
90
+ try:
91
+ templates["cultural_templates"] = CULTURAL_TEMPLATES
92
+ except NameError:
93
+ self.logger.warning("CULTURAL_TEMPLATES not defined, using empty dict")
94
+ templates["cultural_templates"] = {}
95
+
96
+ # 從照明條件模組載入照明模板
97
+ self.logger.debug("Loading lighting templates")
98
+ try:
99
+ templates["lighting_templates"] = self._extract_lighting_templates()
100
+ except Exception as e:
101
+ self.logger.warning(f"Failed to extract lighting templates: {str(e)}")
102
+ templates["lighting_templates"] = {}
103
+
104
+ # 載入信心度模板
105
+ self.logger.debug("Loading confidence templates")
106
+ try:
107
+ templates["confidence_templates"] = CONFIDENCE_TEMPLATES
108
+ except NameError:
109
+ self.logger.warning("CONFIDENCE_TEMPLATES not defined, using empty dict")
110
+ templates["confidence_templates"] = {}
111
+
112
+ # 初始化默認模板(當成備份)
113
+ self._initialize_default_templates(templates)
114
+
115
+ self.logger.info("Successfully loaded %d template categories", len(templates))
116
+ return templates
117
+
118
+ except Exception as e:
119
+ error_msg = f"Unexpected error during template loading: {str(e)}"
120
+ self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
121
+ # 返回基本模板
122
+ return self._initialize_fallback_templates()
123
+
124
+ def _initialize_template_registry(self) -> Dict[str, Dict[str, Any]]:
125
+ """
126
+ 初始化模板註冊表,包含各種場景類型的結構化模板
127
+
128
+ Returns:
129
+ Dict[str, Dict[str, Any]]: 模板註冊表字典
130
+ """
131
+ try:
132
+ template_registry = {
133
+ "indoor_detailed": {
134
+ "scene_type": "indoor",
135
+ "complexity": "high",
136
+ "structure": [
137
+ {
138
+ "type": "opening",
139
+ "content": "This indoor scene presents a comprehensive view of a well-organized living space."
140
+ },
141
+ {
142
+ "type": "zone_analysis",
143
+ "priority": "functional_areas",
144
+ "detail_level": "detailed"
145
+ },
146
+ {
147
+ "type": "object_summary",
148
+ "grouping": "by_category",
149
+ "include_counts": True
150
+ },
151
+ {
152
+ "type": "conclusion",
153
+ "style": "analytical"
154
+ }
155
+ ]
156
+ },
157
+
158
+ "indoor_moderate": {
159
+ "scene_type": "indoor",
160
+ "complexity": "medium",
161
+ "structure": [
162
+ {
163
+ "type": "opening",
164
+ "content": "The indoor environment displays organized functional areas."
165
+ },
166
+ {
167
+ "type": "zone_analysis",
168
+ "priority": "main_areas",
169
+ "detail_level": "moderate"
170
+ },
171
+ {
172
+ "type": "object_summary",
173
+ "grouping": "by_function",
174
+ "include_counts": False
175
+ },
176
+ {
177
+ "type": "conclusion",
178
+ "style": "descriptive"
179
+ }
180
+ ]
181
+ },
182
+
183
+ "indoor_simple": {
184
+ "scene_type": "indoor",
185
+ "complexity": "low",
186
+ "structure": [
187
+ {
188
+ "type": "opening",
189
+ "content": "An indoor space with visible furniture and household items."
190
+ },
191
+ {
192
+ "type": "zone_analysis",
193
+ "priority": "basic_areas",
194
+ "detail_level": "simple"
195
+ },
196
+ {
197
+ "type": "object_summary",
198
+ "grouping": "general",
199
+ "include_counts": False
200
+ }
201
+ ]
202
+ },
203
+
204
+ "outdoor_detailed": {
205
+ "scene_type": "outdoor",
206
+ "complexity": "high",
207
+ "structure": [
208
+ {
209
+ "type": "opening",
210
+ "content": "This outdoor scene captures a dynamic urban environment with multiple activity zones."
211
+ },
212
+ {
213
+ "type": "zone_analysis",
214
+ "priority": "activity_areas",
215
+ "detail_level": "detailed"
216
+ },
217
+ {
218
+ "type": "object_summary",
219
+ "grouping": "by_location",
220
+ "include_counts": True
221
+ },
222
+ {
223
+ "type": "conclusion",
224
+ "style": "environmental"
225
+ }
226
+ ]
227
+ },
228
+
229
+ "outdoor_moderate": {
230
+ "scene_type": "outdoor",
231
+ "complexity": "medium",
232
+ "structure": [
233
+ {
234
+ "type": "opening",
235
+ "content": "The outdoor scene shows organized public spaces and pedestrian areas."
236
+ },
237
+ {
238
+ "type": "zone_analysis",
239
+ "priority": "public_areas",
240
+ "detail_level": "moderate"
241
+ },
242
+ {
243
+ "type": "object_summary",
244
+ "grouping": "by_type",
245
+ "include_counts": False
246
+ },
247
+ {
248
+ "type": "conclusion",
249
+ "style": "observational"
250
+ }
251
+ ]
252
+ },
253
+
254
+ "outdoor_simple": {
255
+ "scene_type": "outdoor",
256
+ "complexity": "low",
257
+ "structure": [
258
+ {
259
+ "type": "opening",
260
+ "content": "An outdoor area with pedestrians and urban elements."
261
+ },
262
+ {
263
+ "type": "zone_analysis",
264
+ "priority": "basic_areas",
265
+ "detail_level": "simple"
266
+ },
267
+ {
268
+ "type": "object_summary",
269
+ "grouping": "general",
270
+ "include_counts": False
271
+ }
272
+ ]
273
+ },
274
+
275
+ "commercial_detailed": {
276
+ "scene_type": "commercial",
277
+ "complexity": "high",
278
+ "structure": [
279
+ {
280
+ "type": "opening",
281
+ "content": "This commercial environment demonstrates organized retail and customer service areas."
282
+ },
283
+ {
284
+ "type": "zone_analysis",
285
+ "priority": "service_areas",
286
+ "detail_level": "detailed"
287
+ },
288
+ {
289
+ "type": "object_summary",
290
+ "grouping": "by_function",
291
+ "include_counts": True
292
+ },
293
+ {
294
+ "type": "conclusion",
295
+ "style": "business"
296
+ }
297
+ ]
298
+ },
299
+
300
+ "transportation_detailed": {
301
+ "scene_type": "transportation",
302
+ "complexity": "high",
303
+ "structure": [
304
+ {
305
+ "type": "opening",
306
+ "content": "This transportation hub features organized passenger facilities and transit infrastructure."
307
+ },
308
+ {
309
+ "type": "zone_analysis",
310
+ "priority": "transit_areas",
311
+ "detail_level": "detailed"
312
+ },
313
+ {
314
+ "type": "object_summary",
315
+ "grouping": "by_transit_function",
316
+ "include_counts": True
317
+ },
318
+ {
319
+ "type": "conclusion",
320
+ "style": "infrastructure"
321
+ }
322
+ ]
323
+ },
324
+
325
+ "default": {
326
+ "scene_type": "general",
327
+ "complexity": "medium",
328
+ "structure": [
329
+ {
330
+ "type": "opening",
331
+ "content": "The scene displays various elements organized across functional areas."
332
+ },
333
+ {
334
+ "type": "zone_analysis",
335
+ "priority": "general_areas",
336
+ "detail_level": "moderate"
337
+ },
338
+ {
339
+ "type": "object_summary",
340
+ "grouping": "general",
341
+ "include_counts": False
342
+ },
343
+ {
344
+ "type": "conclusion",
345
+ "style": "general"
346
+ }
347
+ ]
348
+ }
349
+ }
350
+
351
+ self.logger.debug(f"Initialized template registry with {len(template_registry)} templates")
352
+ return template_registry
353
+
354
+ except Exception as e:
355
+ error_msg = f"Error initializing template registry: {str(e)}"
356
+ self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
357
+ # 返回最基本的註冊表
358
+ return {
359
+ "default": {
360
+ "scene_type": "general",
361
+ "complexity": "low",
362
+ "structure": [
363
+ {
364
+ "type": "opening",
365
+ "content": "Scene analysis completed with identified objects and areas."
366
+ }
367
+ ]
368
+ }
369
+ }
370
+
371
+ def _extract_lighting_templates(self) -> Dict:
372
+ """
373
+ 從照明條件模組提取照明描述模板
374
+
375
+ Returns:
376
+ Dict: 照明模板字典
377
+ """
378
+ try:
379
+ lighting_templates = {}
380
+
381
+ # 從 LIGHTING_CONDITIONS 提取時間描述
382
+ time_descriptions = LIGHTING_CONDITIONS.get("time_descriptions", {})
383
+
384
+ for time_key, time_data in time_descriptions.items():
385
+ if isinstance(time_data, dict) and "general" in time_data:
386
+ lighting_templates[time_key] = time_data["general"]
387
+ else:
388
+ # 如果數據結構不符合預期,使用備用描述
389
+ lighting_templates[time_key] = f"The scene is captured during {time_key.replace('_', ' ')}."
390
+
391
+ # 確保至少有基本的照明模板
392
+ if not lighting_templates:
393
+ self.logger.warning("No lighting templates found, using defaults")
394
+ lighting_templates = self._get_default_lighting_templates()
395
+
396
+ self.logger.debug("Extracted %d lighting templates", len(lighting_templates))
397
+ return lighting_templates
398
+
399
+ except Exception as e:
400
+ self.logger.warning(f"Error extracting lighting templates: {str(e)}, using defaults")
401
+ return self._get_default_lighting_templates()
402
+
403
+ def _get_default_lighting_templates(self) -> Dict:
404
+ """獲取默認照明模板"""
405
+ return {
406
+ "day_clear": "The scene is captured during clear daylight conditions.",
407
+ "day_overcast": "The scene is captured during overcast daylight.",
408
+ "night": "The scene is captured at night with artificial lighting.",
409
+ "dawn": "The scene is captured during dawn with soft natural lighting.",
410
+ "dusk": "The scene is captured during dusk with diminishing natural light.",
411
+ "unknown": "The lighting conditions are not clearly identifiable."
412
+ }
413
+
414
+ def _initialize_default_templates(self, templates: Dict):
415
+ """
416
+ 初始化默認模板作為備份機制
417
+
418
+ Args:
419
+ templates: 要檢查和補充的模板字典
420
+ """
421
+ try:
422
+ # 置信度模板備份
423
+ if "confidence_templates" not in templates or not templates["confidence_templates"]:
424
+ templates["confidence_templates"] = {
425
+ "high": "{description} {details}",
426
+ "medium": "This appears to be {description} {details}",
427
+ "low": "This might be {description}, but the confidence is low. {details}"
428
+ }
429
+
430
+ # 場景詳細模板備份
431
+ if "scene_detail_templates" not in templates or not templates["scene_detail_templates"]:
432
+ templates["scene_detail_templates"] = {
433
+ "default": ["A scene with various elements and objects."]
434
+ }
435
+
436
+ # 物體填充模板備份
437
+ if "object_template_fillers" not in templates or not templates["object_template_fillers"]:
438
+ templates["object_template_fillers"] = {
439
+ "default": ["various items", "different objects", "multiple elements"]
440
+ }
441
+
442
+ # 視角模板備份
443
+ if "viewpoint_templates" not in templates or not templates["viewpoint_templates"]:
444
+ templates["viewpoint_templates"] = {
445
+ "eye_level": {
446
+ "prefix": "From eye level, ",
447
+ "observation": "the scene is viewed straight ahead.",
448
+ "short_desc": "at eye level"
449
+ },
450
+ "aerial": {
451
+ "prefix": "From above, ",
452
+ "observation": "the scene is viewed from a bird's-eye perspective.",
453
+ "short_desc": "from above"
454
+ },
455
+ "low_angle": {
456
+ "prefix": "From a low angle, ",
457
+ "observation": "the scene is viewed from below looking upward.",
458
+ "short_desc": "from below"
459
+ },
460
+ "elevated": {
461
+ "prefix": "From an elevated position, ",
462
+ "observation": "the scene is viewed from a higher vantage point.",
463
+ "short_desc": "from an elevated position"
464
+ }
465
+ }
466
+
467
+ # 文化模板備份
468
+ if "cultural_templates" not in templates or not templates["cultural_templates"]:
469
+ templates["cultural_templates"] = {
470
+ "asian": {
471
+ "elements": ["traditional architectural elements", "cultural signage", "Asian design features"],
472
+ "description": "The scene displays distinctive Asian cultural characteristics with {elements}."
473
+ },
474
+ "european": {
475
+ "elements": ["classical architecture", "European design elements", "historic features"],
476
+ "description": "The scene exhibits European architectural and cultural elements including {elements}."
477
+ }
478
+ }
479
+
480
+ self.logger.debug("Default templates initialized as backup")
481
+
482
+ except Exception as e:
483
+ self.logger.error(f"Error initializing default templates: {str(e)}")
484
+
485
+ def _merge_custom_templates(self, custom_templates: Dict):
486
+ """
487
+ 合併自定義模板到現有模板庫
488
+
489
+ Args:
490
+ custom_templates: 自定義模板字典
491
+ """
492
+ try:
493
+ for template_category, custom_content in custom_templates.items():
494
+ if template_category in self.templates:
495
+ if isinstance(self.templates[template_category], dict) and isinstance(custom_content, dict):
496
+ self.templates[template_category].update(custom_content)
497
+ self.logger.debug(f"Merged custom templates for category: {template_category}")
498
+ else:
499
+ self.templates[template_category] = custom_content
500
+ self.logger.debug(f"Replaced templates for category: {template_category}")
501
+ else:
502
+ self.templates[template_category] = custom_content
503
+ self.logger.debug(f"Added new template category: {template_category}")
504
+
505
+ self.logger.info("Successfully merged custom templates")
506
+
507
+ except Exception as e:
508
+ self.logger.warning(f"Error merging custom templates: {str(e)}")
509
+
510
+ def _validate_templates(self):
511
+ """
512
+ 驗證模板完整性和有效性
513
+ """
514
+ try:
515
+ required_categories = [
516
+ "scene_detail_templates",
517
+ "object_template_fillers",
518
+ "viewpoint_templates",
519
+ "cultural_templates",
520
+ "lighting_templates",
521
+ "confidence_templates"
522
+ ]
523
+
524
+ missing_categories = []
525
+ for category in required_categories:
526
+ if category not in self.templates:
527
+ missing_categories.append(category)
528
+ elif not self.templates[category]:
529
+ self.logger.warning(f"Template category '{category}' is empty")
530
+
531
+ if missing_categories:
532
+ error_msg = f"Missing required template categories: {missing_categories}"
533
+ self.logger.warning(error_msg)
534
+ # 為缺失的類別創建空模板
535
+ for category in missing_categories:
536
+ self.templates[category] = {}
537
+
538
+ # 驗證視角模板結構
539
+ self._validate_viewpoint_templates()
540
+
541
+ # 驗證文化模板結構
542
+ self._validate_cultural_templates()
543
+
544
+ self.logger.debug("Template validation completed successfully")
545
+
546
+ except Exception as e:
547
+ error_msg = f"Template validation failed: {str(e)}"
548
+ self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
549
+
550
+ def _validate_viewpoint_templates(self):
551
+ """驗證視角模板結構"""
552
+ viewpoint_templates = self.templates.get("viewpoint_templates", {})
553
+
554
+ for viewpoint, template_data in viewpoint_templates.items():
555
+ if not isinstance(template_data, dict):
556
+ self.logger.warning(f"Invalid viewpoint template structure for '{viewpoint}'")
557
+ continue
558
+
559
+ required_keys = ["prefix", "observation"]
560
+ for key in required_keys:
561
+ if key not in template_data:
562
+ self.logger.warning(f"Missing '{key}' in viewpoint template '{viewpoint}'")
563
+
564
+ def _validate_cultural_templates(self):
565
+ """驗證文化模板結構"""
566
+ cultural_templates = self.templates.get("cultural_templates", {})
567
+
568
+ for culture, template_data in cultural_templates.items():
569
+ if not isinstance(template_data, dict):
570
+ self.logger.warning(f"Invalid cultural template structure for '{culture}'")
571
+ continue
572
+
573
+ if "elements" not in template_data or "description" not in template_data:
574
+ self.logger.warning(f"Missing required keys in cultural template '{culture}'")
575
+
576
+ def _initialize_fallback_templates(self) -> Dict:
577
+ """
578
+ 初始化備用模板系統,當主要載入失敗時使用
579
+
580
+ Returns:
581
+ Dict: 最基本的模板字典
582
+ """
583
+ return {
584
+ "scene_detail_templates": {"default": ["A scene with various elements."]},
585
+ "object_template_fillers": {"default": ["various items"]},
586
+ "viewpoint_templates": {
587
+ "eye_level": {
588
+ "prefix": "From eye level, ",
589
+ "observation": "the scene is viewed straight ahead.",
590
+ "short_desc": "at eye level"
591
+ }
592
+ },
593
+ "cultural_templates": {"default": {"elements": ["elements"], "description": "The scene displays cultural elements."}},
594
+ "lighting_templates": {"unknown": "The lighting conditions are not clearly identifiable."},
595
+ "confidence_templates": {"medium": "{description} {details}"}
596
+ }
597
+
598
+ def get_template(self, category: str, key: Optional[str] = None) -> Any:
599
+ """
600
+ 獲取指定類別的模板
601
+
602
+ Args:
603
+ category: 模板類別名稱
604
+ key: 可選的具體模板鍵值
605
+
606
+ Returns:
607
+ Any: 請求的模板內容,如果不存在則返回空字典或空字符串
608
+ """
609
+ try:
610
+ if category not in self.templates:
611
+ self.logger.warning(f"Template category '{category}' not found")
612
+ return {} if key is None else ""
613
+
614
+ if key is None:
615
+ return self.templates[category]
616
+
617
+ category_templates = self.templates[category]
618
+ if not isinstance(category_templates, dict):
619
+ self.logger.warning(f"Template category '{category}' is not a dictionary")
620
+ return ""
621
+
622
+ if key not in category_templates:
623
+ self.logger.warning(f"Template key '{key}' not found in category '{category}'")
624
+ return ""
625
+
626
+ return category_templates[key]
627
+
628
+ except Exception as e:
629
+ error_msg = f"Error retrieving template {category}.{key}: {str(e)}"
630
+ self.logger.error(error_msg)
631
+ return {} if key is None else ""
632
+
633
+ def get_template_categories(self) -> List[str]:
634
+ """
635
+ 獲取所有可用的模板類別名稱
636
+
637
+ Returns:
638
+ List[str]: 模板類別名稱列表
639
+ """
640
+ return list(self.templates.keys())
641
+
642
+ def template_exists(self, category: str, key: Optional[str] = None) -> bool:
643
+ """
644
+ 檢查模板是否存在
645
+
646
+ Args:
647
+ category: 模板類別
648
+ key: 可選的模板鍵值
649
+
650
+ Returns:
651
+ bool: 模板是否存在
652
+ """
653
+ try:
654
+ if category not in self.templates:
655
+ return False
656
+
657
+ if key is None:
658
+ return True
659
+
660
+ category_templates = self.templates[category]
661
+ if isinstance(category_templates, dict):
662
+ return key in category_templates
663
+
664
+ return False
665
+
666
+ except Exception as e:
667
+ self.logger.warning(f"Error checking template existence for {category}.{key}: {str(e)}")
668
+ return False
669
+
670
+ def get_confidence_template(self, confidence_level: str) -> str:
671
+ """
672
+ 獲取指定信心度級別的模板
673
+
674
+ Args:
675
+ confidence_level: 信心度級別 ('high', 'medium', 'low')
676
+
677
+ Returns:
678
+ str: 信心度模板字符串
679
+ """
680
+ try:
681
+ confidence_templates = self.templates.get("confidence_templates", {})
682
+
683
+ if confidence_level in confidence_templates:
684
+ return confidence_templates[confidence_level]
685
+
686
+ # 備用模板
687
+ fallback_templates = {
688
+ "high": "{description} {details}",
689
+ "medium": "This appears to be {description} {details}",
690
+ "low": "This might be {description}, but the confidence is low. {details}"
691
+ }
692
+
693
+ return fallback_templates.get(confidence_level, "{description} {details}")
694
+
695
+ except Exception as e:
696
+ self.logger.warning(f"Error getting confidence template for '{confidence_level}': {str(e)}")
697
+ return "{description} {details}"
698
+
699
+ def get_lighting_template(self, lighting_type: str) -> str:
700
+ """
701
+ 獲取指定照明類型的模板
702
+
703
+ Args:
704
+ lighting_type: 照明類型
705
+
706
+ Returns:
707
+ str: 照明描述模板
708
+ """
709
+ try:
710
+ lighting_templates = self.templates.get("lighting_templates", {})
711
+
712
+ if lighting_type in lighting_templates:
713
+ return lighting_templates[lighting_type]
714
+
715
+ # 備用模板
716
+ return f"The scene is captured with {lighting_type.replace('_', ' ')} lighting conditions."
717
+
718
+ except Exception as e:
719
+ self.logger.warning(f"Error getting lighting template for '{lighting_type}': {str(e)}")
720
+ return "The lighting conditions are not clearly identifiable."
721
+
722
+ def get_viewpoint_template(self, viewpoint: str) -> Dict[str, str]:
723
+ """
724
+ 獲取指定視角的模板
725
+
726
+ Args:
727
+ viewpoint: 視角類型
728
+
729
+ Returns:
730
+ Dict[str, str]: 包含prefix、observation等鍵的視角模板字典
731
+ """
732
+ try:
733
+ viewpoint_templates = self.templates.get("viewpoint_templates", {})
734
+
735
+ if viewpoint in viewpoint_templates:
736
+ return viewpoint_templates[viewpoint]
737
+
738
+ # 備用模板
739
+ fallback_templates = {
740
+ "eye_level": {
741
+ "prefix": "From eye level, ",
742
+ "observation": "the scene is viewed straight ahead.",
743
+ "short_desc": "at eye level"
744
+ },
745
+ "aerial": {
746
+ "prefix": "From above, ",
747
+ "observation": "the scene is viewed from a bird's-eye perspective.",
748
+ "short_desc": "from above"
749
+ },
750
+ "low_angle": {
751
+ "prefix": "From a low angle, ",
752
+ "observation": "the scene is viewed from below looking upward.",
753
+ "short_desc": "from below"
754
+ },
755
+ "elevated": {
756
+ "prefix": "From an elevated position, ",
757
+ "observation": "the scene is viewed from a higher vantage point.",
758
+ "short_desc": "from an elevated position"
759
+ }
760
+ }
761
+
762
+ return fallback_templates.get(viewpoint, fallback_templates["eye_level"])
763
+
764
+ except Exception as e:
765
+ self.logger.warning(f"Error getting viewpoint template for '{viewpoint}': {str(e)}")
766
+ return {
767
+ "prefix": "",
768
+ "observation": "the scene is viewed normally.",
769
+ "short_desc": "normally"
770
+ }
771
+
772
+ def get_cultural_template(self, cultural_context: str) -> Dict[str, Any]:
773
+ """
774
+ 獲取指定文化語境的模板
775
+
776
+ Args:
777
+ cultural_context: 文化語境
778
+
779
+ Returns:
780
+ Dict[str, Any]: 文化模板字典
781
+ """
782
+ try:
783
+ cultural_templates = self.templates.get("cultural_templates", {})
784
+
785
+ if cultural_context in cultural_templates:
786
+ return cultural_templates[cultural_context]
787
+
788
+ # 備用模板
789
+ return {
790
+ "elements": ["cultural elements"],
791
+ "description": f"The scene displays {cultural_context} cultural characteristics."
792
+ }
793
+
794
+ except Exception as e:
795
+ self.logger.warning(f"Error getting cultural template for '{cultural_context}': {str(e)}")
796
+ return {
797
+ "elements": ["various elements"],
798
+ "description": "The scene displays cultural characteristics."
799
+ }
800
+
801
+ def get_scene_detail_templates(self, scene_type: str, viewpoint: Optional[str] = None) -> List[str]:
802
+ """
803
+ 獲取場景詳細描述模板
804
+
805
+ Args:
806
+ scene_type: 場景類型
807
+ viewpoint: 可選的視角類型
808
+
809
+ Returns:
810
+ List[str]: 場景描述模板列表
811
+ """
812
+ try:
813
+ scene_templates = self.templates.get("scene_detail_templates", {})
814
+
815
+ # 首先嘗試獲取特定視角的模板
816
+ if viewpoint:
817
+ viewpoint_key = f"{scene_type}_{viewpoint}"
818
+ if viewpoint_key in scene_templates:
819
+ return scene_templates[viewpoint_key]
820
+
821
+ # 然後嘗試獲取場景類型的通用模板
822
+ if scene_type in scene_templates:
823
+ return scene_templates[scene_type]
824
+
825
+ # 最後使用默認模板
826
+ if "default" in scene_templates:
827
+ return scene_templates["default"]
828
+
829
+ # 備用模板
830
+ return ["A scene with various elements and objects."]
831
+
832
+ except Exception as e:
833
+ self.logger.warning(f"Error getting scene detail templates for '{scene_type}': {str(e)}")
834
+ return ["A scene with various elements and objects."]
text_optimizer.py ADDED
@@ -0,0 +1,616 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import logging
3
+ from typing import Dict, List, Optional, Any, Tuple
4
+
5
+ class TextOptimizer:
6
+ """
7
+ 文本優化器 - 專門處理文本格式化、清理和優化
8
+ 負責物件列表格式化、重複移除、複數形式處理以及描述文本的優化
9
+ """
10
+
11
def __init__(self):
    """Set up the text optimizer with a logger named after the concrete class."""
    logger_name = self.__class__.__name__
    self.logger = logging.getLogger(logger_name)
14
+
15
def format_object_list_for_description(self,
                                       objects: List[Dict],
                                       use_indefinite_article_for_one: bool = False,
                                       count_threshold_for_generalization: int = -1,
                                       max_types_to_list: int = 5) -> str:
    """
    Turn a list of detected objects into a human-readable phrase.

    Objects are grouped by ``class_name``; the most frequent types come
    first (ties broken alphabetically) and at most ``max_types_to_list``
    types are mentioned.

    Args:
        objects: Object dictionaries; each should carry a 'class_name'.
        use_indefinite_article_for_one: Use "a/an" for a single object
            instead of "one".
        count_threshold_for_generalization: Counts above this value are
            worded as "several"/"many"; -1 keeps exact counts.
        max_types_to_list: Maximum number of distinct object types listed.

    Returns:
        str: The formatted object description.
    """
    nothing_found = "no specific objects clearly identified"
    try:
        if not objects:
            return nothing_found

        # Tally occurrences per class name, ignoring unnamed entries.
        tally: Dict[str, int] = {}
        for entry in objects:
            label = entry.get("class_name", "unknown object")
            if label == "unknown object" or not label:
                continue
            tally[label] = tally.get(label, 0) + 1

        if not tally:
            return nothing_found

        # Highest count first, then alphabetical; keep only the top types.
        ranked = sorted(tally.items(), key=lambda pair: (-pair[1], pair[0]))
        phrases = []

        for label, qty in ranked[:max_types_to_list]:
            if qty == 1:
                if not use_indefinite_article_for_one:
                    phrases.append(f"one {label}")
                elif label[0].lower() in 'aeiou':
                    phrases.append(f"an {label}")
                else:
                    phrases.append(f"a {label}")
                continue

            plural_label = self._get_plural_form(label)
            generalize = (count_threshold_for_generalization != -1
                          and qty > count_threshold_for_generalization)

            if not generalize:
                phrases.append(f"{qty} {plural_label}")
            elif qty <= count_threshold_for_generalization + 3:
                phrases.append(f"several {plural_label}")
            else:
                phrases.append(f"many {plural_label}")

        if not phrases:
            return nothing_found

        if len(phrases) == 1:
            return phrases[0]
        if len(phrases) == 2:
            return f"{phrases[0]} and {phrases[1]}"

        # Three or more: comma-separated with an Oxford comma.
        leading = ", ".join(phrases[:-1])
        return leading + f", and {phrases[-1]}"

    except Exception as e:
        self.logger.warning(f"Error formatting object list: {str(e)}")
        return "various objects"
85
+
86
def optimize_object_description(self, description: str) -> str:
    """
    Tidy an object description by removing redundancy in its object lists.

    Redundant spatial qualifiers are stripped first. Each "with ..." object
    list found in the text is then parsed, its items counted, and the list
    text replaced by a regenerated version built from those counts.

    Args:
        description: Scene description text, possibly containing repeated
            or redundant wording.

    Returns:
        str: The optimized description. If processing fails, the text as it
        stood when the error occurred is returned.
    """
    try:
        # Step 1: drop unnecessary spatial qualifiers.
        description = self._remove_redundant_spatial_qualifiers(description)

        # Step 2: rewrite every "with X, Y, Z" style list up to the sentence end.
        for listing in re.findall(r'with ([^.]+?)(?=\.|$)', description):
            parsed_items = self._parse_object_list_items(listing)
            tallies = self._count_object_items(parsed_items)

            # Nothing countable in this list: leave it untouched.
            if not tallies:
                continue

            rebuilt_items = self._generate_optimized_item_list(tallies)
            rebuilt_listing = self._format_item_list(rebuilt_items)
            description = description.replace(listing, rebuilt_listing)

        return description

    except Exception as e:
        self.logger.warning(f"Error optimizing object description: {str(e)}")
        return description
128
+
129
def remove_repetitive_descriptors(self, description: str) -> str:
    """
    Strip repetitive or unsuitable descriptors such as "identical".

    A fixed sequence of case-insensitive regex substitutions is applied in
    order, after which runs of whitespace are collapsed to single spaces.

    Args:
        description: Original description text.

    Returns:
        str: The cleaned description, or the original text if cleaning fails.
    """
    try:
        # (pattern, replacement) pairs, applied strictly in this order.
        substitutions = (
            # Drop "identical" after a numeric or spelled-out count, or alone.
            (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
            (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
            (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),

            # Tone down overly technical "comprehensive ..." phrasing.
            (r'\bcomprehensive arrangement of\b', 'arrangement of'),
            (r'\bcomprehensive view featuring\b', 'scene featuring'),
            (r'\bcomprehensive display of\b', 'display of'),

            # Simplify over-descriptive positioning phrases.
            (r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
            (r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
        )

        cleaned = description
        for regex, substitute in substitutions:
            cleaned = re.sub(regex, substitute, cleaned, flags=re.IGNORECASE)

        # Substitutions can leave doubled spaces behind; squeeze them out.
        squeezed = re.sub(r'\s+', ' ', cleaned)
        cleaned = squeezed.strip()

        self.logger.debug("Cleaned description: removed repetitive descriptors")
        return cleaned

    except Exception as e:
        self.logger.warning(f"Error removing repetitive descriptors: {str(e)}")
        return description
170
+
171
def format_object_count_description(self, class_name: str, count: int,
                                    scene_type: Optional[str] = None,
                                    detected_objects: Optional[List[Dict]] = None,
                                    avg_confidence: float = 0.0) -> str:
    """
    Build the count phrase for one object class.

    Delegates to the single-object formatter when count is 1 and to the
    multiple-object formatter otherwise.

    Args:
        class_name: Normalized class name.
        count: Number of detected objects of this class.
        scene_type: Scene type, passed on to the formatters.
        detected_objects: Detections of this class, passed on to the
            formatters.
        avg_confidence: Average detection confidence, passed on to the
            formatters.

    Returns:
        str: The formatted phrase; "" when count is not positive. If
            anything fails, a plain fallback is returned
            ("<count> <class_name>s" for count > 1, otherwise class_name).
    """
    try:
        # Nothing to describe.
        if count <= 0:
            return ""

        # The plural is resolved up front, exactly as before, even when
        # only one object ends up being described.
        plural_form = self._get_plural_form(class_name)

        if count != 1:
            return self._format_multiple_objects_description(
                class_name, count, plural_form,
                scene_type, detected_objects, avg_confidence)

        return self._format_single_object_description(
            class_name, scene_type, detected_objects, avg_confidence)

    except Exception as e:
        self.logger.warning(f"Error formatting object count for '{class_name}': {str(e)}")
        if count > 1:
            return f"{count} {class_name}s"
        return class_name
207
+
208
def normalize_object_class_name(self, class_name: str) -> str:
    """
    Turn a raw class label into a natural-language name.

    The label is lowercased, a leading "class_"/"id_"/"type_" prefix and a
    trailing "_class"/"_id"/"_type" suffix are removed, underscores and
    hyphens become spaces, whitespace is collapsed, and a few labels are
    mapped to preferred wording (e.g. "cell phone" -> "mobile phone").

    Args:
        class_name: Raw class label.

    Returns:
        str: The normalized name; "object" for empty or non-string input.
    """
    # Preferred wording for specific labels; unlisted labels pass through.
    preferred_names = {
        'traffic light': 'traffic light',
        'stop sign': 'stop sign',
        'fire hydrant': 'fire hydrant',
        'dining table': 'dining table',
        'potted plant': 'potted plant',
        'tv monitor': 'television',
        'cell phone': 'mobile phone',
        'wine glass': 'wine glass',
        'hot dog': 'hot dog',
        'teddy bear': 'teddy bear',
        'hair drier': 'hair dryer',
        'toothbrush': 'toothbrush'
    }

    try:
        if not class_name or not isinstance(class_name, str):
            return "object"

        # Strip technical prefix, then suffix.
        label = class_name.lower()
        label = re.sub(r'^(class_|id_|type_)', '', label)
        label = re.sub(r'(_class|_id|_type)$', '', label)

        # Separators become spaces; split/join collapses repeated spaces.
        label = label.replace('_', ' ')
        label = label.replace('-', ' ')
        label = ' '.join(label.split())

        return preferred_names.get(label, label)

    except Exception as e:
        self.logger.warning(f"Error normalizing class name '{class_name}': {str(e)}")
        if isinstance(class_name, str):
            return class_name
        return "object"
253
+
254
+ def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
255
+ """
256
+ 移除描述中冗餘的空間限定詞
257
+
258
+ Args:
259
+ description: 包含可能多餘空間描述的文本
260
+
261
+ Returns:
262
+ str: 移除多餘空間限定詞後的文本
263
+ """
264
+ # 定義常見的多餘空間表達模式
265
+ redundant_patterns = [
266
+ # 室內物件的多餘房間描述
267
+ (r'\b(bed|sofa|couch|chair|table|desk|dresser|nightstand)\s+in\s+the\s+(room|bedroom|living\s+room)', r'\1'),
268
+ # 廚房物件的多餘描述
269
+ (r'\b(refrigerator|stove|oven|sink|microwave)\s+in\s+the\s+kitchen', r'\1'),
270
+ # 浴室物件的多餘描述
271
+ (r'\b(toilet|shower|bathtub|sink)\s+in\s+the\s+(bathroom|restroom)', r'\1'),
272
+ # 一般性的多餘表達:「在場景中」、「在圖片中」等
273
+ (r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
274
+ ]
275
+
276
+ for pattern, replacement in redundant_patterns:
277
+ description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
278
+
279
+ return description
280
+
281
+ def _parse_object_list_items(self, obj_list: str) -> List[str]:
282
+ """
283
+ 解析物件列表中的項目
284
+
285
+ Args:
286
+ obj_list: 物件列表字符串
287
+
288
+ Returns:
289
+ List[str]: 解析後的項目列表
290
+ """
291
+ # 先處理逗號格式 "A, B, and C"
292
+ if ", and " in obj_list:
293
+ before_last_and = obj_list.rsplit(", and ", 1)[0]
294
+ last_item = obj_list.rsplit(", and ", 1)[1]
295
+ front_items = [item.strip() for item in before_last_and.split(",")]
296
+ all_items = front_items + [last_item.strip()]
297
+ elif " and " in obj_list:
298
+ all_items = [item.strip() for item in obj_list.split(" and ")]
299
+ else:
300
+ all_items = [item.strip() for item in obj_list.split(",")]
301
+
302
+ return all_items
303
+
304
+ def _count_object_items(self, all_items: List[str]) -> Dict[str, int]:
305
+ """
306
+ 統計物件項目的出現次數
307
+
308
+ Args:
309
+ all_items: 所有項目列表
310
+
311
+ Returns:
312
+ Dict[str, int]: 項目計數字典
313
+ """
314
+ item_counts = {}
315
+
316
+ for item in all_items:
317
+ item = item.strip()
318
+ if item and item not in ["and", "with", ""]:
319
+ clean_item = self._normalize_item_for_counting(item)
320
+ if clean_item not in item_counts:
321
+ item_counts[clean_item] = 0
322
+ item_counts[clean_item] += 1
323
+
324
+ return item_counts
325
+
326
+ def _generate_optimized_item_list(self, item_counts: Dict[str, int]) -> List[str]:
327
+ """
328
+ 生成優化後的項目列表
329
+
330
+ Args:
331
+ item_counts: 項目計數字典
332
+
333
+ Returns:
334
+ List[str]: 優化後的項目列表
335
+ """
336
+ new_items = []
337
+
338
+ for item, count in item_counts.items():
339
+ if count > 1:
340
+ plural_item = self._make_plural(item)
341
+ new_items.append(f"{count} {plural_item}")
342
+ else:
343
+ new_items.append(item)
344
+
345
+ return new_items
346
+
347
+ def _format_item_list(self, new_items: List[str]) -> str:
348
+ """
349
+ 格式化項目列表為字符串
350
+
351
+ Args:
352
+ new_items: 新項目列表
353
+
354
+ Returns:
355
+ str: 格式化後的字符串
356
+ """
357
+ if len(new_items) == 1:
358
+ return new_items[0]
359
+ elif len(new_items) == 2:
360
+ return f"{new_items[0]} and {new_items[1]}"
361
+ else:
362
+ return ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
363
+
364
+ def _normalize_item_for_counting(self, item: str) -> str:
365
+ """
366
+ 正規化物件項目以便準確計數
367
+
368
+ Args:
369
+ item: 原始物件項目字串
370
+
371
+ Returns:
372
+ str: 正規化後的物件項目
373
+ """
374
+ item = re.sub(r'^(a|an|the)\s+', '', item.lower())
375
+ return item.strip()
376
+
377
+ def _make_plural(self, item: str) -> str:
378
+ """
379
+ 將單數名詞轉換為複數形式
380
+
381
+ Args:
382
+ item: 單數形式的名詞
383
+
384
+ Returns:
385
+ str: 複數形式的名詞
386
+ """
387
+ if item.endswith("y") and len(item) > 1 and item[-2].lower() not in 'aeiou':
388
+ return item[:-1] + "ies"
389
+ elif item.endswith(("s", "sh", "ch", "x", "z")):
390
+ return item + "es"
391
+ elif not item.endswith("s"):
392
+ return item + "s"
393
+ else:
394
+ return item
395
+
396
+ def _get_plural_form(self, word: str) -> str:
397
+ """
398
+ 獲取詞彙的複數形式
399
+
400
+ Args:
401
+ word: 單數詞彙
402
+
403
+ Returns:
404
+ str: 複數形式
405
+ """
406
+ try:
407
+ # 特殊複數形式
408
+ irregular_plurals = {
409
+ 'person': 'people',
410
+ 'child': 'children',
411
+ 'foot': 'feet',
412
+ 'tooth': 'teeth',
413
+ 'mouse': 'mice',
414
+ 'man': 'men',
415
+ 'woman': 'women'
416
+ }
417
+
418
+ if word.lower() in irregular_plurals:
419
+ return irregular_plurals[word.lower()]
420
+
421
+ # 規則複數形式
422
+ if word.endswith(('s', 'sh', 'ch', 'x', 'z')):
423
+ return word + 'es'
424
+ elif word.endswith('y') and word[-2] not in 'aeiou':
425
+ return word[:-1] + 'ies'
426
+ elif word.endswith('f'):
427
+ return word[:-1] + 'ves'
428
+ elif word.endswith('fe'):
429
+ return word[:-2] + 'ves'
430
+ else:
431
+ return word + 's'
432
+
433
+ except Exception as e:
434
+ self.logger.warning(f"Error getting plural form for '{word}': {str(e)}")
435
+ return word + 's'
436
+
437
def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
                                      detected_objects: Optional[List[Dict]],
                                      avg_confidence: float) -> str:
    """
    Build the phrase for a single object, e.g. "an office chair".

    The phrase is "<article> [material descriptor] [feature descriptor]
    <class_name>". The article is chosen from the word that directly
    follows it (the first descriptor when one is present, otherwise the
    class name), so a vowel-initial descriptor gets "an" and a
    consonant-initial descriptor gets "a" regardless of the class name.

    Args:
        class_name: Object class name.
        scene_type: Scene type.
        detected_objects: Detections for this class.
        avg_confidence: Average detection confidence.

    Returns:
        str: The complete single-object phrase.

    Raises:
        IndexError: If class_name is empty and no descriptor applies.
    """
    # Material / functional descriptor (may be None).
    material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)

    # Position / feature descriptor (may be None).
    feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)

    descriptors = [d for d in (material_descriptor, feature_descriptor) if d]

    # The article must agree with the word that immediately follows it.
    lead_word = descriptors[0] if descriptors else class_name
    article = "an" if lead_word[0].lower() in 'aeiou' else "a"

    return " ".join([article, *descriptors, class_name])
471
+
472
def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
                                         scene_type: Optional[str], detected_objects: Optional[List[Dict]],
                                         avg_confidence: float) -> str:
    """
    Build the phrase for several objects, e.g. "three wooden chairs".

    Counts 2-12 are spelled out; other counts up to 20 become "several"
    and anything larger becomes "numerous". A material descriptor is
    inserted when one is available. Parts are joined with exactly one
    space, so a missing descriptor no longer leaves a double space
    between the count and the noun.

    Args:
        class_name: Object class name.
        count: Number of objects.
        plural_form: Plural form of the class name.
        scene_type: Scene type.
        detected_objects: Detections for this class (not used here).
        avg_confidence: Average detection confidence.

    Returns:
        str: The complete multiple-object phrase.
    """
    # Numbers that are written out as words.
    number_words = {
        2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
        7: "seven", 8: "eight", 9: "nine", 10: "ten",
        11: "eleven", 12: "twelve"
    }

    if count in number_words:
        count_expression = number_words[count]
    elif count <= 20:
        count_expression = "several"
    else:
        count_expression = "numerous"

    # Material / functional descriptor (may be None).
    material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)

    parts = [count_expression]
    if material_descriptor:
        parts.append(material_descriptor)
    parts.append(plural_form)

    return " ".join(parts).strip()
514
+
515
+ def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
516
+ avg_confidence: float) -> Optional[str]:
517
+ """
518
+ 基於場景語境和置信度進行材質推斷
519
+
520
+ Args:
521
+ class_name: 物件類別名稱
522
+ scene_type: 場景類型
523
+ avg_confidence: 檢測置信度
524
+
525
+ Returns:
526
+ Optional[str]: 材質描述符
527
+ """
528
+ # 只有在置信度足夠高時才進行材質推斷
529
+ if avg_confidence < 0.5:
530
+ return None
531
+
532
+ # 餐廳和用餐相關場景
533
+ if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
534
+ material_mapping = {
535
+ "chair": "wooden" if avg_confidence > 0.7 else None,
536
+ "dining table": "wooden",
537
+ "couch": "upholstered",
538
+ "vase": "decorative"
539
+ }
540
+ return material_mapping.get(class_name)
541
+
542
+ # 辦公場景
543
+ elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
544
+ material_mapping = {
545
+ "chair": "office",
546
+ "dining table": "conference",
547
+ "laptop": "modern",
548
+ "book": "reference"
549
+ }
550
+ return material_mapping.get(class_name)
551
+
552
+ # 客廳場景
553
+ elif scene_type and scene_type in ["living_room"]:
554
+ material_mapping = {
555
+ "couch": "comfortable",
556
+ "chair": "accent",
557
+ "tv": "large",
558
+ "vase": "decorative"
559
+ }
560
+ return material_mapping.get(class_name)
561
+
562
+ # 室外場景
563
+ elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
564
+ material_mapping = {
565
+ "car": "parked",
566
+ "person": "walking",
567
+ "bicycle": "stationed"
568
+ }
569
+ return material_mapping.get(class_name)
570
+
571
+ # 如果沒有特定的場景映射,返回通用描述符
572
+ generic_mapping = {
573
+ "chair": "comfortable",
574
+ "dining table": "sturdy",
575
+ "car": "parked",
576
+ "person": "present"
577
+ }
578
+
579
+ return generic_mapping.get(class_name)
580
+
581
+ def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
582
+ detected_objects: Optional[List[Dict]]) -> Optional[str]:
583
+ """
584
+ 為單個物件生成特徵描述符
585
+
586
+ Args:
587
+ class_name: 物件類別名稱
588
+ scene_type: 場景類型
589
+ detected_objects: 檢測物件
590
+
591
+ Returns:
592
+ Optional[str]: 特徵描述符
593
+ """
594
+ if not detected_objects or len(detected_objects) != 1:
595
+ return None
596
+
597
+ obj = detected_objects[0]
598
+ region = obj.get("region", "").lower()
599
+
600
+ # 基於位置的描述
601
+ if "center" in region:
602
+ if class_name == "dining table":
603
+ return "central"
604
+ elif class_name == "chair":
605
+ return "centrally placed"
606
+ elif "corner" in region or "left" in region or "right" in region:
607
+ return "positioned"
608
+
609
+ # 基於場景的功能描述
610
+ if scene_type and scene_type in ["dining_area", "restaurant"]:
611
+ if class_name == "chair":
612
+ return "dining"
613
+ elif class_name == "vase":
614
+ return "decorative"
615
+
616
+ return None