File size: 13,786 Bytes
2b20519
 
 
 
 
 
 
 
05b4419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b20519
 
05b4419
2b20519
 
 
05b4419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b20519
05b4419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# modules/knowledge_base.py
import json
from pathlib import Path
from utils.logger import log

class KnowledgeBase:
    """In-memory travel knowledge base backed by a JSON file.

    The JSON document is expected to hold a top-level ``clean_knowledge``
    list. Each record is read through the path
    ``record['knowledge']['travel_knowledge']``, under which this class uses
    the keys ``destination_info`` (with ``primary_destinations``,
    ``countries`` and ``region_type``), ``budget_analysis``,
    ``detailed_itinerary`` and ``professional_insights``.

    Three inverted indexes map a city / country / region name to the list of
    positions (in ``self.knowledge``) of the records that mention it.
    """

    # Static city -> country lookup used to widen a city query to its country.
    # Built once at class creation instead of on every lookup call.
    _CITY_COUNTRY_MAPPING = {
        # Central Europe
        "布拉格": "捷克", "布尔诺": "捷克", "库特纳霍拉": "捷克",
        "维也纳": "奥地利", "萨尔茨堡": "奥地利", "哈尔施塔特": "奥地利", "巴德伊舍": "奥地利",
        "布达佩斯": "匈牙利", "德布勒森": "匈牙利", "圣安德烈": "匈牙利",
        "布拉迪斯拉发": "斯洛伐克",

        # Western Europe
        "巴黎": "法国", "里昂": "法国", "尼斯": "法国", "马赛": "法国",
        "柏林": "德国", "慕尼黑": "德国", "汉堡": "德国", "科隆": "德国", "法兰克福": "德国",
        "阿姆斯特丹": "荷兰", "鹿特丹": "荷兰", "海牙": "荷兰",
        "布鲁塞尔": "比利时", "安特卫普": "比利时", "布吕赫": "比利时",
        "卢森堡市": "卢森堡",
        "苏黎世": "瑞士", "日内瓦": "瑞士", "因特拉肯": "瑞士",

        # Southern Europe
        "罗马": "意大利", "米兰": "意大利", "威尼斯": "意大利", "佛罗伦萨": "意大利",
        "马德里": "西班牙", "巴塞罗那": "西班牙", "塞维利亚": "西班牙",
        "里斯本": "葡萄牙", "波尔图": "葡萄牙",
        "雅典": "希腊", "圣托里尼": "希腊", "米科诺斯": "希腊",

        # Northern Europe
        "斯德哥尔摩": "瑞典", "哥德堡": "瑞典",
        "奥斯陆": "挪威", "卑尔根": "挪威",
        "哥本哈根": "丹麦", "奥胡斯": "丹麦",
        "赫尔辛基": "芬兰", "坦佩雷": "芬兰",
        "雷克雅未克": "冰岛",

        # United Kingdom
        "伦敦": "英国", "爱丁堡": "英国", "曼彻斯特": "英国",
    }

    # Static city -> region lookup used to widen a city query to its region.
    # NOTE(review): this table and the country table do not cover the same
    # set of cities (e.g. some cities have a country but no region and vice
    # versa) -- confirm whether that is intentional.
    _CITY_REGION_MAPPING = {
        # Central Europe
        "布拉格": "中欧", "布尔诺": "中欧", "库特纳霍拉": "中欧",
        "维也纳": "中欧", "萨尔茨堡": "中欧", "哈尔施塔特": "中欧", "巴德伊舍": "中欧",
        "布达佩斯": "中欧", "德布勒森": "中欧", "圣安德烈": "中欧",
        "布拉迪斯拉发": "中欧",

        # Western Europe
        "巴黎": "西欧", "里昂": "西欧", "尼斯": "西欧",
        "柏林": "西欧", "慕尼黑": "西欧", "汉堡": "西欧",
        "阿姆斯特丹": "西欧", "鹿特丹": "西欧",
        "布鲁塞尔": "西欧", "安特卫普": "西欧",
        "苏黎世": "西欧", "日内瓦": "西欧",

        # Eastern Europe (following the knowledge base's own classification)
        "华沙": "东欧", "克拉科夫": "东欧",
        "莫斯科": "东欧", "圣彼得堡": "东欧",

        # Southern Europe
        "罗马": "南欧", "米兰": "南欧", "威尼斯": "南欧",
        "马德里": "南欧", "巴塞罗那": "南欧",
        "里斯本": "南欧", "波尔图": "南欧",
        "雅典": "南欧", "圣托里尼": "南欧",

        # Northern Europe
        "斯德哥尔摩": "北欧", "哥德堡": "北欧",
        "奥斯陆": "北欧", "卑尔根": "北欧",
        "哥本哈根": "北欧", "赫尔辛基": "北欧",
        "雷克雅未克": "北欧",
    }

    def __init__(self, file_path: Path = Path("./config/general_travelplan.json")):
        """Load the knowledge records from *file_path* and index them.

        Args:
            file_path: Location of the UTF-8 JSON file to load.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        self.knowledge = []
        self.city_index = {}  # city name -> record positions
        self.country_index = {}  # country name -> record positions
        self.region_index = {}  # region name -> record positions
        with open(file_path, 'r', encoding='utf-8') as f:
            # ``or []`` also covers an explicit JSON null under the key.
            self.knowledge = json.load(f).get('clean_knowledge', []) or []
        # Without this call every index stays empty and all index-based
        # lookups silently return nothing.
        self._build_indexes()
        log.info("✅ 知识库加载完成")

    @staticmethod
    def _travel_knowledge(item: dict) -> dict:
        """Return the ``travel_knowledge`` dict of a record, or ``{}``."""
        return (item.get('knowledge') or {}).get('travel_knowledge') or {}

    @staticmethod
    def _destination_info(item: dict) -> dict:
        """Return the ``destination_info`` dict of a record, or ``{}``."""
        return KnowledgeBase._travel_knowledge(item).get('destination_info') or {}

    @staticmethod
    def _merge_unique(first: list, second: list) -> list:
        """Concatenate two lists, dropping duplicates and keeping order.

        Uses equality instead of hashing so that unhashable elements
        (dicts, lists) can be merged as well.
        """
        merged = []
        for element in first + second:
            if element not in merged:
                merged.append(element)
        return merged

    def _collect(self, indices: list, results: list) -> None:
        """Append the records at *indices* to *results*, skipping duplicates."""
        for idx in indices:
            record = self.knowledge[idx]
            if record not in results:
                results.append(record)

    def _build_indexes(self):
        """Build the city, country and region lookup indexes.

        The indexes are rebuilt from scratch, so calling this method more
        than once does not duplicate entries.
        """
        self.city_index = {}
        self.country_index = {}
        self.region_index = {}

        for idx, item in enumerate(self.knowledge):
            dest_info = self._destination_info(item)
            if not dest_info:
                continue

            # City index
            for city in dest_info.get('primary_destinations') or []:
                self.city_index.setdefault(city, []).append(idx)

            # Country index
            for country in dest_info.get('countries') or []:
                self.country_index.setdefault(country, []).append(idx)

            # Region index
            region_type = dest_info.get('region_type', '')
            if region_type:
                self.region_index.setdefault(region_type, []).append(idx)

    def search(self, query: str) -> list:
        """Search the knowledge base for records related to *query*.

        Matching proceeds in stages: exact city match, then records sharing
        the city's country, then records sharing the city's region. Only if
        none of those produced a result is a case-insensitive substring match
        against every record's primary destinations attempted.

        Args:
            query: City name to look up. Surrounding whitespace is ignored.

        Returns:
            The matching records, without duplicates; ``[]`` for an empty
            query or when nothing matches.
        """
        query = (query or "").strip()
        log.info(f"🔍 在知识库中搜索: '{query}'")

        # An empty string is a substring of every destination, so without
        # this guard the fuzzy stage would return the whole knowledge base.
        if not query:
            log.warning("⚠️ 搜索关键词为空")
            return []

        relevant_knowledge = []
        query_lower = query.lower()

        # 1. Exact city match
        if query in self.city_index:
            self._collect(self.city_index[query], relevant_knowledge)
            log.info(f"✅ 通过城市直接匹配找到 {len(self.city_index[query])} 条记录")

        # 2. Country match
        matching_country = self._find_country_for_city(query)
        if matching_country and matching_country in self.country_index:
            self._collect(self.country_index[matching_country], relevant_knowledge)
            log.info(f"✅ 通过国家匹配({matching_country})找到额外记录")

        # 3. Region match
        matching_region = self._find_region_for_city(query)
        if matching_region and matching_region in self.region_index:
            self._collect(self.region_index[matching_region], relevant_knowledge)
            log.info(f"✅ 通过地区匹配({matching_region})找到额外记录")

        # 4. Fuzzy (substring) match, only as a fallback
        if not relevant_knowledge:
            log.info("🔍 尝试模糊匹配...")
            for item in self.knowledge:
                for dest in self._destination_info(item).get('primary_destinations') or []:
                    if not isinstance(dest, str):
                        continue
                    dest_lower = dest.lower()
                    if query_lower in dest_lower or dest_lower in query_lower:
                        if item not in relevant_knowledge:
                            relevant_knowledge.append(item)
                            log.info(f"✅ 模糊匹配找到: {dest}")
                        # One matching destination is enough for this record.
                        break

        log.info(f"📊 搜索完成,共找到 {len(relevant_knowledge)} 条相关记录")
        return relevant_knowledge

    def _find_country_for_city(self, city_name: str) -> str:
        """Return the country of *city_name*, or ``""`` if it is unknown."""
        return self._CITY_COUNTRY_MAPPING.get(city_name, "")

    def _find_region_for_city(self, city_name: str) -> str:
        """Return the region of *city_name*, or ``""`` if it is unknown."""
        return self._CITY_REGION_MAPPING.get(city_name, "")

    def get_knowledge_by_destination(self, destination: str) -> dict:
        """Return merged, structured knowledge for *destination*.

        All records returned by :meth:`search` are combined:

        * ``destination_info``: dict-updated record by record (later records
          overwrite earlier keys).
        * ``budget_analysis``: the candidate with the most entries wins.
        * ``detailed_itinerary``: concatenated, then de-duplicated on
          ``(day_number, location)`` keeping the first occurrence.
        * ``professional_insights``: first value per key wins; list values
          are merged without duplicates, preserving order.

        Returns:
            The merged dict, or ``{}`` when no record matches.
        """
        relevant_items = self.search(destination)

        if not relevant_items:
            log.warning(f"⚠️ 未找到关于 '{destination}' 的知识")
            return {}

        merged_knowledge = {
            "destination_info": {},
            "budget_analysis": {},
            "detailed_itinerary": [],
            "professional_insights": {}
        }
        merged_insights = merged_knowledge['professional_insights']

        for item in relevant_items:
            knowledge = self._travel_knowledge(item)

            # Merge destination info
            dest_info = knowledge.get('destination_info')
            if isinstance(dest_info, dict):
                merged_knowledge['destination_info'].update(dest_info)

            # Keep the most detailed budget analysis
            budget = knowledge.get('budget_analysis')
            if budget and len(budget) > len(merged_knowledge['budget_analysis']):
                merged_knowledge['budget_analysis'] = budget

            # Accumulate itinerary suggestions
            itinerary = knowledge.get('detailed_itinerary')
            if isinstance(itinerary, list):
                merged_knowledge['detailed_itinerary'].extend(itinerary)

            # Merge professional insights
            insights = knowledge.get('professional_insights')
            if isinstance(insights, dict):
                for key, value in insights.items():
                    if key not in merged_insights:
                        # Copy lists so the result never aliases a stored record.
                        merged_insights[key] = list(value) if isinstance(value, list) else value
                    elif isinstance(value, list) and isinstance(merged_insights[key], list):
                        merged_insights[key] = self._merge_unique(merged_insights[key], value)

        # De-duplicate itinerary entries
        if merged_knowledge['detailed_itinerary']:
            seen_days = set()
            unique_itinerary = []
            for day_plan in merged_knowledge['detailed_itinerary']:
                day_key = (day_plan.get('day_number', 0), day_plan.get('location', ''))
                if day_key not in seen_days:
                    seen_days.add(day_key)
                    unique_itinerary.append(day_plan)
            merged_knowledge['detailed_itinerary'] = unique_itinerary

        log.info(f"📚 为 '{destination}' 合并了 {len(relevant_items)} 条知识记录")
        return merged_knowledge

    def get_similar_destinations(self, destination: str, limit: int = 5) -> list:
        """Recommend destinations similar to *destination*.

        Destinations from records sharing the same country come first,
        followed by destinations from records sharing the same region.

        Args:
            destination: City to find alternatives for.
            limit: Maximum number of recommendations to return.

        Returns:
            Up to *limit* distinct destination names, never including
            *destination* itself; ``[]`` when *limit* is not positive.
        """
        similar_destinations = []
        if limit <= 0:
            return similar_destinations

        # Candidate record positions, in priority order: country, then region.
        candidate_groups = []
        target_country = self._find_country_for_city(destination)
        if target_country:
            candidate_groups.append(self.country_index.get(target_country, []))
        target_region = self._find_region_for_city(destination)
        if target_region:
            candidate_groups.append(self.region_index.get(target_region, []))

        for indices in candidate_groups:
            for idx in indices:
                dest_info = self._destination_info(self.knowledge[idx])
                for dest in dest_info.get('primary_destinations') or []:
                    if dest != destination and dest not in similar_destinations:
                        similar_destinations.append(dest)
                        if len(similar_destinations) >= limit:
                            return similar_destinations

        return similar_destinations

    def get_statistics(self) -> dict:
        """Return summary statistics about the knowledge base.

        Returns:
            A dict with the record count, the number of indexed cities,
            countries and regions, the number of distinct cities per region
            (``cities_by_region``) and the ten most frequently referenced
            cities as ``(city, record_count)`` tuples (``popular_cities``).
        """
        stats = {
            "total_records": len(self.knowledge),
            "cities_covered": len(self.city_index),
            "countries_covered": len(self.country_index),
            "regions_covered": len(self.region_index),
            "cities_by_region": {},
            "popular_cities": []
        }

        # Count distinct cities per region
        for region, indices in self.region_index.items():
            cities_in_region = set()
            for idx in indices:
                dest_info = self._destination_info(self.knowledge[idx])
                cities_in_region.update(dest_info.get('primary_destinations') or [])
            stats["cities_by_region"][region] = len(cities_in_region)

        # Rank cities by how many index entries reference them; the sort is
        # stable, so ties keep index insertion order.
        city_frequency = [(city, len(indices)) for city, indices in self.city_index.items()]
        city_frequency.sort(key=lambda pair: pair[1], reverse=True)
        stats["popular_cities"] = city_frequency[:10]  # top 10 cities

        return stats