Spaces:
Sleeping
Sleeping
File size: 13,786 Bytes
2b20519 05b4419 2b20519 05b4419 2b20519 05b4419 2b20519 05b4419 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 |
# modules/knowledge_base.py
import json
from pathlib import Path
from utils.logger import log
class KnowledgeBase:
    """In-memory travel knowledge base with city / country / region lookup.

    The JSON file is expected to be an object whose ``clean_knowledge`` key
    holds a list of records.  Each record is read through the path
    ``record['knowledge']['travel_knowledge']``, whose ``destination_info``
    may carry ``primary_destinations`` (city names), ``countries`` and
    ``region_type``.  Records that do not follow this layout are ignored by
    the indexes instead of raising.
    """

    # Hard-coded city -> country table used to widen a city query to its
    # country.  Defined once at class level instead of on every call.
    _CITY_COUNTRY = {
        # Central Europe
        "布拉格": "捷克", "布尔诺": "捷克", "库特纳霍拉": "捷克",
        "维也纳": "奥地利", "萨尔茨堡": "奥地利", "哈尔施塔特": "奥地利", "巴德伊舍": "奥地利",
        "布达佩斯": "匈牙利", "德布勒森": "匈牙利", "圣安德烈": "匈牙利",
        "布拉迪斯拉发": "斯洛伐克",
        # Western Europe
        "巴黎": "法国", "里昂": "法国", "尼斯": "法国", "马赛": "法国",
        "柏林": "德国", "慕尼黑": "德国", "汉堡": "德国", "科隆": "德国", "法兰克福": "德国",
        "阿姆斯特丹": "荷兰", "鹿特丹": "荷兰", "海牙": "荷兰",
        "布鲁塞尔": "比利时", "安特卫普": "比利时", "布吕赫": "比利时",
        "卢森堡市": "卢森堡",
        "苏黎世": "瑞士", "日内瓦": "瑞士", "因特拉肯": "瑞士",
        # Eastern Europe (these cities previously had a region but no country)
        "华沙": "波兰", "克拉科夫": "波兰",
        "莫斯科": "俄罗斯", "圣彼得堡": "俄罗斯",
        # Southern Europe
        "罗马": "意大利", "米兰": "意大利", "威尼斯": "意大利", "佛罗伦萨": "意大利",
        "马德里": "西班牙", "巴塞罗那": "西班牙", "塞维利亚": "西班牙",
        "里斯本": "葡萄牙", "波尔图": "葡萄牙",
        "雅典": "希腊", "圣托里尼": "希腊", "米科诺斯": "希腊",
        # Northern Europe
        "斯德哥尔摩": "瑞典", "哥德堡": "瑞典",
        "奥斯陆": "挪威", "卑尔根": "挪威",
        "哥本哈根": "丹麦", "奥胡斯": "丹麦",
        "赫尔辛基": "芬兰", "坦佩雷": "芬兰",
        "雷克雅未克": "冰岛",
        # United Kingdom
        "伦敦": "英国", "爱丁堡": "英国", "曼彻斯特": "英国",
    }

    # Country -> region.  A city's region is derived from its country so the
    # two lookups can never disagree.  "英国" is intentionally absent: the
    # original tables assigned no region to UK cities.
    _COUNTRY_REGION = {
        "捷克": "中欧", "奥地利": "中欧", "匈牙利": "中欧", "斯洛伐克": "中欧",
        "法国": "西欧", "德国": "西欧", "荷兰": "西欧", "比利时": "西欧",
        "卢森堡": "西欧", "瑞士": "西欧",
        "波兰": "东欧", "俄罗斯": "东欧",
        "意大利": "南欧", "西班牙": "南欧", "葡萄牙": "南欧", "希腊": "南欧",
        "瑞典": "北欧", "挪威": "北欧", "丹麦": "北欧", "芬兰": "北欧", "冰岛": "北欧",
    }

    def __init__(self, file_path: Path = Path("./config/general_travelplan.json")):
        """Load the knowledge file and build the lookup indexes.

        Args:
            file_path: Path of the UTF-8 JSON knowledge file.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        self.knowledge = []
        self.city_index = {}     # city name -> indices into self.knowledge
        self.country_index = {}  # country name -> indices into self.knowledge
        self.region_index = {}   # region type -> indices into self.knowledge
        with open(file_path, 'r', encoding='utf-8') as f:
            payload = json.load(f)
        # A JSON ``null`` for the key is treated like a missing key.
        self.knowledge = payload.get('clean_knowledge', []) or []
        # Without this call every index stays empty and only the fuzzy
        # fallback in search() can ever match.
        self._build_indexes()
        log.info("✅ 知识库加载完成")

    @staticmethod
    def _nested_dict(item, *keys) -> dict:
        """Follow ``keys`` through nested dicts; return ``{}`` on any miss."""
        node = item
        for key in keys:
            if not isinstance(node, dict):
                return {}
            node = node.get(key)
        return node if isinstance(node, dict) else {}

    @staticmethod
    def _as_names(value) -> list:
        """Normalise a destination/country field to a list of non-empty strings."""
        if isinstance(value, str):
            # A bare string is one name, not a sequence of characters.
            return [value] if value else []
        if isinstance(value, (list, tuple)):
            return [name for name in value if isinstance(name, str) and name]
        return []

    @staticmethod
    def _index_record(index: dict, key: str, idx: int) -> None:
        """Register record ``idx`` under ``key``, at most once per record."""
        bucket = index.setdefault(key, [])
        # Records are visited in increasing order, so a repeat within the
        # same record can only be the last entry.
        if not bucket or bucket[-1] != idx:
            bucket.append(idx)

    @staticmethod
    def _merge_unique(existing: list, incoming: list) -> list:
        """Return a new list with the items of both lists, first occurrence kept.

        Uses equality instead of hashing so the order is deterministic and
        unhashable items (e.g. dicts) are supported.
        """
        merged = []
        for value in [*existing, *incoming]:
            if value not in merged:
                merged.append(value)
        return merged

    @staticmethod
    def _detail_size(value) -> int:
        """Return ``len(value)`` or 0 when the value has no length."""
        return len(value) if hasattr(value, '__len__') else 0

    def _destination_info(self, item) -> dict:
        """Return the ``destination_info`` dict of a record (``{}`` if absent)."""
        return self._nested_dict(item, 'knowledge', 'travel_knowledge', 'destination_info')

    def _append_records(self, results: list, indices: list) -> None:
        """Append the records at ``indices`` to ``results``, skipping equal ones."""
        for idx in indices:
            record = self.knowledge[idx]
            if record not in results:
                results.append(record)

    def _build_indexes(self):
        """Rebuild the city, country and region indexes from ``self.knowledge``.

        The indexes are reset first, so calling this again (for example after
        replacing ``self.knowledge``) does not duplicate entries.
        """
        self.city_index = {}
        self.country_index = {}
        self.region_index = {}
        for idx, item in enumerate(self.knowledge):
            dest_info = self._destination_info(item)
            if not dest_info:
                continue
            for city in self._as_names(dest_info.get('primary_destinations')):
                self._index_record(self.city_index, city, idx)
            for country in self._as_names(dest_info.get('countries')):
                self._index_record(self.country_index, country, idx)
            region_type = dest_info.get('region_type', '')
            if isinstance(region_type, str) and region_type:
                self._index_record(self.region_index, region_type, idx)

    def search(self, query: str) -> list:
        """Return the knowledge records relevant to ``query``.

        Matching order: exact city name, then the city's country, then the
        city's region; a case-insensitive substring match against destination
        names is tried only when those steps found nothing.

        Args:
            query: City name to look up.

        Returns:
            List of matching records (possibly empty).  A blank or non-string
            query yields an empty list, because an empty string would
            otherwise substring-match every destination.
        """
        relevant_knowledge = []
        log.info(f"🔍 在知识库中搜索: '{query}'")
        if isinstance(query, str) and query.strip():
            query_lower = query.lower()
            # 1. Exact city match
            if query in self.city_index:
                self._append_records(relevant_knowledge, self.city_index[query])
                log.info(f"✅ 通过城市直接匹配找到 {len(self.city_index[query])} 条记录")
            # 2. Country match
            matching_country = self._find_country_for_city(query)
            if matching_country and matching_country in self.country_index:
                self._append_records(relevant_knowledge, self.country_index[matching_country])
                log.info(f"✅ 通过国家匹配({matching_country})找到额外记录")
            # 3. Region match
            matching_region = self._find_region_for_city(query)
            if matching_region and matching_region in self.region_index:
                self._append_records(relevant_knowledge, self.region_index[matching_region])
                log.info(f"✅ 通过地区匹配({matching_region})找到额外记录")
            # 4. Fuzzy fallback
            if not relevant_knowledge:
                log.info("🔍 尝试模糊匹配...")
                for item in self.knowledge:
                    destinations = self._as_names(
                        self._destination_info(item).get('primary_destinations')
                    )
                    for dest in destinations:
                        dest_lower = dest.lower()
                        if query_lower in dest_lower or dest_lower in query_lower:
                            if item not in relevant_knowledge:
                                relevant_knowledge.append(item)
                            log.info(f"✅ 模糊匹配找到: {dest}")
                            break  # one hit per record is enough
        log.info(f"📊 搜索完成,共找到 {len(relevant_knowledge)} 条相关记录")
        return relevant_knowledge

    def _find_country_for_city(self, city_name: str) -> str:
        """Return the country of a known city, or ``""`` when it is unknown."""
        return self._CITY_COUNTRY.get(city_name, "")

    def _find_region_for_city(self, city_name: str) -> str:
        """Return the region of a known city, or ``""`` when it is unknown.

        The region is derived from the city's country, so every city with a
        known country in a mapped region gets a region.
        """
        return self._COUNTRY_REGION.get(self._CITY_COUNTRY.get(city_name, ""), "")

    def get_knowledge_by_destination(self, destination: str) -> dict:
        """Merge every record found for ``destination`` into one structure.

        Args:
            destination: City name passed to :meth:`search`.

        Returns:
            ``{}`` when nothing matches; otherwise a dict with the keys
            ``destination_info`` (later records overwrite earlier keys),
            ``budget_analysis`` (the largest one found),
            ``detailed_itinerary`` (concatenated, de-duplicated by day number
            and location) and ``professional_insights`` (list values merged
            without duplicates, first-seen order).
        """
        relevant_items = self.search(destination)
        if not relevant_items:
            log.warning(f"⚠️ 未找到关于 '{destination}' 的知识")
            return {}
        merged_knowledge = {
            "destination_info": {},
            "budget_analysis": {},
            "detailed_itinerary": [],
            "professional_insights": {}
        }
        for item in relevant_items:
            knowledge = self._nested_dict(item, 'knowledge', 'travel_knowledge')
            # Destination info: plain key-wise overwrite.
            dest_info = knowledge.get('destination_info')
            if isinstance(dest_info, dict):
                merged_knowledge['destination_info'].update(dest_info)
            # Budget: keep the most detailed (largest) analysis.
            budget = knowledge.get('budget_analysis')
            if budget:
                current = merged_knowledge['budget_analysis']
                if not current or self._detail_size(budget) > self._detail_size(current):
                    merged_knowledge['budget_analysis'] = budget
            # Itinerary: concatenate, de-duplicated below.
            itinerary = knowledge.get('detailed_itinerary')
            if isinstance(itinerary, (list, tuple)):
                merged_knowledge['detailed_itinerary'].extend(itinerary)
            # Insights: first value wins, list values are unioned.
            insights = knowledge.get('professional_insights')
            if isinstance(insights, dict):
                target = merged_knowledge['professional_insights']
                for key, value in insights.items():
                    if key not in target:
                        target[key] = value
                    elif isinstance(value, list) and isinstance(target[key], list):
                        target[key] = self._merge_unique(target[key], value)
        # Drop itinerary days that repeat the same (day number, location).
        if merged_knowledge['detailed_itinerary']:
            seen_days = set()
            unique_itinerary = []
            for day_plan in merged_knowledge['detailed_itinerary']:
                if not isinstance(day_plan, dict):
                    # No key can be derived; keep the entry untouched.
                    unique_itinerary.append(day_plan)
                    continue
                day_key = (day_plan.get('day_number', 0), day_plan.get('location', ''))
                if day_key not in seen_days:
                    seen_days.add(day_key)
                    unique_itinerary.append(day_plan)
            merged_knowledge['detailed_itinerary'] = unique_itinerary
        log.info(f"📚 为 '{destination}' 合并了 {len(relevant_items)} 条知识记录")
        return merged_knowledge

    def get_similar_destinations(self, destination: str, limit: int = 5) -> list:
        """Recommend other destinations from the same country, then region.

        Args:
            destination: City name to find neighbours for.
            limit: Maximum number of names to return; values <= 0 yield ``[]``.

        Returns:
            Up to ``limit`` distinct destination names, excluding
            ``destination`` itself, same-country results first.
        """
        similar_destinations = []
        if limit <= 0:
            return similar_destinations
        target_country = self._find_country_for_city(destination)
        target_region = self._find_region_for_city(destination)
        # Same-country records are scanned before same-region records.
        candidate_groups = (
            self.country_index.get(target_country, []) if target_country else [],
            self.region_index.get(target_region, []) if target_region else [],
        )
        for indices in candidate_groups:
            for idx in indices:
                dest_info = self._destination_info(self.knowledge[idx])
                for dest in self._as_names(dest_info.get('primary_destinations')):
                    if dest != destination and dest not in similar_destinations:
                        similar_destinations.append(dest)
                        if len(similar_destinations) >= limit:
                            return similar_destinations
        return similar_destinations

    def get_statistics(self) -> dict:
        """Summarise the knowledge base.

        Returns:
            Dict with ``total_records``, ``cities_covered``,
            ``countries_covered``, ``regions_covered``, ``cities_by_region``
            (region -> number of distinct cities) and ``popular_cities``
            (up to 10 ``(city, record_count)`` tuples, most frequent first).
        """
        stats = {
            "total_records": len(self.knowledge),
            "cities_covered": len(self.city_index),
            "countries_covered": len(self.country_index),
            "regions_covered": len(self.region_index),
            "cities_by_region": {},
            "popular_cities": []
        }
        # Distinct cities per region.
        for region, indices in self.region_index.items():
            cities_in_region = set()
            for idx in indices:
                dest_info = self._destination_info(self.knowledge[idx])
                cities_in_region.update(self._as_names(dest_info.get('primary_destinations')))
            stats["cities_by_region"][region] = len(cities_in_region)
        # Cities ranked by how many records mention them (stable for ties).
        city_frequency = [(city, len(indices)) for city, indices in self.city_index.items()]
        city_frequency.sort(key=lambda pair: pair[1], reverse=True)
        stats["popular_cities"] = city_frequency[:10]
        return stats