ginipick committed on
Commit
3ab6484
·
verified ·
1 Parent(s): eaabaee

Update app-backup3.py

Browse files
Files changed (1) hide show
  1. app-backup3.py +443 -19
app-backup3.py CHANGED
@@ -95,6 +95,90 @@ def load_agriculture_dataset():
95
  logging.error(f"Error loading Kaggle dataset: {e}")
96
  return None
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def get_dataset_summary():
99
  """Generate a summary of the available agriculture datasets"""
100
  dataset_info = load_agriculture_dataset()
@@ -228,7 +312,287 @@ def analyze_dataset_for_query(query):
228
  analysis_result += "이 뢄석은 UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 기반으둜 ν•©λ‹ˆλ‹€.\n\n"
229
 
230
  return analysis_result
231
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  # ──────────────────────────────── System Prompt ─────────────────────────
233
  def get_system_prompt(mode="price_forecast", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
234
  """
@@ -245,6 +609,8 @@ def get_system_prompt(mode="price_forecast", style="professional", include_searc
245
  3. 데이터λ₯Ό λ°”νƒ•μœΌλ‘œ λͺ…ν™•ν•˜κ³  κ·Όκ±° μžˆλŠ” 뢄석 제곡
246
  4. κ΄€λ ¨ 정보와 μΈμ‚¬μ΄νŠΈλ₯Ό μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ—¬ μ œμ‹œ
247
  5. μ‹œκ°μ  이해λ₯Ό 돕기 μœ„ν•΄ 차트, κ·Έλž˜ν”„ 등을 적절히 ν™œμš©
 
 
248
 
249
  μ€‘μš” κ°€μ΄λ“œλΌμΈ:
250
  - 데이터에 κΈ°λ°˜ν•œ 객관적 뢄석을 μ œκ³΅ν•˜μ„Έμš”
@@ -305,12 +671,41 @@ def get_system_prompt(mode="price_forecast", style="professional", include_searc
305
  }
306
 
307
  dataset_guide = """
308
- UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋 ν™œμš© μ§€μΉ¨:
309
- - 제곡된 데이터셋 뢄석 κ²°κ³Όλ₯Ό μ‘λ‹΅μ˜ μ£Όμš” 근거둜 μ‚¬μš©ν•˜μ„Έμš”
 
 
310
  - λ°μ΄ν„°μ˜ μΆœμ²˜μ™€ 연도λ₯Ό λͺ…ν™•νžˆ μΈμš©ν•˜μ„Έμš”
311
  - 데이터셋 λ‚΄ μ£Όμš” λ³€μˆ˜ κ°„μ˜ 관계λ₯Ό λΆ„μ„ν•˜μ—¬ μΈμ‚¬μ΄νŠΈλ₯Ό λ„μΆœν•˜μ„Έμš”
312
  - λ°μ΄ν„°μ˜ ν•œκ³„μ™€ λΆˆν™•μ‹€μ„±μ„ 투λͺ…ν•˜κ²Œ μ–ΈκΈ‰ν•˜μ„Έμš”
313
  - ν•„μš”μ‹œ 데이터 격차λ₯Ό μ‹λ³„ν•˜κ³  μΆ”κ°€ 연ꡬ가 ν•„μš”ν•œ μ˜μ—­μ„ μ œμ•ˆν•˜μ„Έμš”
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  """
315
 
316
  search_guide = """
@@ -346,8 +741,14 @@ UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋 ν™œμš© μ§€μΉ¨:
346
  if style in style_guides:
347
  final_prompt += f"\n\n뢄석 μŠ€νƒ€μΌ: {style_guides[style]}"
348
 
349
- # Always include dataset guide
350
  final_prompt += f"\n\n{dataset_guide}"
 
 
 
 
 
 
351
 
352
  if include_search_results:
353
  final_prompt += f"\n\n{search_guide}"
@@ -381,8 +782,6 @@ def brave_search(query: str, count: int = 10):
381
  r.raise_for_status()
382
  data = r.json()
383
 
384
- logging.info(f"Brave search result data structure: {list(data.keys())}")
385
-
386
  raw = data.get("web", {}).get("results") or data.get("results", [])
387
  if not raw:
388
  logging.warning(f"No Brave search results found. Response: {data}")
@@ -400,7 +799,6 @@ def brave_search(query: str, count: int = 10):
400
  "displayed_link": host
401
  })
402
 
403
- logging.info(f"Brave search success: {len(arts)} results")
404
  return arts
405
 
406
  except Exception as e:
@@ -434,7 +832,6 @@ def brave_video_search(query: str, count: int = 3):
434
  "source": vid.get("provider", {}).get("name", "Unknown source")
435
  })
436
 
437
- logging.info(f"Brave video search success: {len(results)} results")
438
  return results
439
 
440
  except Exception as e:
@@ -469,7 +866,6 @@ def brave_news_search(query: str, count: int = 3):
469
  "date": news.get("age", "Unknown date")
470
  })
471
 
472
- logging.info(f"Brave news search success: {len(results)} results")
473
  return results
474
 
475
  except Exception as e:
@@ -716,6 +1112,8 @@ def agricultural_price_forecast_app():
716
  st.session_state.analysis_mode = "price_forecast"
717
  if "response_style" not in st.session_state:
718
  st.session_state.response_style = "professional"
 
 
719
 
720
  sb = st.sidebar
721
  sb.title("뢄석 μ„€μ •")
@@ -748,6 +1146,17 @@ def agricultural_price_forecast_app():
748
  key="response_style"
749
  )
750
 
 
 
 
 
 
 
 
 
 
 
 
751
  # Example queries
752
  sb.subheader("μ˜ˆμ‹œ 질문")
753
  c1, c2, c3 = sb.columns(3)
@@ -929,18 +1338,27 @@ def process_input(query: str, uploaded_files):
929
  status.update(label="농업 데이터셋 뢄석 쀑...")
930
  with st.spinner("데이터셋 뢄석 쀑..."):
931
  dataset_analysis = analyze_dataset_for_query(query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
932
 
933
  if use_web_search:
934
- status.update(label="μ›Ή 검색 μˆ˜ν–‰ 쀑...")
935
- with st.spinner("μ›Ή 검색 쀑..."):
936
  search_content = do_web_search(keywords(query, top=5))
937
-
938
- try:
939
- status.update(label="λΉ„λ””μ˜€ 검색 쀑...")
940
  video_results = brave_video_search(query, 2)
941
- news_results = brave_news_search(query, 3)
942
- except Exception as search_err:
943
- logging.error(f"λ―Έλ””μ–΄ 검색 였λ₯˜: {search_err}")
944
 
945
  file_content = None
946
  if has_uploaded_files:
@@ -972,8 +1390,14 @@ def process_input(query: str, uploaded_files):
972
  ]
973
 
974
  user_content = query
975
- # 항상 데이터셋 뢄석 κ²°κ³Ό 포함
976
  user_content += "\n\n" + dataset_analysis
 
 
 
 
 
 
977
 
978
  if search_content:
979
  user_content += "\n\n" + search_content
@@ -1077,4 +1501,4 @@ def main():
1077
  agricultural_price_forecast_app()
1078
 
1079
  if __name__ == "__main__":
1080
- main()
 
95
  logging.error(f"Error loading Kaggle dataset: {e}")
96
  return None
97
 
98
# Shared implementation behind the Kaggle-backed dataset loaders below.
def _load_kaggle_dataset(dataset_slug, label):
    """Download a Kaggle dataset and index the tabular files it contains.

    Args:
        dataset_slug: Kaggle "<owner>/<dataset>" identifier passed to
            ``kagglehub.dataset_download``.
        label: Human-readable dataset name used only in log messages.

    Returns:
        A dict ``{'base_path': <download dir>, 'files': [...]}`` where each
        entry of ``files`` has the keys ``name``, ``path`` and ``size_mb``
        (size rounded to two decimals), or ``None`` if the download or the
        directory scan raised.
    """
    try:
        path = kagglehub.dataset_download(dataset_slug)
        logging.info(f"{label} dataset downloaded to: {path}")

        available_files = []
        for root, dirs, files in os.walk(path):
            # os.walk yields entries in filesystem order; sort so that the
            # callers' "first two files" selection is reproducible.
            dirs.sort()
            for file in sorted(files):
                if file.endswith(('.csv', '.xlsx')):
                    file_path = os.path.join(root, file)
                    file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
                    available_files.append({
                        'name': file,
                        'path': file_path,
                        'size_mb': round(file_size, 2)
                    })

        return {
            'base_path': path,
            'files': available_files
        }
    except Exception as e:
        # Best-effort loader: callers treat None as "dataset unavailable".
        # NOTE(review): the public loaders are wrapped in st.cache_resource,
        # so a None returned here is presumably cached as well - confirm
        # whether a transient download failure should be retried.
        logging.error(f"Error loading {label} dataset: {e}")
        return None


# New function to load Advanced Soybean Agricultural Dataset
@st.cache_resource
def load_soybean_dataset():
    """Download and load the Advanced Soybean Agricultural Dataset from Kaggle.

    Returns:
        The file index dict described in ``_load_kaggle_dataset``, or ``None``
        on failure.
    """
    return _load_kaggle_dataset(
        "wisam1985/advanced-soybean-agricultural-dataset-2025",
        "Soybean",
    )


# Function to load Crop Recommendation Dataset
@st.cache_resource
def load_crop_recommendation_dataset():
    """Download and load the Soil and Environmental Variables Crop Recommendation Dataset.

    Returns:
        The file index dict described in ``_load_kaggle_dataset``, or ``None``
        on failure.
    """
    return _load_kaggle_dataset(
        "agriinnovate/agricultural-crop-dataset",
        "Crop recommendation",
    )


# Function to load Climate Change Impact Dataset
@st.cache_resource
def load_climate_impact_dataset():
    """Download and load the Climate Change Impact on Agriculture Dataset.

    Returns:
        The file index dict described in ``_load_kaggle_dataset``, or ``None``
        on failure.
    """
    return _load_kaggle_dataset(
        "waqi786/climate-change-impact-on-agriculture",
        "Climate impact",
    )
181
+
182
  def get_dataset_summary():
183
  """Generate a summary of the available agriculture datasets"""
184
  dataset_info = load_agriculture_dataset()
 
312
  analysis_result += "이 뢄석은 UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 기반으둜 ν•©λ‹ˆλ‹€.\n\n"
313
 
314
  return analysis_result
315
+
316
# Function to analyze crop recommendation dataset
def analyze_crop_recommendation_dataset(query):
    """Build a Markdown report on the crop recommendation dataset for a query.

    For at most the first two files listed by
    ``load_crop_recommendation_dataset()`` the report contains the table
    size, the distinct values of the first column whose name contains
    "crop", the remaining column names, a five-row sample, descriptive
    statistics of the numeric columns and, for up to three crops whose name
    contains one of the whitespace-separated query terms, the per-crop mean
    of up to five numeric columns.

    Args:
        query: User question; lower-cased and split on whitespace to find
            matching crop names.

    Returns:
        The Markdown report as a string. A fixed error message is returned
        when the dataset cannot be loaded or an unexpected error occurs;
        per-file failures are logged and reported inline in the report.
    """
    try:
        dataset_info = load_crop_recommendation_dataset()
        if not dataset_info or not dataset_info['files']:
            return "μž‘λ¬Ό μΆ”μ²œ 데이터셋을 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€."

        analysis_result = "# ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ 데이터 뢄석\n\n"

        # Process main files
        for file_info in dataset_info['files'][:2]:  # Limit to the first 2 files
            try:
                analysis_result += f"## 파일: {file_info['name']}\n\n"

                if file_info['name'].endswith('.csv'):
                    df = pd.read_csv(file_info['path'])
                elif file_info['name'].endswith('.xlsx'):
                    df = pd.read_excel(file_info['path'])
                else:
                    continue

                # Basic dataset info
                analysis_result += f"- 데이터 크기: {len(df)} ν–‰ Γ— {len(df.columns)} μ—΄\n"
                analysis_result += f"- ν¬ν•¨λœ μž‘λ¬Ό μ’…λ₯˜: "

                # Check if crop column exists
                crop_cols = [col for col in df.columns if 'crop' in col.lower() or 'μž‘λ¬Ό' in col.lower()]
                # Reset per file so nothing leaks over from the previous file.
                main_crop_col = None
                unique_crops = []
                if crop_cols:
                    main_crop_col = crop_cols[0]
                    unique_crops = df[main_crop_col].unique()
                    analysis_result += f"{len(unique_crops)}μ’… ({', '.join(str(c) for c in unique_crops[:10])})\n\n"
                else:
                    analysis_result += "μž‘λ¬Ό 정보 열을 찾을 수 μ—†μŒ\n\n"

                # Extract environmental factors
                env_factors = [col for col in df.columns if col.lower() not in ['crop', 'label', 'id', 'index']]
                if env_factors:
                    analysis_result += f"- 고렀된 ν™˜κ²½ μš”μ†Œ: {', '.join(env_factors)}\n\n"

                # Sample data
                analysis_result += "### 데이터 μƒ˜ν”Œ:\n\n"
                analysis_result += df.head(5).to_markdown() + "\n\n"

                # Summary statistics for environmental factors.
                # numeric_factors is bound unconditionally: the per-crop
                # section below reads it even when env_factors is empty, which
                # previously raised NameError (or reused the previous file's
                # columns).
                if env_factors:
                    numeric_factors = df[env_factors].select_dtypes(include=['number']).columns
                else:
                    numeric_factors = []
                if len(numeric_factors) > 0:
                    analysis_result += "### ν™˜κ²½ μš”μ†Œ 톡계:\n\n"
                    stats_df = df[numeric_factors].describe().round(2)
                    analysis_result += stats_df.to_markdown() + "\n\n"

                # Check for query-specific crops (substring match on the
                # lower-cased crop name).
                query_terms = query.lower().split()
                relevant_crops = [
                    crop for crop in unique_crops
                    if any(term in str(crop).lower() for term in query_terms)
                ]

                if relevant_crops:
                    analysis_result += f"### 쿼리 κ΄€λ ¨ μž‘λ¬Ό 뢄석: {', '.join(str(c) for c in relevant_crops)}\n\n"
                    for crop in relevant_crops[:3]:  # Limit to 3 crops
                        crop_data = df[df[main_crop_col] == crop]
                        analysis_result += f"#### {crop} μž‘λ¬Ό μš”μ•½:\n\n"
                        analysis_result += f"- μƒ˜ν”Œ 수: {len(crop_data)}개\n"

                        if len(numeric_factors) > 0:
                            crop_stats = crop_data[numeric_factors].describe().round(2)
                            analysis_result += f"- 평균 ν™˜κ²½ 쑰건:\n"
                            for factor in numeric_factors[:5]:  # Limit to 5 factors
                                analysis_result += f" * {factor}: {crop_stats.loc['mean', factor]}\n"
                            analysis_result += "\n"

            except Exception as e:
                # Keep going with the next file; surface the problem inline.
                logging.error(f"Error analyzing crop recommendation file {file_info['name']}: {e}")
                analysis_result += f"뢄석 였λ₯˜: {str(e)}\n\n"

        analysis_result += "## μž‘λ¬Ό μΆ”μ²œ μΈμ‚¬μ΄νŠΈ\n\n"
        analysis_result += "ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 데이터셋 뢄석 κ²°κ³Ό, λ‹€μŒκ³Ό 같은 μ£Όμš” μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€:\n\n"
        analysis_result += "1. μ§€μ—­ ν™˜κ²½μ— μ ν•©ν•œ μž‘λ¬Ό μΆ”μ²œ\n"
        analysis_result += "2. μž‘λ¬Ό 생산성에 영ν–₯을 λ―ΈμΉ˜λŠ” μ£Όμš” ν™˜κ²½ μš”μΈ\n"
        analysis_result += "3. 지속 κ°€λŠ₯ν•œ 농업을 μœ„ν•œ 졜적의 μž‘λ¬Ό 선택 κΈ°μ€€\n\n"

        return analysis_result

    except Exception as e:
        logging.error(f"Crop recommendation dataset analysis error: {e}")
        return "μž‘λ¬Ό μΆ”μ²œ 데이터셋 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
407
+
408
# Function to analyze climate impact dataset
def analyze_climate_impact_dataset(query):
    """Build a Markdown report on the climate-impact-on-agriculture dataset.

    For at most the first two files listed by ``load_climate_impact_dataset()``
    the report contains the table size, the distinct values of the first
    region/country column, the column names that match the climate and crop
    keyword lists, a five-row sample, descriptive statistics of the matched
    numeric columns and a correlation table over up to two numeric climate
    columns and up to two numeric crop columns.

    Args:
        query: User question. Accepted for signature parity with the other
            analyzers; this function does not read it.

    Returns:
        The Markdown report as a string. A fixed error message is returned
        when the dataset cannot be loaded or an unexpected error occurs;
        per-file failures are logged and reported inline in the report.
    """
    try:
        dataset_info = load_climate_impact_dataset()
        if not dataset_info or not dataset_info['files']:
            return "κΈ°ν›„ λ³€ν™” 영ν–₯ 데이터셋을 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€."

        analysis_result = "# κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 데이터 뢄석\n\n"

        # Process main files
        for file_info in dataset_info['files'][:2]:  # Limit to first 2 files
            try:
                analysis_result += f"## 파일: {file_info['name']}\n\n"

                if file_info['name'].endswith('.csv'):
                    df = pd.read_csv(file_info['path'])
                elif file_info['name'].endswith('.xlsx'):
                    df = pd.read_excel(file_info['path'])
                else:
                    continue

                # Basic dataset info
                analysis_result += f"- 데이터 크기: {len(df)} ν–‰ Γ— {len(df.columns)} μ—΄\n"

                # Check for region column
                region_cols = [col for col in df.columns if 'region' in col.lower() or 'country' in col.lower() or 'μ§€μ—­' in col.lower()]
                if region_cols:
                    main_region_col = region_cols[0]
                    regions = df[main_region_col].unique()
                    analysis_result += f"- ν¬ν•¨λœ μ§€μ—­: {len(regions)}개 ({', '.join(str(r) for r in regions[:5])})\n"

                # Identify climate and crop related columns
                climate_cols = [col for col in df.columns if any(term in col.lower() for term in
                                ['temp', 'rainfall', 'precipitation', 'climate', 'weather', '기온', 'κ°•μˆ˜λŸ‰'])]
                crop_cols = [col for col in df.columns if any(term in col.lower() for term in
                             ['yield', 'production', 'crop', 'harvest', 'μˆ˜ν™•λŸ‰', 'μƒμ‚°λŸ‰'])]

                if climate_cols:
                    analysis_result += f"- κΈ°ν›„ κ΄€λ ¨ λ³€μˆ˜: {', '.join(climate_cols)}\n"
                if crop_cols:
                    analysis_result += f"- μž‘λ¬Ό κ΄€λ ¨ λ³€μˆ˜: {', '.join(crop_cols)}\n\n"

                # Sample data
                analysis_result += "### 데이터 μƒ˜ν”Œ:\n\n"
                analysis_result += df.head(5).to_markdown() + "\n\n"

                # Time series pattern if available
                year_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower() or '연도' in col.lower()]
                if year_cols:
                    analysis_result += "### μ‹œκ³„μ—΄ κΈ°ν›„ 영ν–₯ νŒ¨ν„΄:\n\n"
                    analysis_result += "이 데이터셋은 μ‹œκ°„μ— λ”°λ₯Έ κΈ°ν›„ 변화와 농업 생산성 κ°„μ˜ 관계λ₯Ό 뢄석할 수 μžˆμŠ΅λ‹ˆλ‹€.\n\n"

                # Statistical summary of key variables. A column can match
                # both keyword lists; drop repeats (order preserved) so it is
                # not reported twice.
                key_vars = list(dict.fromkeys(climate_cols + crop_cols))
                numeric_vars = df[key_vars].select_dtypes(include=['number']).columns
                if len(numeric_vars) > 0:
                    analysis_result += "### μ£Όμš” λ³€μˆ˜ 톡계:\n\n"
                    stats_df = df[numeric_vars].describe().round(2)
                    analysis_result += stats_df.to_markdown() + "\n\n"

                # Check for correlations between climate and crop variables
                if len(climate_cols) > 0 and len(crop_cols) > 0:
                    numeric_climate = df[climate_cols].select_dtypes(include=['number']).columns
                    numeric_crop = df[crop_cols].select_dtypes(include=['number']).columns

                    if len(numeric_climate) > 0 and len(numeric_crop) > 0:
                        analysis_result += "### 기후와 μž‘λ¬Ό 생산 κ°„μ˜ 상관관계:\n\n"
                        try:
                            # Limit to 2 of each type, without repeating a
                            # column that matched both keyword lists.
                            corr_vars = list(dict.fromkeys(list(numeric_climate)[:2] + list(numeric_crop)[:2]))
                            corr_df = df[corr_vars].corr().round(3)
                            analysis_result += corr_df.to_markdown() + "\n\n"
                            analysis_result += "μœ„ 상관관계 ν‘œλŠ” κΈ°ν›„ λ³€μˆ˜μ™€ μž‘λ¬Ό 생산성 κ°„μ˜ 관계 강도λ₯Ό λ³΄μ—¬μ€λ‹ˆλ‹€.\n\n"
                        except Exception as corr_err:
                            # Was a bare except: keep the best-effort message
                            # but stop swallowing KeyboardInterrupt/SystemExit
                            # and record the cause.
                            logging.warning(f"Climate correlation failed for {file_info['name']}: {corr_err}")
                            analysis_result += "상관관계 계산 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€.\n\n"

            except Exception as e:
                # Keep going with the next file; surface the problem inline.
                logging.error(f"Error analyzing climate impact file {file_info['name']}: {e}")
                analysis_result += f"뢄석 였λ₯˜: {str(e)}\n\n"

        analysis_result += "## κΈ°ν›„ λ³€ν™” 영ν–₯ μΈμ‚¬μ΄νŠΈ\n\n"
        analysis_result += "κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 데이터 뢄석 κ²°κ³Ό, λ‹€μŒκ³Ό 같은 μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€:\n\n"
        analysis_result += "1. 기온 변화에 λ”°λ₯Έ μž‘λ¬Ό 생산성 변동 νŒ¨ν„΄\n"
        analysis_result += "2. κ°•μˆ˜λŸ‰ λ³€ν™”κ°€ 농업 μˆ˜ν™•λŸ‰μ— λ―ΈμΉ˜λŠ” 영ν–₯\n"
        analysis_result += "3. κΈ°ν›„ 변화에 λŒ€μ‘ν•˜κΈ° μœ„ν•œ 농업 μ „λž΅ μ œμ•ˆ\n"
        analysis_result += "4. 지역별 κΈ°ν›„ μ·¨μ•½μ„± 및 적응 λ°©μ•ˆ\n\n"

        return analysis_result

    except Exception as e:
        logging.error(f"Climate impact dataset analysis error: {e}")
        return "κΈ°ν›„ λ³€ν™” 영ν–₯ 데이터셋 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
500
+
501
# Function to analyze soybean dataset if selected
def analyze_soybean_dataset(query):
    """Build a Markdown report on the advanced soybean agriculture dataset.

    For at most the first two files listed by ``load_soybean_dataset()`` the
    report contains the table size, the distinct values of the first
    location-like column, the column names that match the yield and
    environment keyword lists, a five-row sample, descriptive statistics of
    the matched numeric columns and a correlation table over up to three
    numeric environment columns and up to two numeric yield columns.

    Args:
        query: User question. Accepted for signature parity with the other
            analyzers; this function does not read it.

    Returns:
        The Markdown report as a string. A fixed error message is returned
        when the dataset cannot be loaded or an unexpected error occurs;
        per-file failures are logged and reported inline in the report.
    """
    try:
        dataset_info = load_soybean_dataset()
        if not dataset_info or not dataset_info['files']:
            return "λŒ€λ‘ 농업 데이터셋을 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€."

        analysis_result = "# κ³ κΈ‰ λŒ€λ‘ 농업 데이터 뢄석\n\n"

        # Process main files
        for file_info in dataset_info['files'][:2]:  # Limit to the first 2 files
            try:
                analysis_result += f"## 파일: {file_info['name']}\n\n"

                if file_info['name'].endswith('.csv'):
                    df = pd.read_csv(file_info['path'])
                elif file_info['name'].endswith('.xlsx'):
                    df = pd.read_excel(file_info['path'])
                else:
                    continue

                # Basic file stats
                analysis_result += f"- 데이터 크기: {len(df)} ν–‰ Γ— {len(df.columns)} μ—΄\n"

                # Check for region/location columns
                location_cols = [col for col in df.columns if any(term in col.lower() for term in
                                 ['region', 'location', 'area', 'country', 'μ§€μ—­'])]
                if location_cols:
                    main_loc_col = location_cols[0]
                    locations = df[main_loc_col].unique()
                    analysis_result += f"- ν¬ν•¨λœ μ§€μ—­: {len(locations)}개 ({', '.join(str(loc) for loc in locations[:5])})\n"

                # Identify yield and production columns
                yield_cols = [col for col in df.columns if any(term in col.lower() for term in
                              ['yield', 'production', 'harvest', 'μˆ˜ν™•λŸ‰', 'μƒμ‚°λŸ‰'])]
                if yield_cols:
                    analysis_result += f"- 생산성 κ΄€λ ¨ λ³€μˆ˜: {', '.join(yield_cols)}\n"

                # Identify environmental factors
                env_cols = [col for col in df.columns if any(term in col.lower() for term in
                            ['temp', 'rainfall', 'soil', 'fertilizer', 'nutrient', 'irrigation',
                             '기온', 'κ°•μˆ˜λŸ‰', 'ν† μ–‘', 'λΉ„λ£Œ', 'κ΄€κ°œ'])]
                if env_cols:
                    analysis_result += f"- ν™˜κ²½ κ΄€λ ¨ λ³€μˆ˜: {', '.join(env_cols)}\n\n"

                # Sample data
                analysis_result += "### 데이터 μƒ˜ν”Œ:\n\n"
                analysis_result += df.head(5).to_markdown() + "\n\n"

                # Statistical summary of key variables. A column can match
                # both keyword lists; drop repeats (order preserved) so it is
                # not reported twice.
                key_vars = list(dict.fromkeys(yield_cols + env_cols))
                numeric_vars = df[key_vars].select_dtypes(include=['number']).columns
                if len(numeric_vars) > 0:
                    analysis_result += "### μ£Όμš” λ³€μˆ˜ 톡계:\n\n"
                    stats_df = df[numeric_vars].describe().round(2)
                    analysis_result += stats_df.to_markdown() + "\n\n"

                # Time series analysis if possible
                year_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower() or '연도' in col.lower()]
                if year_cols:
                    analysis_result += "### μ‹œκ³„μ—΄ 생산성 νŒ¨ν„΄:\n\n"
                    analysis_result += "이 데이터셋은 μ‹œκ°„μ— λ”°λ₯Έ λŒ€λ‘ μƒμ‚°μ„±μ˜ λ³€ν™”λ₯Ό 좔적할 수 μžˆμŠ΅λ‹ˆλ‹€.\n\n"

                # Check for correlations between environmental factors and yield
                if len(env_cols) > 0 and len(yield_cols) > 0:
                    numeric_env = df[env_cols].select_dtypes(include=['number']).columns
                    numeric_yield = df[yield_cols].select_dtypes(include=['number']).columns

                    if len(numeric_env) > 0 and len(numeric_yield) > 0:
                        analysis_result += "### ν™˜κ²½ μš”μ†Œμ™€ λŒ€λ‘ 생산성 κ°„μ˜ 상관관계:\n\n"
                        try:
                            # Limit variables, without repeating a column that
                            # matched both keyword lists.
                            corr_vars = list(dict.fromkeys(list(numeric_env)[:3] + list(numeric_yield)[:2]))
                            corr_df = df[corr_vars].corr().round(3)
                            analysis_result += corr_df.to_markdown() + "\n\n"
                        except Exception as corr_err:
                            # Was a bare except: keep the best-effort message
                            # but stop swallowing KeyboardInterrupt/SystemExit
                            # and record the cause.
                            logging.warning(f"Soybean correlation failed for {file_info['name']}: {corr_err}")
                            analysis_result += "상관관계 계산 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€.\n\n"

            except Exception as e:
                # Keep going with the next file; surface the problem inline.
                logging.error(f"Error analyzing soybean file {file_info['name']}: {e}")
                analysis_result += f"뢄석 였λ₯˜: {str(e)}\n\n"

        analysis_result += "## λŒ€λ‘ 농업 μΈμ‚¬μ΄νŠΈ\n\n"
        analysis_result += "κ³ κΈ‰ λŒ€λ‘ 농업 데이터셋 뢄석 κ²°κ³Ό, λ‹€μŒκ³Ό 같은 μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€:\n\n"
        analysis_result += "1. 졜적의 λŒ€λ‘ 생산을 μœ„ν•œ ν™˜κ²½ 쑰건\n"
        analysis_result += "2. 지역별 λŒ€λ‘ 생산성 λ³€ν™” νŒ¨ν„΄\n"
        analysis_result += "3. 생산성 ν–₯상을 μœ„ν•œ 농업 기술 및 접근법\n"
        analysis_result += "4. μ‹œμž₯ μˆ˜μš”μ— λ§žλŠ” λŒ€λ‘ ν’ˆμ’… 선택 κ°€μ΄λ“œ\n\n"

        return analysis_result

    except Exception as e:
        logging.error(f"Soybean dataset analysis error: {e}")
        return "λŒ€λ‘ 농업 데이터셋 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
595
+
596
  # ──────────────────────────────── System Prompt ─────────────────────────
597
  def get_system_prompt(mode="price_forecast", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
598
  """
 
609
  3. 데이터λ₯Ό λ°”νƒ•μœΌλ‘œ λͺ…ν™•ν•˜κ³  κ·Όκ±° μžˆλŠ” 뢄석 제곡
610
  4. κ΄€λ ¨ 정보와 μΈμ‚¬μ΄νŠΈλ₯Ό μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ—¬ μ œμ‹œ
611
  5. μ‹œκ°μ  이해λ₯Ό 돕기 μœ„ν•΄ 차트, κ·Έλž˜ν”„ 등을 적절히 ν™œμš©
612
+ 6. ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ λ°μ΄ν„°μ…‹μ—μ„œ μΆ”μΆœν•œ μΈμ‚¬μ΄νŠΈ 적용
613
+ 7. κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 데이터셋을 ν†΅ν•œ ν™˜κ²½ λ³€ν™” μ‹œλ‚˜λ¦¬μ˜€ 뢄석
614
 
615
  μ€‘μš” κ°€μ΄λ“œλΌμΈ:
616
  - 데이터에 κΈ°λ°˜ν•œ 객관적 뢄석을 μ œκ³΅ν•˜μ„Έμš”
 
671
  }
672
 
673
  dataset_guide = """
674
+ 농업 데이터셋 ν™œμš© μ§€μΉ¨:
675
+ - UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 κΈ°λ³Έ λΆ„μ„μ˜ 근거둜 μ‚¬μš©ν•˜μ„Έμš”
676
+ - ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ λ°μ΄ν„°μ…‹μ˜ μΈμ‚¬μ΄νŠΈλ₯Ό μž‘λ¬Ό 선택 및 재배 쑰건 뢄석에 ν†΅ν•©ν•˜μ„Έμš”
677
+ - κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ λ°μ΄ν„°μ…‹μ˜ 정보λ₯Ό 지속 κ°€λŠ₯μ„± 및 미래 전망 뢄석에 ν™œμš©ν•˜μ„Έμš”
678
  - λ°μ΄ν„°μ˜ μΆœμ²˜μ™€ 연도λ₯Ό λͺ…ν™•νžˆ μΈμš©ν•˜μ„Έμš”
679
  - 데이터셋 λ‚΄ μ£Όμš” λ³€μˆ˜ κ°„μ˜ 관계λ₯Ό λΆ„μ„ν•˜μ—¬ μΈμ‚¬μ΄νŠΈλ₯Ό λ„μΆœν•˜μ„Έμš”
680
  - λ°μ΄ν„°μ˜ ν•œκ³„μ™€ λΆˆν™•μ‹€μ„±μ„ 투λͺ…ν•˜κ²Œ μ–ΈκΈ‰ν•˜μ„Έμš”
681
  - ν•„μš”μ‹œ 데이터 격차λ₯Ό μ‹λ³„ν•˜κ³  μΆ”κ°€ 연ꡬ가 ν•„μš”ν•œ μ˜μ—­μ„ μ œμ•ˆν•˜μ„Έμš”
682
+ """
683
+
684
+ soybean_guide = """
685
+ κ³ κΈ‰ λŒ€λ‘ 농업 데이터셋 ν™œμš© μ§€μΉ¨:
686
+ - λŒ€λ‘ 생산 쑰건 및 μˆ˜ν™•λŸ‰ νŒ¨ν„΄μ„ λ‹€λ₯Έ μž‘λ¬Όκ³Ό λΉ„κ΅ν•˜μ—¬ λΆ„μ„ν•˜μ„Έμš”
687
+ - λŒ€λ‘ λ†μ—…μ˜ 경제적 κ°€μΉ˜μ™€ μ‹œμž₯ κΈ°νšŒμ— λŒ€ν•œ μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•˜μ„Έμš”
688
+ - λŒ€λ‘ 생산성에 영ν–₯을 λ―ΈμΉ˜λŠ” μ£Όμš” ν™˜κ²½ μš”μΈμ„ κ°•μ‘°ν•˜μ„Έμš”
689
+ - λŒ€λ‘ 재배 기술 ν˜μ‹ κ³Ό μˆ˜μ΅μ„± ν–₯상 λ°©μ•ˆμ„ μ œμ•ˆν•˜μ„Έμš”
690
+ - 지속 κ°€λŠ₯ν•œ λŒ€λ‘ 농업을 μœ„ν•œ μ‹€μ§ˆμ μΈ 접근법을 κ³΅μœ ν•˜μ„Έμš”
691
+ """
692
+
693
+ crop_recommendation_guide = """
694
+ ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ ν™œμš© μ§€μΉ¨:
695
+ - μ§€μ—­ νŠΉμ„±μ— λ§žλŠ” 졜적의 μž‘λ¬Ό 선택 기쀀을 μ œμ‹œν•˜μ„Έμš”
696
+ - ν† μ–‘ 쑰건과 μž‘λ¬Ό 적합성 κ°„μ˜ 상관관계λ₯Ό λΆ„μ„ν•˜μ„Έμš”
697
+ - ν™˜κ²½ λ³€μˆ˜μ— λ”°λ₯Έ μž‘λ¬Ό 생산성 예츑 λͺ¨λΈμ„ ν™œμš©ν•˜μ„Έμš”
698
+ - 농업 생산성과 μˆ˜μ΅μ„± ν–₯상을 μœ„ν•œ μž‘λ¬Ό 선택 μ „λž΅μ„ μ œμ•ˆν•˜μ„Έμš”
699
+ - 지속 κ°€λŠ₯ν•œ 농업을 μœ„ν•œ μž‘λ¬Ό λ‹€μ–‘ν™” 접근법을 ꢌμž₯ν•˜μ„Έμš”
700
+ """
701
+
702
+ climate_impact_guide = """
703
+ κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 데이터셋 ν™œμš© μ§€μΉ¨:
704
+ - κΈ°ν›„ λ³€ν™” μ‹œλ‚˜λ¦¬μ˜€μ— λ”°λ₯Έ μž‘λ¬Ό 생산성 λ³€ν™”λ₯Ό μ˜ˆμΈ‘ν•˜μ„Έμš”
705
+ - κΈ°ν›„ μ μ‘ν˜• 농업 기술 및 μ „λž΅μ„ μ œμ•ˆν•˜μ„Έμš”
706
+ - 지역별 κΈ°ν›„ μœ„ν—˜ μš”μ†Œμ™€ λŒ€μ‘ λ°©μ•ˆμ„ λΆ„μ„ν•˜μ„Έμš”
707
+ - κΈ°ν›„ 변화에 λŒ€μ‘ν•˜κΈ° μœ„ν•œ μž‘λ¬Ό 선택 및 재배 μ‹œκΈ° μ‘°μ • λ°©μ•ˆμ„ μ œμ‹œν•˜μ„Έμš”
708
+ - κΈ°ν›„ λ³€ν™”κ°€ 농산물 가격 및 μ‹œμž₯ 동ν–₯에 λ―ΈμΉ˜λŠ” 영ν–₯을 ν‰κ°€ν•˜μ„Έμš”
709
  """
710
 
711
  search_guide = """
 
741
  if style in style_guides:
742
  final_prompt += f"\n\n뢄석 μŠ€νƒ€μΌ: {style_guides[style]}"
743
 
744
+ # Always include dataset guides
745
  final_prompt += f"\n\n{dataset_guide}"
746
+ final_prompt += f"\n\n{crop_recommendation_guide}"
747
+ final_prompt += f"\n\n{climate_impact_guide}"
748
+
749
+ # Conditionally add soybean dataset guide if selected in UI
750
+ if st.session_state.get('use_soybean_dataset', False):
751
+ final_prompt += f"\n\n{soybean_guide}"
752
 
753
  if include_search_results:
754
  final_prompt += f"\n\n{search_guide}"
 
782
  r.raise_for_status()
783
  data = r.json()
784
 
 
 
785
  raw = data.get("web", {}).get("results") or data.get("results", [])
786
  if not raw:
787
  logging.warning(f"No Brave search results found. Response: {data}")
 
799
  "displayed_link": host
800
  })
801
 
 
802
  return arts
803
 
804
  except Exception as e:
 
832
  "source": vid.get("provider", {}).get("name", "Unknown source")
833
  })
834
 
 
835
  return results
836
 
837
  except Exception as e:
 
866
  "date": news.get("age", "Unknown date")
867
  })
868
 
 
869
  return results
870
 
871
  except Exception as e:
 
1112
  st.session_state.analysis_mode = "price_forecast"
1113
  if "response_style" not in st.session_state:
1114
  st.session_state.response_style = "professional"
1115
+ if "use_soybean_dataset" not in st.session_state:
1116
+ st.session_state.use_soybean_dataset = False
1117
 
1118
  sb = st.sidebar
1119
  sb.title("뢄석 μ„€μ •")
 
1146
  key="response_style"
1147
  )
1148
 
1149
+ # Dataset selection
1150
+ sb.subheader("데이터셋 선택")
1151
+ sb.checkbox(
1152
+ "κ³ κΈ‰ λŒ€λ‘ 농업 데이터셋 μ‚¬μš©",
1153
+ key="use_soybean_dataset",
1154
+ help="λŒ€λ‘(콩) κ΄€λ ¨ μ§ˆλ¬Έμ— 더 μ •ν™•ν•œ 정보λ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€."
1155
+ )
1156
+
1157
+ # Always enabled datasets info
1158
+ sb.info("κΈ°λ³Έ ν™œμ„±ν™”λœ 데이터셋:\n- UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계\n- ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ\n- κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯")
1159
+
1160
  # Example queries
1161
  sb.subheader("μ˜ˆμ‹œ 질문")
1162
  c1, c2, c3 = sb.columns(3)
 
1338
  status.update(label="농업 데이터셋 뢄석 쀑...")
1339
  with st.spinner("데이터셋 뢄석 쀑..."):
1340
  dataset_analysis = analyze_dataset_for_query(query)
1341
+
1342
+ # 항상 ν¬ν•¨λ˜λŠ” μΆ”κ°€ 데이터셋 뢄석
1343
+ crop_recommendation_analysis = analyze_crop_recommendation_dataset(query)
1344
+ climate_impact_analysis = analyze_climate_impact_dataset(query)
1345
+
1346
+ #
1347
+
1348
+
1349
+ # 쑰건뢀 데이터셋 뢄석
1350
+ soybean_analysis = None
1351
+ if st.session_state.use_soybean_dataset:
1352
+ status.update(label="λŒ€λ‘ 농업 데이터셋 뢄석 쀑...")
1353
+ with st.spinner("λŒ€λ‘ 데이터셋 뢄석 쀑..."):
1354
+ soybean_analysis = analyze_soybean_dataset(query)
1355
 
1356
  if use_web_search:
1357
+ # μ›Ή 검색 과정은 λ…ΈμΆœν•˜μ§€ μ•Šκ³  쑰용히 μ§„ν–‰
1358
+ with st.spinner("정보 μˆ˜μ§‘ 쀑..."):
1359
  search_content = do_web_search(keywords(query, top=5))
 
 
 
1360
  video_results = brave_video_search(query, 2)
1361
+ news_results = brave_news_search(query, 3)
 
 
1362
 
1363
  file_content = None
1364
  if has_uploaded_files:
 
1390
  ]
1391
 
1392
  user_content = query
1393
+ # 항상 κΈ°λ³Έ 데이터셋 뢄석 κ²°κ³Ό 포함
1394
  user_content += "\n\n" + dataset_analysis
1395
+ user_content += "\n\n" + crop_recommendation_analysis
1396
+ user_content += "\n\n" + climate_impact_analysis
1397
+
1398
+ # 쑰건뢀 데이터셋 κ²°κ³Ό 포함
1399
+ if soybean_analysis:
1400
+ user_content += "\n\n" + soybean_analysis
1401
 
1402
  if search_content:
1403
  user_content += "\n\n" + search_content
 
1501
  agricultural_price_forecast_app()
1502
 
1503
  if __name__ == "__main__":
1504
+ main()