ginipick committed on
Commit
f77229e
·
verified ·
1 Parent(s): 1710ad7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +444 -232
app.py CHANGED
@@ -6,11 +6,12 @@ from io import BytesIO
6
  from PIL import Image
7
 
8
  import streamlit as st
9
- from openai import OpenAI # OpenAI 라이브러리
10
 
11
  from gradio_client import Client
12
  import pandas as pd
13
  import PyPDF2 # For handling PDF files
 
14
 
15
  # ──────────────────────────────── Environment Variables / Constants ─────────────────────────
16
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
@@ -20,28 +21,32 @@ BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
20
  BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
21
  IMAGE_API_URL = "http://211.233.58.201:7896"
22
  MAX_TOKENS = 7999
23
-
24
- # Brave Search modes and style definitions (in English)
25
- SEARCH_MODES = {
26
- "comprehensive": "Comprehensive answer with multiple sources",
27
- "academic": "Academic and research-focused results",
28
- "news": "Latest news and current events",
29
- "technical": "Technical and specialized information",
30
- "educational": "Educational and learning resources"
 
 
 
 
31
  }
32
 
33
  RESPONSE_STYLES = {
34
- "professional": "Professional and formal tone",
35
- "casual": "Friendly and conversational tone",
36
- "simple": "Simple and easy to understand",
37
- "detailed": "Detailed and thorough explanations"
38
  }
39
 
40
  # Example search queries
41
  EXAMPLE_QUERIES = {
42
- "example1": "What are the latest developments in quantum computing?",
43
- "example2": "How does climate change affect biodiversity in tropical rainforests?",
44
- "example3": "What are the economic implications of artificial intelligence in the job market?"
45
  }
46
 
47
  # ──────────────────────────────── Logging ────────────────────────────────
@@ -60,105 +65,289 @@ def get_openai_client():
60
  timeout=60.0,
61
  max_retries=3
62
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # ──────────────────────────────── System Prompt ─────────────────────────
65
- def get_system_prompt(mode="comprehensive", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
66
  """
67
- Generate a system prompt for the 'Perplexity Clone' interface based on:
68
- - The selected search mode and style
69
- - Guidelines for using web search results and uploaded files
70
  """
71
- comprehensive_prompt = """
72
- You are an advanced AI assistant that provides comprehensive answers with multiple sources, similar to Perplexity.
73
-
74
- Your task is to:
75
- 1. Thoroughly analyze the user's query
76
- 2. Provide a clear, well-structured answer integrating information from multiple sources
77
- 3. Include relevant videos, and links in your response
78
- 4. Format your answer with proper headings, bullet points, and sections
79
- 5. Cite sources inline and provide a references section at the end
80
-
81
- Important guidelines:
82
- - Organize information logically with clear section headings
83
- - Use bullet points and numbered lists for clarity
84
- - Include specific, factual information whenever possible
85
- - Provide balanced perspectives on controversial topics
86
- - Display relevant statistics, data, or quotes when appropriate
87
- - Format your response using markdown for readability
88
  """
89
 
90
  mode_prompts = {
91
- "academic": """
92
- Your focus is on providing academic and research-focused responses:
93
- - Prioritize peer-reviewed research and academic sources
94
- - Include citations in a formal academic format
95
- - Discuss methodologies and research limitations where relevant
96
- - Present different scholarly perspectives on the topic
97
- - Use precise, technical language appropriate for an academic audience
 
 
 
 
 
 
 
 
98
  """,
99
- "news": """
100
- Your focus is on providing the latest news and current events:
101
- - Prioritize recent news articles and current information
102
- - Include publication dates for all news sources
103
- - Present multiple perspectives from different news outlets
104
- - Distinguish between facts and opinions/editorial content
105
- - Update information with the most recent developments
106
  """,
107
- "technical": """
108
- Your focus is on providing technical and specialized information:
109
- - Use precise technical terminology appropriate to the field
110
- - Include code snippets, formulas, or technical diagrams where relevant
111
- - Break down complex concepts into step-by-step explanations
112
- - Reference technical documentation, standards, and best practices
113
- - Consider different technical approaches or methodologies
114
  """,
115
- "educational": """
116
- Your focus is on providing educational and learning resources:
117
- - Structure information in a learning-friendly progression
118
- - Include examples, analogies, and visual explanations
119
- - Highlight key concepts and definitions
120
- - Suggest further learning resources at different difficulty levels
121
- - Present information that's accessible to learners at various levels
122
  """
123
  }
124
 
125
  style_guides = {
126
- "professional": "Use a professional, authoritative voice. Clearly explain technical terms and present data systematically.",
127
- "casual": "Use a relaxed, conversational style with a friendly tone. Include relatable examples and occasionally use informal expressions.",
128
- "simple": "Use straightforward language and avoid jargon. Keep sentences and paragraphs short. Explain concepts as if to someone with no background in the subject.",
129
- "detailed": "Provide thorough explanations with comprehensive background information. Explore nuances and edge cases. Present multiple perspectives and detailed analysis."
130
  }
131
 
 
 
 
 
 
 
 
 
 
132
  search_guide = """
133
- Guidelines for Using Search Results:
134
- - Include source links directly in your response using markdown: [Source Name](URL)
135
- - For each major claim or piece of information, indicate its source
136
- - If sources conflict, explain the different perspectives and their reliability
137
- - Include relevant video links when appropriate by writing: [Video: Title](video_url)
138
- - Format search information into a cohesive, well-structured response
139
- - Include a "References" section at the end listing all major sources with links
 
140
  """
141
 
142
  upload_guide = """
143
- Guidelines for Using Uploaded Files:
144
- - Treat the uploaded files as primary sources for your response
145
- - Extract and highlight key information from files that directly addresses the query
146
- - Quote relevant passages and cite the specific file
147
- - For numerical data in CSV files, consider creating summary statements
148
- - For PDF content, reference specific sections or pages
149
- - Integrate file information seamlessly with web search results
150
- - When information conflicts, prioritize file content over general web results
151
  """
152
 
153
  # Base prompt
154
- if mode == "comprehensive":
155
- final_prompt = comprehensive_prompt
156
- else:
157
- final_prompt = comprehensive_prompt + "\n" + mode_prompts.get(mode, "")
 
158
 
159
  # Style
160
  if style in style_guides:
161
- final_prompt += f"\n\nTone and Style: {style_guides[style]}"
 
 
 
162
 
163
  if include_search_results:
164
  final_prompt += f"\n\n{search_guide}"
@@ -167,23 +356,24 @@ Guidelines for Using Uploaded Files:
167
  final_prompt += f"\n\n{upload_guide}"
168
 
169
  final_prompt += """
170
- \n\nAdditional Formatting Requirements:
171
- - Use markdown headings (## and ###) to organize your response
172
- - Use bold text (**text**) for emphasis on important points
173
- - Include a "Related Questions" section at the end with 3-5 follow-up questions
174
- - Format your response with proper spacing and paragraph breaks
175
- - Make all links clickable by using proper markdown format: [text](url)
 
176
  """
177
  return final_prompt
178
 
179
  # ──────────────────────────────── Brave Search API ────────────────────────
180
  @st.cache_data(ttl=3600)
181
- def brave_search(query: str, count: int = 20):
182
  if not BRAVE_KEY:
183
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
184
 
185
  headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
186
- params = {"q": query, "count": str(count)}
187
 
188
  for attempt in range(3):
189
  try:
@@ -221,12 +411,12 @@ def brave_search(query: str, count: int = 20):
221
  return []
222
 
223
  @st.cache_data(ttl=3600)
224
- def brave_video_search(query: str, count: int = 5):
225
  if not BRAVE_KEY:
226
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
227
 
228
  headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
229
- params = {"q": query, "count": str(count)}
230
 
231
  for attempt in range(3):
232
  try:
@@ -255,12 +445,12 @@ def brave_video_search(query: str, count: int = 5):
255
  return []
256
 
257
  @st.cache_data(ttl=3600)
258
- def brave_news_search(query: str, count: int = 5):
259
  if not BRAVE_KEY:
260
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
261
 
262
  headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
263
- params = {"q": query, "count": str(count)}
264
 
265
  for attempt in range(3):
266
  try:
@@ -291,19 +481,19 @@ def brave_news_search(query: str, count: int = 5):
291
 
292
  def mock_results(query: str) -> str:
293
  ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
294
- return (f"# Fallback Search Content (Generated: {ts})\n\n"
295
- f"The search API request failed or returned no results for '{query}'. "
296
- f"Please generate a response based on any pre-existing knowledge.\n\n"
297
- f"Consider these points:\n\n"
298
- f"- Basic concepts and importance of {query}\n"
299
- f"- Commonly known related statistics or trends\n"
300
- f"- Typical expert opinions on this subject\n"
301
- f"- Questions that readers might have\n\n"
302
- f"Note: This is fallback guidance, not real-time data.\n\n")
303
 
304
  def do_web_search(query: str) -> str:
305
  try:
306
- arts = brave_search(query, 20)
307
  if not arts:
308
  logging.warning("No search results, using fallback content")
309
  return mock_results(query)
@@ -311,26 +501,26 @@ def do_web_search(query: str) -> str:
311
  videos = brave_video_search(query, 2)
312
  news = brave_news_search(query, 3)
313
 
314
- result = "# Web Search Results\nUse these results to provide a comprehensive answer with multiple sources.\n\n"
315
 
316
- result += "## Web Results\n\n"
317
- for a in arts[:10]:
318
- result += f"### Result {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
319
- result += f"**Source**: [{a['displayed_link']}]({a['link']})\n\n---\n"
320
 
 
 
 
 
 
 
321
  if videos:
322
- result += "## Video Results\n\n"
323
  for vid in videos:
324
  result += f"### {vid['title']}\n\n"
325
  if vid.get('thumbnail_url'):
326
- result += f"![Thumbnail]({vid['thumbnail_url']})\n\n"
327
- result += f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n"
328
-
329
- if news:
330
- result += "## News Results\n\n"
331
- for n in news:
332
- result += f"### {n['title']}\n\n{n['description']}\n\n"
333
- result += f"**Source**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n"
334
 
335
  return result
336
 
@@ -348,11 +538,11 @@ def process_text_file(file):
348
  if len(text) > 10000:
349
  text = text[:9700] + "...(truncated)..."
350
 
351
- result = f"## Text File: {file.name}\n\n" + text
352
  return result
353
  except Exception as e:
354
  logging.error(f"Error processing text file: {str(e)}")
355
- return f"Error processing text file: {str(e)}"
356
 
357
  def process_csv_file(file):
358
  try:
@@ -360,41 +550,41 @@ def process_csv_file(file):
360
  file.seek(0)
361
 
362
  df = pd.read_csv(io.BytesIO(content))
363
- result = f"## CSV File: {file.name}\n\n"
364
- result += f"- Rows: {len(df)}\n"
365
- result += f"- Columns: {len(df.columns)}\n"
366
- result += f"- Column Names: {', '.join(df.columns.tolist())}\n\n"
367
 
368
- result += "### Data Preview\n\n"
369
  preview_df = df.head(10)
370
  try:
371
  markdown_table = preview_df.to_markdown(index=False)
372
  if markdown_table:
373
  result += markdown_table + "\n\n"
374
  else:
375
- result += "Unable to display CSV data.\n\n"
376
  except Exception as e:
377
  logging.error(f"Markdown table conversion error: {e}")
378
- result += "Displaying data as text:\n\n" + str(preview_df) + "\n\n"
379
 
380
  num_cols = df.select_dtypes(include=['number']).columns
381
  if len(num_cols) > 0:
382
- result += "### Basic Statistical Information\n\n"
383
  try:
384
  stats_df = df[num_cols].describe().round(2)
385
  stats_markdown = stats_df.to_markdown()
386
  if stats_markdown:
387
  result += stats_markdown + "\n\n"
388
  else:
389
- result += "Unable to display statistical information.\n\n"
390
  except Exception as e:
391
  logging.error(f"Statistical info conversion error: {e}")
392
- result += "Unable to generate statistical information.\n\n"
393
 
394
  return result
395
  except Exception as e:
396
  logging.error(f"CSV file processing error: {str(e)}")
397
- return f"Error processing CSV file: {str(e)}"
398
 
399
  def process_pdf_file(file):
400
  try:
@@ -404,7 +594,7 @@ def process_pdf_file(file):
404
  pdf_file = io.BytesIO(file_bytes)
405
  reader = PyPDF2.PdfReader(pdf_file, strict=False)
406
 
407
- result = f"## PDF File: {file.name}\n\n- Total pages: {len(reader.pages)}\n\n"
408
 
409
  max_pages = min(5, len(reader.pages))
410
  all_text = ""
@@ -413,40 +603,40 @@ def process_pdf_file(file):
413
  try:
414
  page = reader.pages[i]
415
  page_text = page.extract_text()
416
- current_page_text = f"### Page {i+1}\n\n"
417
  if page_text and len(page_text.strip()) > 0:
418
  if len(page_text) > 1500:
419
- current_page_text += page_text[:1500] + "...(truncated)...\n\n"
420
  else:
421
  current_page_text += page_text + "\n\n"
422
  else:
423
- current_page_text += "(No text could be extracted)\n\n"
424
 
425
  all_text += current_page_text
426
 
427
  if len(all_text) > 8000:
428
- all_text += "...(truncating remaining pages)...\n\n"
429
  break
430
 
431
  except Exception as page_err:
432
  logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
433
- all_text += f"### Page {i+1}\n\n(Error extracting content: {str(page_err)})\n\n"
434
 
435
  if len(reader.pages) > max_pages:
436
- all_text += f"\nNote: Only the first {max_pages} pages are shown.\n\n"
437
 
438
- result += "### PDF Content\n\n" + all_text
439
  return result
440
 
441
  except Exception as e:
442
  logging.error(f"PDF file processing error: {str(e)}")
443
- return f"## PDF File: {file.name}\n\nError: {str(e)}\n\nCannot process."
444
 
445
  def process_uploaded_files(files):
446
  if not files:
447
  return None
448
 
449
- result = "# Uploaded File Contents\n\nBelow is the content from the files provided by the user.\n\n"
450
  for file in files:
451
  try:
452
  ext = file.name.split('.')[-1].lower()
@@ -457,10 +647,10 @@ def process_uploaded_files(files):
457
  elif ext == 'pdf':
458
  result += process_pdf_file(file) + "\n\n---\n\n"
459
  else:
460
- result += f"### Unsupported File: {file.name}\n\n---\n\n"
461
  except Exception as e:
462
  logging.error(f"File processing error {file.name}: {e}")
463
- result += f"### File processing error: {file.name}\n\nError: {e}\n\n---\n\n"
464
 
465
  return result
466
 
@@ -488,8 +678,8 @@ def extract_image_prompt(response_text: str, topic: str):
488
  response = client.chat.completions.create(
489
  model="gpt-4.1-mini",
490
  messages=[
491
- {"role": "system", "content": "Generate a single-line English image prompt from the following text. Return only the prompt text, nothing else."},
492
- {"role": "user", "content": f"Topic: {topic}\n\n---\n{response_text}\n\n---"}
493
  ],
494
  temperature=1,
495
  max_tokens=80,
@@ -498,9 +688,9 @@ def extract_image_prompt(response_text: str, topic: str):
498
  return response.choices[0].message.content.strip()
499
  except Exception as e:
500
  logging.error(f"OpenAI image prompt generation error: {e}")
501
- return f"A professional photo related to {topic}, high quality"
502
 
503
- def md_to_html(md: str, title="Perplexity Clone Response"):
504
  return f"<!DOCTYPE html><html><head><title>{title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
505
 
506
  def keywords(text: str, top=5):
@@ -508,8 +698,9 @@ def keywords(text: str, top=5):
508
  return " ".join(cleaned.split()[:top])
509
 
510
  # ──────────────────────────────── Streamlit UI ────────────────────────────
511
- def perplexity_app():
512
- st.title("Perplexity Clone AI Assistant")
 
513
 
514
  if "ai_model" not in st.session_state:
515
  st.session_state.ai_model = "gpt-4.1-mini"
@@ -521,48 +712,61 @@ def perplexity_app():
521
  st.session_state.generate_image = False
522
  if "web_search_enabled" not in st.session_state:
523
  st.session_state.web_search_enabled = True
524
- if "search_mode" not in st.session_state:
525
- st.session_state.search_mode = "comprehensive"
526
  if "response_style" not in st.session_state:
527
  st.session_state.response_style = "professional"
528
 
529
  sb = st.sidebar
530
- sb.title("Search Settings")
531
 
532
- sb.subheader("Response Configuration")
 
 
 
 
 
 
 
 
 
 
 
 
 
533
  sb.selectbox(
534
- "Search Mode",
535
- options=list(SEARCH_MODES.keys()),
536
- format_func=lambda x: SEARCH_MODES[x],
537
- key="search_mode"
538
  )
539
 
540
  sb.selectbox(
541
- "Response Style",
542
  options=list(RESPONSE_STYLES.keys()),
543
  format_func=lambda x: RESPONSE_STYLES[x],
544
  key="response_style"
545
  )
546
 
547
  # Example queries
548
- sb.subheader("Example Queries")
549
  c1, c2, c3 = sb.columns(3)
550
- if c1.button("Quantum Computing", key="ex1"):
551
  process_example(EXAMPLE_QUERIES["example1"])
552
- if c2.button("Climate Change", key="ex2"):
553
  process_example(EXAMPLE_QUERIES["example2"])
554
- if c3.button("AI Economics", key="ex3"):
555
  process_example(EXAMPLE_QUERIES["example3"])
556
 
557
- sb.subheader("Other Settings")
558
- sb.toggle("Auto Save", key="auto_save")
559
- sb.toggle("Auto Image Generation", key="generate_image")
560
 
561
- web_search_enabled = sb.toggle("Use Web Search", value=st.session_state.web_search_enabled)
562
  st.session_state.web_search_enabled = web_search_enabled
563
 
564
  if web_search_enabled:
565
- st.sidebar.info("✅ Web search results will be integrated into the response.")
566
 
567
  # Download the latest response
568
  latest_response = next(
@@ -578,35 +782,35 @@ def perplexity_app():
578
  first_line = latest_response.split('\n', 1)[0].strip()
579
  title = first_line[:40] + "..." if len(first_line) > 40 else first_line
580
 
581
- sb.subheader("Download Latest Response")
582
  d1, d2 = sb.columns(2)
583
- d1.download_button("Download as Markdown", latest_response,
584
  file_name=f"{title}.md", mime="text/markdown")
585
- d2.download_button("Download as HTML", md_to_html(latest_response, title),
586
  file_name=f"{title}.html", mime="text/html")
587
 
588
  # JSON conversation record upload
589
- up = sb.file_uploader("Load Conversation History (.json)", type=["json"], key="json_uploader")
590
  if up:
591
  try:
592
  st.session_state.messages = json.load(up)
593
- sb.success("Conversation history loaded successfully")
594
  except Exception as e:
595
- sb.error(f"Failed to load: {e}")
596
 
597
  # JSON conversation record download
598
- if sb.button("Download Conversation as JSON"):
599
  sb.download_button(
600
- "Save",
601
  data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
602
  file_name="conversation_history.json",
603
  mime="application/json"
604
  )
605
 
606
  # File Upload
607
- st.subheader("Upload Files")
608
  uploaded_files = st.file_uploader(
609
- "Upload files to be used as reference (txt, csv, pdf)",
610
  type=["txt", "csv", "pdf"],
611
  accept_multiple_files=True,
612
  key="file_uploader"
@@ -614,18 +818,18 @@ def perplexity_app():
614
 
615
  if uploaded_files:
616
  file_count = len(uploaded_files)
617
- st.success(f"{file_count} files uploaded. They will be used as sources for your query.")
618
 
619
- with st.expander("Preview Uploaded Files", expanded=False):
620
  for idx, file in enumerate(uploaded_files):
621
- st.write(f"**File Name:** {file.name}")
622
  ext = file.name.split('.')[-1].lower()
623
 
624
  if ext == 'txt':
625
  preview = file.read(1000).decode('utf-8', errors='ignore')
626
  file.seek(0)
627
  st.text_area(
628
- f"Preview of {file.name}",
629
  preview + ("..." if len(preview) >= 1000 else ""),
630
  height=150
631
  )
@@ -633,10 +837,10 @@ def perplexity_app():
633
  try:
634
  df = pd.read_csv(file)
635
  file.seek(0)
636
- st.write("CSV Preview (up to 5 rows)")
637
  st.dataframe(df.head(5))
638
  except Exception as e:
639
- st.error(f"CSV preview failed: {e}")
640
  elif ext == 'pdf':
641
  try:
642
  file_bytes = file.read()
@@ -646,17 +850,17 @@ def perplexity_app():
646
  reader = PyPDF2.PdfReader(pdf_file, strict=False)
647
 
648
  pc = len(reader.pages)
649
- st.write(f"PDF File: {pc} pages")
650
 
651
  if pc > 0:
652
  try:
653
  page_text = reader.pages[0].extract_text()
654
- preview = page_text[:500] if page_text else "(No text extracted)"
655
- st.text_area("Preview of the first page", preview + "...", height=150)
656
  except:
657
- st.warning("Failed to extract text from the first page")
658
  except Exception as e:
659
- st.error(f"PDF preview failed: {e}")
660
 
661
  if idx < file_count - 1:
662
  st.divider()
@@ -668,9 +872,9 @@ def perplexity_app():
668
 
669
  # Videos
670
  if "videos" in m and m["videos"]:
671
- st.subheader("Related Videos")
672
  for video in m["videos"]:
673
- video_title = video.get('title', 'Related video')
674
  video_url = video.get('url', '')
675
  thumbnail = video.get('thumbnail', '')
676
 
@@ -680,13 +884,13 @@ def perplexity_app():
680
  st.write("🎬")
681
  with col2:
682
  st.markdown(f"**[{video_title}]({video_url})**")
683
- st.write(f"Source: {video.get('source', 'Unknown')}")
684
  else:
685
  st.markdown(f"🎬 **[{video_title}]({video_url})**")
686
- st.write(f"Source: {video.get('source', 'Unknown')}")
687
 
688
  # User input
689
- query = st.chat_input("Enter your query or question here.")
690
  if query:
691
  process_input(query, uploaded_files)
692
 
@@ -712,8 +916,8 @@ def process_input(query: str, uploaded_files):
712
  has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
713
 
714
  try:
715
- status = st.status("Preparing to answer your query...")
716
- status.update(label="Initializing client...")
717
 
718
  client = get_openai_client()
719
 
@@ -721,22 +925,27 @@ def process_input(query: str, uploaded_files):
721
  video_results = []
722
  news_results = []
723
 
 
 
 
 
 
724
  if use_web_search:
725
- status.update(label="Performing web search...")
726
- with st.spinner("Searching the web..."):
727
  search_content = do_web_search(keywords(query, top=5))
728
 
729
  try:
730
- status.update(label="Finding videos...")
731
  video_results = brave_video_search(query, 2)
732
  news_results = brave_news_search(query, 3)
733
  except Exception as search_err:
734
- logging.error(f"Media search error: {search_err}")
735
 
736
  file_content = None
737
  if has_uploaded_files:
738
- status.update(label="Processing uploaded files...")
739
- with st.spinner("Analyzing files..."):
740
  file_content = process_uploaded_files(uploaded_files)
741
 
742
  valid_videos = []
@@ -745,14 +954,14 @@ def process_input(query: str, uploaded_files):
745
  if url and url.startswith('http'):
746
  valid_videos.append({
747
  'url': url,
748
- 'title': vid.get('title', 'Video'),
749
  'thumbnail': vid.get('thumbnail_url', ''),
750
- 'source': vid.get('source', 'Video source')
751
  })
752
 
753
- status.update(label="Preparing comprehensive answer...")
754
  sys_prompt = get_system_prompt(
755
- mode=st.session_state.search_mode,
756
  style=st.session_state.response_style,
757
  include_search_results=use_web_search,
758
  include_uploaded_files=has_uploaded_files
@@ -763,13 +972,16 @@ def process_input(query: str, uploaded_files):
763
  ]
764
 
765
  user_content = query
 
 
 
766
  if search_content:
767
  user_content += "\n\n" + search_content
768
  if file_content:
769
  user_content += "\n\n" + file_content
770
 
771
  if valid_videos:
772
- user_content += "\n\n# Available Videos\n"
773
  for i, vid in enumerate(valid_videos):
774
  user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
775
 
@@ -794,15 +1006,15 @@ def process_input(query: str, uploaded_files):
794
  message_placeholder.markdown(full_response, unsafe_allow_html=True)
795
 
796
  if valid_videos:
797
- st.subheader("Related Videos")
798
  for video in valid_videos:
799
- video_title = video.get('title', 'Related video')
800
  video_url = video.get('url', '')
801
 
802
  st.markdown(f"🎬 **[{video_title}]({video_url})**")
803
- st.write(f"Source: {video.get('source', 'Unknown')}")
804
 
805
- status.update(label="Response completed!", state="complete")
806
 
807
  st.session_state.messages.append({
808
  "role": "assistant",
@@ -812,27 +1024,27 @@ def process_input(query: str, uploaded_files):
812
 
813
  except Exception as api_error:
814
  error_message = str(api_error)
815
- logging.error(f"API error: {error_message}")
816
- status.update(label=f"Error: {error_message}", state="error")
817
- raise Exception(f"Response generation error: {error_message}")
818
 
819
  if st.session_state.generate_image and full_response:
820
- with st.spinner("Generating custom image..."):
821
  try:
822
  ip = extract_image_prompt(full_response, query)
823
  img, cap = generate_image(ip)
824
  if img:
825
- st.subheader("AI-Generated Image")
826
  st.image(img, caption=cap, use_container_width=True)
827
  except Exception as img_error:
828
- logging.error(f"Image generation error: {str(img_error)}")
829
- st.warning("Custom image generation failed.")
830
 
831
  if full_response:
832
- st.subheader("Download This Response")
833
  c1, c2 = st.columns(2)
834
  c1.download_button(
835
- "Markdown",
836
  data=full_response,
837
  file_name=f"{query[:30]}.md",
838
  mime="text/markdown"
@@ -850,19 +1062,19 @@ def process_input(query: str, uploaded_files):
850
  with open(fn, "w", encoding="utf-8") as fp:
851
  json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
852
  except Exception as e:
853
- logging.error(f"Auto-save failed: {e}")
854
 
855
  except Exception as e:
856
  error_message = str(e)
857
- placeholder.error(f"An error occurred: {error_message}")
858
- logging.error(f"Process input error: {error_message}")
859
- ans = f"An error occurred while processing your request: {error_message}"
860
  st.session_state.messages.append({"role": "assistant", "content": ans})
861
 
862
  # ──────────────────────────────── main ────────────────────────────────────
863
  def main():
864
- st.write("==== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
865
- perplexity_app()
866
 
867
  if __name__ == "__main__":
868
  main()
 
6
  from PIL import Image
7
 
8
  import streamlit as st
9
+ from openai import OpenAI
10
 
11
  from gradio_client import Client
12
  import pandas as pd
13
  import PyPDF2 # For handling PDF files
14
+ import kagglehub
15
 
16
  # ──────────────────────────────── Environment Variables / Constants ─────────────────────────
17
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
 
21
  BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
22
  IMAGE_API_URL = "http://211.233.58.201:7896"
23
  MAX_TOKENS = 7999
24
+ KAGGLE_API_KEY = os.getenv("KDATA_API", "")
25
+
26
+ # Set Kaggle API key
27
+ os.environ["KAGGLE_KEY"] = KAGGLE_API_KEY
28
+
29
+ # Analysis modes and style definitions
30
+ ANALYSIS_MODES = {
31
+ "price_forecast": "농산물 가격 예츑과 μ‹œμž₯ 뢄석",
32
+ "market_trend": "μ‹œμž₯ 동ν–₯ 및 μˆ˜μš” νŒ¨ν„΄ 뢄석",
33
+ "production_analysis": "μƒμ‚°λŸ‰ 뢄석 및 μ‹λŸ‰ μ•ˆλ³΄ 전망",
34
+ "agricultural_policy": "농업 μ •μ±… 및 규제 영ν–₯ 뢄석",
35
+ "climate_impact": "κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 뢄석"
36
  }
37
 
38
  RESPONSE_STYLES = {
39
+ "professional": "전문적이고 ν•™μˆ μ μΈ 뢄석",
40
+ "simple": "μ‰½κ²Œ 이해할 수 μžˆλŠ” κ°„κ²°ν•œ μ„€λͺ…",
41
+ "detailed": "μƒμ„Έν•œ 톡계 기반 깊이 μžˆλŠ” 뢄석",
42
+ "action_oriented": "μ‹€ν–‰ κ°€λŠ₯ν•œ μ‘°μ–Έκ³Ό μΆ”μ²œ 쀑심"
43
  }
44
 
45
  # Example search queries
46
  EXAMPLE_QUERIES = {
47
+ "example1": "μŒ€ 가격 μΆ”μ„Έ 및 ν–₯ν›„ 6κ°œμ›” 전망을 λΆ„μ„ν•΄μ£Όμ„Έμš”",
48
+ "example2": "κΈ°ν›„ λ³€ν™”λŠ” ν•œκ΅­ μ±„μ†Œ 생산에 μ–΄λ–€ 영ν–₯을 λ―ΈμΉ˜λ‚˜μš”?",
49
+ "example3": "세계 곑물 μ‹œμž₯ 동ν–₯κ³Ό κ΅­λ‚΄ μ‹λŸ‰ μ•ˆλ³΄μ— λ―ΈμΉ˜λŠ” 영ν–₯은?"
50
  }
51
 
52
  # ──────────────────────────────── Logging ────────────────────────────────
 
65
  timeout=60.0,
66
  max_retries=3
67
  )
68
+
69
+ # ────────────────────────────── Kaggle Dataset Access ──────────────────────
70
@st.cache_resource
def load_agriculture_dataset():
    """Fetch the UN global food & agriculture statistics dataset via kagglehub.

    Returns:
        dict with 'base_path' (download directory) and 'files' (list of
        records: 'name', 'path', 'size_mb' per CSV found under the
        download directory), or None when the download/scan fails.

    Cached as a Streamlit resource so the Kaggle download happens at most
    once per server process.
    """
    try:
        base_dir = kagglehub.dataset_download(
            "unitednations/global-food-agriculture-statistics"
        )
        logging.info(f"Kaggle dataset downloaded to: {base_dir}")

        # Collect metadata for every CSV anywhere under the download dir.
        csv_records = [
            {
                'name': fname,
                'path': os.path.join(folder, fname),
                # bytes -> MB, rounded for display
                'size_mb': round(
                    os.path.getsize(os.path.join(folder, fname)) / (1024 * 1024), 2
                ),
            }
            for folder, _subdirs, fnames in os.walk(base_dir)
            for fname in fnames
            if fname.endswith('.csv')
        ]

        return {'base_path': base_dir, 'files': csv_records}
    except Exception as e:
        # Best-effort: callers treat None as "dataset unavailable".
        logging.error(f"Error loading Kaggle dataset: {e}")
        return None
97
+
98
def get_dataset_summary():
    """Build a markdown overview of the downloaded agriculture dataset.

    Lists up to 10 CSV files with their sizes, then previews the first
    file (5 rows) and enumerates its columns. Returns an error string
    when the dataset cannot be loaded.
    """
    dataset_info = load_agriculture_dataset()
    if not dataset_info:
        return "Failed to load the UN global food and agriculture statistics dataset."

    files = dataset_info['files']
    parts = [
        "# UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋\n\n",
        f"총 {len(files)}개의 CSV 파일이 ν¬ν•¨λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€.\n\n",
        "## μ‚¬μš© κ°€λŠ₯ν•œ 데이터 파일:\n\n",
    ]

    # Keep the listing short: at most the first 10 files.
    for idx, entry in enumerate(files[:10], 1):
        parts.append(f"{idx}. **{entry['name']}** ({entry['size_mb']} MB)\n")
    if len(files) > 10:
        parts.append(f"\n...μ™Έ {len(files) - 10}개 파일\n")

    # Preview the first file so the model learns the data layout;
    # failures here degrade to a note rather than aborting the summary.
    try:
        if files:
            sample_df = pd.read_csv(files[0]['path'], nrows=5)
            parts.append("\n## 데이터 μƒ˜ν”Œ ꡬ쑰:\n\n")
            parts.append(sample_df.head(5).to_markdown() + "\n\n")
            parts.append("## 데이터셋 λ³€μˆ˜ μ„€λͺ…:\n\n")
            parts.extend(
                f"- **{col}**: [λ³€μˆ˜ μ„€λͺ… ν•„μš”]\n" for col in sample_df.columns
            )
    except Exception as e:
        logging.error(f"Error generating dataset sample: {e}")
        parts.append("\n데이터 μƒ˜ν”Œμ„ μƒμ„±ν•˜λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€.\n")

    return "".join(parts)
131
+
132
def analyze_dataset_for_query(query):
    """Select and summarize dataset files relevant to *query*.

    Maps Korean commodity/market terms found in the query to English
    keywords expected in dataset filenames, picks matching CSV files
    (falling back to the first 5 files when nothing matches), and returns
    a markdown report with per-file shape, a 5-row sample and basic
    descriptive statistics for up to 3 files.

    Args:
        query: Free-text user question (Korean keywords are recognized).

    Returns:
        Markdown analysis string, or an error message when the Kaggle
        dataset cannot be loaded.
    """
    dataset_info = load_agriculture_dataset()
    if not dataset_info:
        return "데이터셋을 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€. Kaggle API 연결을 ν™•μΈν•΄μ£Όμ„Έμš”."

    # Lowercase once for matching (affects any Latin text in the query).
    query_lower = query.lower()

    # Korean query terms -> English keywords expected in dataset filenames.
    # Named keyword_map (not `keywords`) to avoid shadowing the module-level
    # keywords() helper defined elsewhere in this file.
    keyword_map = {
        "μŒ€": ["rice", "grain"],
        "λ°€": ["wheat", "grain"],
        "μ˜₯수수": ["corn", "maize", "grain"],
        "μ±„μ†Œ": ["vegetable", "produce"],
        "과일": ["fruit", "produce"],
        "가격": ["price", "cost", "value"],
        "생산": ["production", "yield", "harvest"],
        "수좜": ["export", "trade"],
        "μˆ˜μž…": ["import", "trade"],
        "μ†ŒλΉ„": ["consumption", "demand"]
    }

    relevant_files = []

    # Collect the Korean term plus its English equivalents for every hit.
    found_keywords = []
    for k_term, e_terms in keyword_map.items():
        if k_term in query_lower:
            found_keywords.extend([k_term] + e_terms)
    # Dedupe while keeping first-seen order: shared terms like "grain" would
    # otherwise repeat, and set() iteration order would make the report
    # nondeterministic between runs.
    found_keywords = list(dict.fromkeys(found_keywords))

    if not found_keywords:
        # No recognized terms: fall back to the first 5 files.
        relevant_files = dataset_info['files'][:5]
    else:
        # Keep any file whose name contains at least one found keyword.
        for file_info in dataset_info['files']:
            file_name_lower = file_info['name'].lower()
            for keyword in found_keywords:
                if keyword.lower() in file_name_lower:
                    relevant_files.append(file_info)
                    break

        # Still nothing matched the filenames: same fallback.
        if not relevant_files:
            relevant_files = dataset_info['files'][:5]

    # Assemble the markdown report.
    analysis_result = "# 농업 데이터 뢄석 κ²°κ³Ό\n\n"
    analysis_result += f"쿼리: '{query}'에 λŒ€ν•œ 뢄석을 μˆ˜ν–‰ν–ˆμŠ΅λ‹ˆλ‹€.\n\n"

    if found_keywords:
        analysis_result += f"## 뢄석 ν‚€μ›Œλ“œ: {', '.join(found_keywords)}\n\n"

    # Analyze at most 3 files to bound latency and memory.
    for file_info in relevant_files[:3]:
        try:
            analysis_result += f"## 파일: {file_info['name']}\n\n"

            # NOTE(review): reads the entire CSV; large files may be slow —
            # acceptable for this report but worth confirming file sizes.
            df = pd.read_csv(file_info['path'])

            # Basic shape information.
            analysis_result += f"- ν–‰ 수: {len(df)}\n"
            analysis_result += f"- μ—΄ 수: {len(df.columns)}\n"
            analysis_result += f"- μ—΄ λͺ©λ‘: {', '.join(df.columns.tolist())}\n\n"

            # First rows as a markdown table.
            analysis_result += "### 데이터 μƒ˜ν”Œ:\n\n"
            analysis_result += df.head(5).to_markdown() + "\n\n"

            # Descriptive statistics over numeric columns only.
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 0:
                analysis_result += "### κΈ°λ³Έ 톡계:\n\n"
                stats_df = df[numeric_cols].describe()
                analysis_result += stats_df.to_markdown() + "\n\n"

            # Flag time-like columns so downstream analysis can use them.
            time_cols = [col for col in df.columns
                         if 'year' in col.lower() or 'date' in col.lower()]
            if time_cols:
                analysis_result += "### μ‹œκ³„μ—΄ νŒ¨ν„΄:\n\n"
                analysis_result += "데이터셋에 μ‹œκ°„ κ΄€λ ¨ 열이 μžˆμ–΄ μ‹œκ³„μ—΄ 뢄석이 κ°€λŠ₯ν•©λ‹ˆλ‹€.\n\n"

        except Exception as e:
            # Per-file failures are reported inline; remaining files still run.
            logging.error(f"Error analyzing file {file_info['name']}: {e}")
            analysis_result += f"이 파일 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}\n\n"

    analysis_result += "## 농산물 가격 예츑 및 μˆ˜μš” 뢄석에 λŒ€ν•œ μΈμ‚¬μ΄νŠΈ\n\n"
    analysis_result += "λ°μ΄ν„°μ…‹μ—μ„œ μΆ”μΆœν•œ 정보λ₯Ό λ°”νƒ•μœΌλ‘œ λ‹€μŒ μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€:\n\n"
    analysis_result += "1. 데이터 기반 뢄석 (기본적인 μš”μ•½)\n"
    analysis_result += "2. μ£Όμš” 가격 및 μˆ˜μš” 동ν–₯\n"
    analysis_result += "3. μƒμ‚°λŸ‰ 및 무역 νŒ¨ν„΄\n\n"

    analysis_result += "이 뢄석은 UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 기반으둜 ν•©λ‹ˆλ‹€.\n\n"

    return analysis_result
231
 
232
  # ──────────────────────────────── System Prompt ─────────────────────────
233
def get_system_prompt(mode="price_forecast", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
    """Assemble the system prompt for the agricultural forecasting assistant.

    Args:
        mode: Analysis-mode key (see ANALYSIS_MODES); selects the
            mode-specific instruction block. Unknown keys are silently ignored.
        style: Response-style key (see RESPONSE_STYLES); selects one style
            guideline sentence. Unknown keys are silently ignored.
        include_search_results: When True, append the web-search usage guide.
        include_uploaded_files: When True, append the uploaded-file usage guide.

    Returns:
        The complete system prompt. The base role description, the dataset
        usage guide and the response-format requirements are always included.
    """
    base_prompt = """
당신은 농업 데이터 μ „λ¬Έκ°€λ‘œμ„œ 농산물 가격 예츑과 μˆ˜μš” 뢄석을 μˆ˜ν–‰ν•˜λŠ” AI μ–΄μ‹œμŠ€ν„΄νŠΈμž…λ‹ˆλ‹€.

μ£Όμš” μž„λ¬΄:
1. UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 기반으둜 농산물 μ‹œμž₯ 뢄석
2. 농산물 가격 μΆ”μ„Έ 예츑 및 μˆ˜μš” νŒ¨ν„΄ 뢄석
3. 데이터λ₯Ό λ°”νƒ•μœΌλ‘œ λͺ…ν™•ν•˜κ³  κ·Όκ±° μžˆλŠ” 뢄석 제곡
4. κ΄€λ ¨ 정보와 μΈμ‚¬μ΄νŠΈλ₯Ό μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ—¬ μ œμ‹œ
5. μ‹œκ°μ  이해λ₯Ό 돕기 μœ„ν•΄ 차트, κ·Έλž˜ν”„ 등을 적절히 ν™œμš©

μ€‘μš” κ°€μ΄λ“œλΌμΈ:
- 데이터에 κΈ°λ°˜ν•œ 객관적 뢄석을 μ œκ³΅ν•˜μ„Έμš”
- 뢄석 κ³Όμ •κ³Ό 방법둠을 λͺ…ν™•νžˆ μ„€λͺ…ν•˜μ„Έμš”
- 톡계적 μ‹ λ’°μ„±κ³Ό ν•œκ³„μ μ„ 투λͺ…ν•˜κ²Œ μ œμ‹œν•˜μ„Έμš”
- μ΄ν•΄ν•˜κΈ° μ‰¬μš΄ μ‹œκ°μ  μš”μ†Œλ‘œ 뢄석 κ²°κ³Όλ₯Ό λ³΄μ™„ν•˜μ„Έμš”
- λ§ˆν¬λ‹€μš΄μ„ ν™œμš©ν•΄ 응닡을 μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ„Έμš”

"""

    # One focused instruction block per analysis mode.
    mode_prompts = {
        "price_forecast": """
농산물 가격 예츑 및 μ‹œμž₯ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- κ³Όκ±° 가격 데이터 νŒ¨ν„΄μ— κΈ°λ°˜ν•œ 예츑 제곡
- 가격 변동성 μš”μΈ 뢄석(κ³„μ ˆμ„±, 날씨, μ •μ±… λ“±)
- 단기 및 쀑μž₯κΈ° 가격 전망 μ œμ‹œ
- 가격에 영ν–₯을 λ―ΈμΉ˜λŠ” κ΅­λ‚΄μ™Έ μš”μΈ 식별
- μ‹œμž₯ λΆˆν™•μ‹€μ„±κ³Ό 리슀크 μš”μ†Œ κ°•μ‘°
""",
        "market_trend": """
μ‹œμž₯ 동ν–₯ 및 μˆ˜μš” νŒ¨ν„΄ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- μ£Όμš” 농산물 μˆ˜μš” λ³€ν™” νŒ¨ν„΄ 식별
- μ†ŒλΉ„μž μ„ ν˜Έλ„ 및 ꡬ맀 행동 뢄석
- μ‹œμž₯ μ„Έκ·Έλ¨ΌνŠΈ 및 ν‹ˆμƒˆμ‹œμž₯ 기회 탐색
- μ‹œμž₯ ν™•λŒ€/μΆ•μ†Œ νŠΈλ Œλ“œ 평가
- μˆ˜μš” 탄λ ₯μ„± 및 가격 민감도 뢄석
""",
        "production_analysis": """
μƒμ‚°λŸ‰ 뢄석 및 μ‹λŸ‰ μ•ˆλ³΄ 전망에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- μž‘λ¬Ό μƒμ‚°λŸ‰ μΆ”μ„Έ 및 변동 μš”μΈ 뢄석
- μ‹λŸ‰ 생산과 인ꡬ μ„±μž₯ κ°„μ˜ 관계 평가
- κ΅­κ°€/지역별 생산 μ—­λŸ‰ 비ꡐ
- μ‹λŸ‰ μ•ˆλ³΄ μœ„ν˜‘ μš”μ†Œ 및 취약점 식별
- 생산성 ν–₯상 μ „λž΅ 및 기회 μ œμ•ˆ
""",
        "agricultural_policy": """
농업 μ •μ±… 및 규제 영ν–₯ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- μ •λΆ€ μ •μ±…κ³Ό, 보쑰금, 규제의 μ‹œμž₯ 영ν–₯ 뢄석
- ꡭ제 무역 μ •μ±…κ³Ό κ΄€μ„Έμ˜ 농산물 가격 영ν–₯ 평가
- 농업 지원 ν”„λ‘œκ·Έλž¨μ˜ νš¨κ³Όμ„± κ²€ν† 
- 규제 ν™˜κ²½ 변화에 λ”°λ₯Έ μ‹œμž₯ μ‘°μ • 예츑
- 정책적 κ°œμž…μ˜ μ˜λ„λœ/μ˜λ„μΉ˜ μ•Šμ€ κ²°κ³Ό 뢄석
""",
        "climate_impact": """
κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- κΈ°ν›„ 변화와 농산물 μƒμ‚°λŸ‰/ν’ˆμ§ˆ κ°„μ˜ 상관관계 뢄석
- 기상 이변이 가격 변동성에 λ―ΈμΉ˜λŠ” 영ν–₯ 평가
- μž₯기적 κΈ°ν›„ 좔세에 λ”°λ₯Έ 농업 νŒ¨ν„΄ λ³€ν™” 예츑
- κΈ°ν›„ 회볡λ ₯ μžˆλŠ” 농업 μ‹œμŠ€ν…œ μ „λž΅ μ œμ•ˆ
- 지역별 κΈ°ν›„ μœ„ν—˜ λ…ΈμΆœλ„ 및 μ·¨μ•½μ„± λ§€ν•‘
"""
    }

    # One tone/format sentence per response style.
    style_guides = {
        "professional": "전문적이고 ν•™μˆ μ μΈ μ–΄μ‘°λ₯Ό μ‚¬μš©ν•˜μ„Έμš”. 기술적 μš©μ–΄λ₯Ό 적절히 μ‚¬μš©ν•˜κ³  체계적인 데이터 뢄석을 μ œκ³΅ν•˜μ„Έμš”.",
        "simple": "쉽고 κ°„κ²°ν•œ μ–Έμ–΄λ‘œ μ„€λͺ…ν•˜μ„Έμš”. μ „λ¬Έ μš©μ–΄λŠ” μ΅œμ†Œν™”ν•˜κ³  핡심 κ°œλ…μ„ 일상적인 ν‘œν˜„μœΌλ‘œ μ „λ‹¬ν•˜μ„Έμš”.",
        "detailed": "μƒμ„Έν•˜κ³  포괄적인 뢄석을 μ œκ³΅ν•˜μ„Έμš”. λ‹€μ–‘ν•œ 데이터 포인트, 톡계적 λ‰˜μ•™μŠ€, 그리고 μ—¬λŸ¬ μ‹œλ‚˜λ¦¬μ˜€λ₯Ό κ³ λ €ν•œ 심측 뢄석을 μ œμ‹œν•˜μ„Έμš”.",
        "action_oriented": "μ‹€ν–‰ κ°€λŠ₯ν•œ μΈμ‚¬μ΄νŠΈμ™€ ꡬ체적인 ꢌμž₯사항에 μ΄ˆμ μ„ λ§žμΆ”μ„Έμš”. 'λ‹€μŒ 단계' 및 'μ‹€μ§ˆμ  μ‘°μ–Έ' μ„Ήμ…˜μ„ ν¬ν•¨ν•˜μ„Έμš”."
    }

    dataset_guide = """
UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋 ν™œμš© μ§€μΉ¨:
- 제곡된 데이터셋 뢄석 κ²°κ³Όλ₯Ό μ‘λ‹΅μ˜ μ£Όμš” 근거둜 μ‚¬μš©ν•˜μ„Έμš”
- λ°μ΄ν„°μ˜ μΆœμ²˜μ™€ 연도λ₯Ό λͺ…ν™•νžˆ μΈμš©ν•˜μ„Έμš”
- 데이터셋 λ‚΄ μ£Όμš” λ³€μˆ˜ κ°„μ˜ 관계λ₯Ό λΆ„μ„ν•˜μ—¬ μΈμ‚¬μ΄νŠΈλ₯Ό λ„μΆœν•˜μ„Έμš”
- λ°μ΄ν„°μ˜ ν•œκ³„μ™€ λΆˆν™•μ‹€μ„±μ„ 투λͺ…ν•˜κ²Œ μ–ΈκΈ‰ν•˜μ„Έμš”
- ν•„μš”μ‹œ 데이터 격차λ₯Ό μ‹λ³„ν•˜κ³  μΆ”κ°€ 연ꡬ가 ν•„μš”ν•œ μ˜μ—­μ„ μ œμ•ˆν•˜μ„Έμš”
"""

    search_guide = """
μ›Ή 검색 κ²°κ³Ό ν™œμš© μ§€μΉ¨:
- 데이터셋 뢄석을 λ³΄μ™„ν•˜λŠ” μ΅œμ‹  μ‹œμž₯ μ •λ³΄λ‘œ 검색 κ²°κ³Όλ₯Ό ν™œμš©ν•˜μ„Έμš”
- 각 μ •λ³΄μ˜ 좜처λ₯Ό λ§ˆν¬λ‹€μš΄ 링크둜 ν¬ν•¨ν•˜μ„Έμš”: [좜처λͺ…](URL)
- μ£Όμš” μ£Όμž₯μ΄λ‚˜ 데이터 ν¬μΈνŠΈλ§ˆλ‹€ 좜처λ₯Ό ν‘œμ‹œν•˜μ„Έμš”
- μΆœμ²˜κ°€ 상좩할 경우, λ‹€μ–‘ν•œ 관점과 신뒰도λ₯Ό μ„€λͺ…ν•˜μ„Έμš”
- κ΄€λ ¨ λ™μ˜μƒ λ§ν¬λŠ” [λΉ„λ””μ˜€: 제λͺ©](video_url) ν˜•μ‹μœΌλ‘œ ν¬ν•¨ν•˜μ„Έμš”
- 검색 정보λ₯Ό μΌκ΄€λ˜κ³  체계적인 μ‘λ‹΅μœΌλ‘œ ν†΅ν•©ν•˜μ„Έμš”
- λͺ¨λ“  μ£Όμš” 좜처λ₯Ό λ‚˜μ—΄ν•œ "μ°Έκ³  자료" μ„Ήμ…˜μ„ λ§ˆμ§€λ§‰μ— ν¬ν•¨ν•˜μ„Έμš”
"""

    # NOTE: the third bullet originally contained mojibake
    # ("좜처둜 οΏ½οΏ½οΏ½μš©ν•˜μ„Έμš”"); restored as "μΈμš©ν•˜μ„Έμš”" (cite as source) to
    # match the sentence's "quote passages and cite files" intent.
    upload_guide = """
μ—…λ‘œλ“œλœ 파일 ν™œμš© μ§€μΉ¨:
- μ—…λ‘œλ“œλœ νŒŒμΌμ„ μ‘λ‹΅μ˜ μ£Όμš” μ •λ³΄μ›μœΌλ‘œ ν™œμš©ν•˜μ„Έμš”
- 쿼리와 직접 κ΄€λ ¨λœ 파일 정보λ₯Ό μΆ”μΆœν•˜κ³  κ°•μ‘°ν•˜μ„Έμš”
- κ΄€λ ¨ κ΅¬μ ˆμ„ μΈμš©ν•˜κ³  νŠΉμ • νŒŒμΌμ„ 좜처둜 μΈμš©ν•˜μ„Έμš”
- CSV 파일의 수치 λ°μ΄ν„°λŠ” μš”μ•½ λ¬Έμž₯으둜 λ³€ν™˜ν•˜μ„Έμš”
- PDF μ½˜ν…μΈ λŠ” νŠΉμ • μ„Ήμ…˜μ΄λ‚˜ νŽ˜μ΄μ§€λ₯Ό μ°Έμ‘°ν•˜μ„Έμš”
- 파일 정보λ₯Ό μ›Ή 검색 결과와 μ›ν™œν•˜κ²Œ ν†΅ν•©ν•˜μ„Έμš”
- 정보가 상좩할 경우, 일반적인 μ›Ή 결과보닀 파일 μ½˜ν…μΈ λ₯Ό μš°μ„ μ‹œν•˜μ„Έμš”
"""

    # Base prompt
    final_prompt = base_prompt

    # Add mode-specific guidance (unknown mode keys fall through silently).
    if mode in mode_prompts:
        final_prompt += "\n" + mode_prompts[mode]

    # Style (unknown style keys fall through silently).
    if style in style_guides:
        final_prompt += f"\n\n뢄석 μŠ€νƒ€μΌ: {style_guides[style]}"

    # Always include dataset guide
    final_prompt += f"\n\n{dataset_guide}"

    if include_search_results:
        final_prompt += f"\n\n{search_guide}"

    if include_uploaded_files:
        final_prompt += f"\n\n{upload_guide}"

    final_prompt += """
\n\n응닡 ν˜•μ‹ μš”κ΅¬μ‚¬ν•­:
- λ§ˆν¬λ‹€μš΄ 제λͺ©(## 및 ###)을 μ‚¬μš©ν•˜μ—¬ 응닡을 μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ„Έμš”
- μ€‘μš”ν•œ 점은 ꡡ은 ν…μŠ€νŠΈ(**ν…μŠ€νŠΈ**)둜 κ°•μ‘°ν•˜μ„Έμš”
- 3-5개의 후속 μ§ˆλ¬Έμ„ ν¬ν•¨ν•œ "κ΄€λ ¨ 질문" μ„Ήμ…˜μ„ λ§ˆμ§€λ§‰μ— μΆ”κ°€ν•˜μ„Έμš”
- μ μ ˆν•œ 간격과 단락 κ΅¬λΆ„μœΌλ‘œ 응닡을 μ„œμ‹ν™”ν•˜μ„Έμš”
- λͺ¨λ“  λ§ν¬λŠ” λ§ˆν¬λ‹€μš΄ ν˜•μ‹μœΌλ‘œ 클릭 κ°€λŠ₯ν•˜κ²Œ λ§Œλ“œμ„Έμš”: [ν…μŠ€νŠΈ](url)
- κ°€λŠ₯ν•œ 경우 데이터λ₯Ό μ‹œκ°μ μœΌλ‘œ ν‘œν˜„(ν‘œ, κ·Έλž˜ν”„ λ“±μ˜ μ„€λͺ…)ν•˜μ„Έμš”
"""
    return final_prompt
368
 
369
  # ──────────────────────────────── Brave Search API ────────────────────────
370
  @st.cache_data(ttl=3600)
371
+ def brave_search(query: str, count: int = 10):
372
  if not BRAVE_KEY:
373
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
374
 
375
  headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
376
+ params = {"q": query + " 농산물 가격 동ν–₯ 농업 데이터", "count": str(count)}
377
 
378
  for attempt in range(3):
379
  try:
 
411
  return []
412
 
413
  @st.cache_data(ttl=3600)
414
+ def brave_video_search(query: str, count: int = 3):
415
  if not BRAVE_KEY:
416
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
417
 
418
  headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
419
+ params = {"q": query + " 농산물 가격 농업 μ‹œμž₯", "count": str(count)}
420
 
421
  for attempt in range(3):
422
  try:
 
445
  return []
446
 
447
  @st.cache_data(ttl=3600)
448
+ def brave_news_search(query: str, count: int = 3):
449
  if not BRAVE_KEY:
450
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
451
 
452
  headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
453
+ params = {"q": query + " 농산물 가격 동ν–₯ 농업", "count": str(count)}
454
 
455
  for attempt in range(3):
456
  try:
 
481
 
482
def mock_results(query: str) -> str:
    """Return fallback guidance text when the Brave search API fails.

    The string instructs the model to answer from its own knowledge and is
    timestamped so the caller can see when the fallback was generated.
    Fixes the mojibake "μƒμ„±ν•΄οΏ½οΏ½μ„Έμš”" -> "μƒμ„±ν•΄μ£Όμ„Έμš”" in the original text.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return (f"# λŒ€μ²΄ 검색 μ½˜ν…μΈ  (생성 μ‹œκ°„: {ts})\n\n"
            f"'{query}'에 λŒ€ν•œ 검색 API μš”μ²­μ΄ μ‹€νŒ¨ν–ˆκ±°λ‚˜ κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€. "
            f"κΈ°μ‘΄ 지식을 기반으둜 응닡을 μƒμ„±ν•΄μ£Όμ„Έμš”.\n\n"
            f"λ‹€μŒ 사항을 κ³ λ €ν•˜μ„Έμš”:\n\n"
            f"- {query}에 κ΄€ν•œ κΈ°λ³Έ κ°œλ…κ³Ό μ€‘μš”μ„±\n"
            f"- 일반적으둜 μ•Œλ €μ§„ κ΄€λ ¨ ν†΅κ³„λ‚˜ μΆ”μ„Έ\n"
            f"- 이 μ£Όμ œμ— λŒ€ν•œ μ „λ¬Έκ°€ 의견\n"
            f"- λ…μžκ°€ κ°€μ§ˆ 수 μžˆλŠ” 질문\n\n"
            f"μ°Έκ³ : μ΄λŠ” μ‹€μ‹œκ°„ 데이터가 μ•„λ‹Œ λŒ€μ²΄ μ§€μΉ¨μž…λ‹ˆλ‹€.\n\n")
493
 
494
  def do_web_search(query: str) -> str:
495
  try:
496
+ arts = brave_search(query, 10)
497
  if not arts:
498
  logging.warning("No search results, using fallback content")
499
  return mock_results(query)
 
501
  videos = brave_video_search(query, 2)
502
  news = brave_news_search(query, 3)
503
 
504
+ result = "# μ›Ή 검색 κ²°κ³Ό\nλ‹€μŒ κ²°κ³Όλ₯Ό ν™œμš©ν•˜μ—¬ 데이터셋 뢄석을 λ³΄μ™„ν•˜λŠ” 포괄적인 닡변을 μ œκ³΅ν•˜μ„Έμš”.\n\n"
505
 
506
+ result += "## μ›Ή κ²°κ³Ό\n\n"
507
+ for a in arts[:5]:
508
+ result += f"### κ²°κ³Ό {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
509
+ result += f"**좜처**: [{a['displayed_link']}]({a['link']})\n\n---\n"
510
 
511
+ if news:
512
+ result += "## λ‰΄μŠ€ κ²°κ³Ό\n\n"
513
+ for n in news:
514
+ result += f"### {n['title']}\n\n{n['description']}\n\n"
515
+ result += f"**좜처**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n"
516
+
517
  if videos:
518
+ result += "## λΉ„λ””μ˜€ κ²°κ³Ό\n\n"
519
  for vid in videos:
520
  result += f"### {vid['title']}\n\n"
521
  if vid.get('thumbnail_url'):
522
+ result += f"![썸넀일]({vid['thumbnail_url']})\n\n"
523
+ result += f"**μ‹œμ²­**: [{vid['source']}]({vid['video_url']})\n\n"
 
 
 
 
 
 
524
 
525
  return result
526
 
 
538
  if len(text) > 10000:
539
  text = text[:9700] + "...(truncated)..."
540
 
541
+ result = f"## ν…μŠ€νŠΈ 파일: {file.name}\n\n" + text
542
  return result
543
  except Exception as e:
544
  logging.error(f"Error processing text file: {str(e)}")
545
+ return f"ν…μŠ€νŠΈ 파일 처리 였λ₯˜: {str(e)}"
546
 
547
  def process_csv_file(file):
548
  try:
 
550
  file.seek(0)
551
 
552
  df = pd.read_csv(io.BytesIO(content))
553
+ result = f"## CSV 파일: {file.name}\n\n"
554
+ result += f"- ν–‰: {len(df)}\n"
555
+ result += f"- μ—΄: {len(df.columns)}\n"
556
+ result += f"- μ—΄ 이름: {', '.join(df.columns.tolist())}\n\n"
557
 
558
+ result += "### 데이터 미리보기\n\n"
559
  preview_df = df.head(10)
560
  try:
561
  markdown_table = preview_df.to_markdown(index=False)
562
  if markdown_table:
563
  result += markdown_table + "\n\n"
564
  else:
565
+ result += "CSV 데이터λ₯Ό ν‘œμ‹œν•  수 μ—†μŠ΅λ‹ˆλ‹€.\n\n"
566
  except Exception as e:
567
  logging.error(f"Markdown table conversion error: {e}")
568
+ result += "ν…μŠ€νŠΈλ‘œ 데이터 ν‘œμ‹œ:\n\n" + str(preview_df) + "\n\n"
569
 
570
  num_cols = df.select_dtypes(include=['number']).columns
571
  if len(num_cols) > 0:
572
+ result += "### κΈ°λ³Έ 톡계 정보\n\n"
573
  try:
574
  stats_df = df[num_cols].describe().round(2)
575
  stats_markdown = stats_df.to_markdown()
576
  if stats_markdown:
577
  result += stats_markdown + "\n\n"
578
  else:
579
+ result += "톡계 정보λ₯Ό ν‘œμ‹œν•  수 μ—†μŠ΅λ‹ˆλ‹€.\n\n"
580
  except Exception as e:
581
  logging.error(f"Statistical info conversion error: {e}")
582
+ result += "톡계 정보λ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.\n\n"
583
 
584
  return result
585
  except Exception as e:
586
  logging.error(f"CSV file processing error: {str(e)}")
587
+ return f"CSV 파일 처리 였λ₯˜: {str(e)}"
588
 
589
  def process_pdf_file(file):
590
  try:
 
594
  pdf_file = io.BytesIO(file_bytes)
595
  reader = PyPDF2.PdfReader(pdf_file, strict=False)
596
 
597
+ result = f"## PDF 파일: {file.name}\n\n- 총 νŽ˜μ΄μ§€: {len(reader.pages)}\n\n"
598
 
599
  max_pages = min(5, len(reader.pages))
600
  all_text = ""
 
603
  try:
604
  page = reader.pages[i]
605
  page_text = page.extract_text()
606
+ current_page_text = f"### νŽ˜μ΄μ§€ {i+1}\n\n"
607
  if page_text and len(page_text.strip()) > 0:
608
  if len(page_text) > 1500:
609
+ current_page_text += page_text[:1500] + "...(좕약됨)...\n\n"
610
  else:
611
  current_page_text += page_text + "\n\n"
612
  else:
613
+ current_page_text += "(ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•  수 μ—†μŒ)\n\n"
614
 
615
  all_text += current_page_text
616
 
617
  if len(all_text) > 8000:
618
+ all_text += "...(λ‚˜λ¨Έμ§€ νŽ˜μ΄μ§€ 좕약됨)...\n\n"
619
  break
620
 
621
  except Exception as page_err:
622
  logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
623
+ all_text += f"### νŽ˜μ΄μ§€ {i+1}\n\n(λ‚΄μš© μΆ”μΆœ 였λ₯˜: {str(page_err)})\n\n"
624
 
625
  if len(reader.pages) > max_pages:
626
+ all_text += f"\nμ°Έκ³ : 처음 {max_pages} νŽ˜μ΄μ§€λ§Œ ν‘œμ‹œλ©λ‹ˆλ‹€.\n\n"
627
 
628
+ result += "### PDF λ‚΄μš©\n\n" + all_text
629
  return result
630
 
631
  except Exception as e:
632
  logging.error(f"PDF file processing error: {str(e)}")
633
+ return f"## PDF 파일: {file.name}\n\n였λ₯˜: {str(e)}\n\nμ²˜λ¦¬ν•  수 μ—†μŠ΅λ‹ˆλ‹€."
634
 
635
  def process_uploaded_files(files):
636
  if not files:
637
  return None
638
 
639
+ result = "# μ—…λ‘œλ“œλœ 파일 λ‚΄μš©\n\nμ‚¬μš©μžκ°€ μ œκ³΅ν•œ 파일의 λ‚΄μš©μž…λ‹ˆλ‹€.\n\n"
640
  for file in files:
641
  try:
642
  ext = file.name.split('.')[-1].lower()
 
647
  elif ext == 'pdf':
648
  result += process_pdf_file(file) + "\n\n---\n\n"
649
  else:
650
+ result += f"### μ§€μ›λ˜μ§€ μ•ŠλŠ” 파일: {file.name}\n\n---\n\n"
651
  except Exception as e:
652
  logging.error(f"File processing error {file.name}: {e}")
653
+ result += f"### 파일 처리 였λ₯˜: {file.name}\n\n였λ₯˜: {e}\n\n---\n\n"
654
 
655
  return result
656
 
 
678
  response = client.chat.completions.create(
679
  model="gpt-4.1-mini",
680
  messages=[
681
+ {"role": "system", "content": "농업 및 농산물에 κ΄€ν•œ 이미지 ν”„λ‘¬ν”„νŠΈλ₯Ό μƒμ„±ν•©λ‹ˆλ‹€. ν•œ μ€„μ˜ μ˜μ–΄λ‘œ 된 ν”„λ‘¬ν”„νŠΈλ§Œ λ°˜ν™˜ν•˜μ„Έμš”, λ‹€λ₯Έ ν…μŠ€νŠΈλŠ” ν¬ν•¨ν•˜μ§€ λ§ˆμ„Έμš”."},
682
+ {"role": "user", "content": f"주제: {topic}\n\n---\n{response_text}\n\n---"}
683
  ],
684
  temperature=1,
685
  max_tokens=80,
 
688
  return response.choices[0].message.content.strip()
689
  except Exception as e:
690
  logging.error(f"OpenAI image prompt generation error: {e}")
691
+ return f"A professional photograph of agricultural produce and farm fields, data visualization of crop prices and trends, high quality"
692
 
693
def md_to_html(md: str, title="농산물 μˆ˜μš” 예츑 뢄석 κ²°κ³Ό"):
    """Render a markdown string as a minimal standalone HTML document.

    Args:
        md: Markdown source to convert (via the `markdown` package).
        title: Document title for the <title> tag; may contain
            user-derived text, so it is HTML-escaped.

    Returns:
        A complete UTF-8 HTML page as a string.
    """
    import html  # stdlib; local import keeps the file-level import block untouched
    safe_title = html.escape(title)
    return (
        f"<!DOCTYPE html><html><head><title>{safe_title}</title>"
        f"<meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
    )
695
 
696
  def keywords(text: str, top=5):
 
698
  return " ".join(cleaned.split()[:top])
699
 
700
  # ──────────────────────────────── Streamlit UI ────────────────────────────
701
+ def agricultural_price_forecast_app():
702
+ st.title("농산물 μˆ˜μš” 및 가격 예츑 AI μ–΄μ‹œμŠ€ν„΄νŠΈ")
703
+ st.markdown("UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋 뢄석 기반의 농산물 μ‹œμž₯ 예츑")
704
 
705
  if "ai_model" not in st.session_state:
706
  st.session_state.ai_model = "gpt-4.1-mini"
 
712
  st.session_state.generate_image = False
713
  if "web_search_enabled" not in st.session_state:
714
  st.session_state.web_search_enabled = True
715
+ if "analysis_mode" not in st.session_state:
716
+ st.session_state.analysis_mode = "price_forecast"
717
  if "response_style" not in st.session_state:
718
  st.session_state.response_style = "professional"
719
 
720
  sb = st.sidebar
721
+ sb.title("뢄석 μ„€μ •")
722
 
723
+ # Kaggle dataset info display
724
+ if sb.checkbox("데이터셋 정보 ν‘œμ‹œ", value=False):
725
+ st.info("UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 λΆˆλŸ¬μ˜€λŠ” 쀑...")
726
+ dataset_info = load_agriculture_dataset()
727
+ if dataset_info:
728
+ st.success(f"데이터셋 λ‘œλ“œ μ™„λ£Œ: {len(dataset_info['files'])}개 파일")
729
+
730
+ with st.expander("데이터셋 미리보기", expanded=False):
731
+ for file_info in dataset_info['files'][:5]:
732
+ st.write(f"**{file_info['name']}** ({file_info['size_mb']} MB)")
733
+ else:
734
+ st.error("데이터셋을 λΆˆλŸ¬μ˜€λŠ”λ° μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€. Kaggle API 섀정을 ν™•μΈν•˜μ„Έμš”.")
735
+
736
+ sb.subheader("뢄석 ꡬ성")
737
  sb.selectbox(
738
+ "뢄석 λͺ¨λ“œ",
739
+ options=list(ANALYSIS_MODES.keys()),
740
+ format_func=lambda x: ANALYSIS_MODES[x],
741
+ key="analysis_mode"
742
  )
743
 
744
  sb.selectbox(
745
+ "응닡 μŠ€νƒ€μΌ",
746
  options=list(RESPONSE_STYLES.keys()),
747
  format_func=lambda x: RESPONSE_STYLES[x],
748
  key="response_style"
749
  )
750
 
751
  # Example queries
752
+ sb.subheader("μ˜ˆμ‹œ 질문")
753
  c1, c2, c3 = sb.columns(3)
754
+ if c1.button("μŒ€ 가격 전망", key="ex1"):
755
  process_example(EXAMPLE_QUERIES["example1"])
756
+ if c2.button("κΈ°ν›„ 영ν–₯", key="ex2"):
757
  process_example(EXAMPLE_QUERIES["example2"])
758
+ if c3.button("곑물 μ‹œμž₯", key="ex3"):
759
  process_example(EXAMPLE_QUERIES["example3"])
760
 
761
+ sb.subheader("기타 μ„€μ •")
762
+ sb.toggle("μžλ™ μ €μž₯", key="auto_save")
763
+ sb.toggle("이미지 μžλ™ 생성", key="generate_image")
764
 
765
+ web_search_enabled = sb.toggle("μ›Ή 검색 μ‚¬μš©", value=st.session_state.web_search_enabled)
766
  st.session_state.web_search_enabled = web_search_enabled
767
 
768
  if web_search_enabled:
769
+ st.sidebar.info("βœ… μ›Ή 검색 κ²°κ³Όκ°€ 응닡에 ν†΅ν•©λ©λ‹ˆλ‹€.")
770
 
771
  # Download the latest response
772
  latest_response = next(
 
782
  first_line = latest_response.split('\n', 1)[0].strip()
783
  title = first_line[:40] + "..." if len(first_line) > 40 else first_line
784
 
785
+ sb.subheader("μ΅œμ‹  응닡 λ‹€μš΄λ‘œλ“œ")
786
  d1, d2 = sb.columns(2)
787
+ d1.download_button("λ§ˆν¬λ‹€μš΄μœΌλ‘œ λ‹€μš΄λ‘œλ“œ", latest_response,
788
  file_name=f"{title}.md", mime="text/markdown")
789
+ d2.download_button("HTML둜 λ‹€μš΄λ‘œλ“œ", md_to_html(latest_response, title),
790
  file_name=f"{title}.html", mime="text/html")
791
 
792
  # JSON conversation record upload
793
+ up = sb.file_uploader("λŒ€ν™” 기둝 뢈러였기 (.json)", type=["json"], key="json_uploader")
794
  if up:
795
  try:
796
  st.session_state.messages = json.load(up)
797
+ sb.success("λŒ€ν™” 기둝을 μ„±κ³΅μ μœΌλ‘œ λΆˆλŸ¬μ™”μŠ΅λ‹ˆλ‹€")
798
  except Exception as e:
799
+ sb.error(f"뢈러였기 μ‹€νŒ¨: {e}")
800
 
801
  # JSON conversation record download
802
+ if sb.button("λŒ€ν™” 기둝을 JSON으둜 λ‹€μš΄λ‘œλ“œ"):
803
  sb.download_button(
804
+ "μ €μž₯",
805
  data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
806
  file_name="conversation_history.json",
807
  mime="application/json"
808
  )
809
 
810
  # File Upload
811
+ st.subheader("파일 μ—…λ‘œλ“œ")
812
  uploaded_files = st.file_uploader(
813
+ "μ°Έκ³  자료둜 μ‚¬μš©ν•  파일 μ—…λ‘œλ“œ (txt, csv, pdf)",
814
  type=["txt", "csv", "pdf"],
815
  accept_multiple_files=True,
816
  key="file_uploader"
 
818
 
819
  if uploaded_files:
820
  file_count = len(uploaded_files)
821
+ st.success(f"{file_count}개 파일이 μ—…λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. μ§ˆμ˜μ— λŒ€ν•œ μ†ŒμŠ€λ‘œ μ‚¬μš©λ©λ‹ˆλ‹€.")
822
 
823
+ with st.expander("μ—…λ‘œλ“œλœ 파일 미리보기", expanded=False):
824
  for idx, file in enumerate(uploaded_files):
825
+ st.write(f"**파일λͺ…:** {file.name}")
826
  ext = file.name.split('.')[-1].lower()
827
 
828
  if ext == 'txt':
829
  preview = file.read(1000).decode('utf-8', errors='ignore')
830
  file.seek(0)
831
  st.text_area(
832
+ f"{file.name} 미리보기",
833
  preview + ("..." if len(preview) >= 1000 else ""),
834
  height=150
835
  )
 
837
  try:
838
  df = pd.read_csv(file)
839
  file.seek(0)
840
+ st.write("CSV 미리보기 (μ΅œλŒ€ 5ν–‰)")
841
  st.dataframe(df.head(5))
842
  except Exception as e:
843
+ st.error(f"CSV 미리보기 μ‹€νŒ¨: {e}")
844
  elif ext == 'pdf':
845
  try:
846
  file_bytes = file.read()
 
850
  reader = PyPDF2.PdfReader(pdf_file, strict=False)
851
 
852
  pc = len(reader.pages)
853
+ st.write(f"PDF 파일: {pc}νŽ˜μ΄μ§€")
854
 
855
  if pc > 0:
856
  try:
857
  page_text = reader.pages[0].extract_text()
858
+ preview = page_text[:500] if page_text else "(ν…μŠ€νŠΈ μΆ”μΆœ λΆˆκ°€)"
859
+ st.text_area("첫 νŽ˜μ΄μ§€ 미리보기", preview + "...", height=150)
860
  except:
861
+ st.warning("첫 νŽ˜μ΄μ§€ ν…μŠ€νŠΈ μΆ”μΆœ μ‹€νŒ¨")
862
  except Exception as e:
863
+ st.error(f"PDF 미리보기 μ‹€νŒ¨: {e}")
864
 
865
  if idx < file_count - 1:
866
  st.divider()
 
872
 
873
  # Videos
874
  if "videos" in m and m["videos"]:
875
+ st.subheader("κ΄€λ ¨ λΉ„λ””μ˜€")
876
  for video in m["videos"]:
877
+ video_title = video.get('title', 'κ΄€λ ¨ λΉ„λ””μ˜€')
878
  video_url = video.get('url', '')
879
  thumbnail = video.get('thumbnail', '')
880
 
 
884
  st.write("🎬")
885
  with col2:
886
  st.markdown(f"**[{video_title}]({video_url})**")
887
+ st.write(f"좜처: {video.get('source', 'μ•Œ 수 μ—†μŒ')}")
888
  else:
889
  st.markdown(f"🎬 **[{video_title}]({video_url})**")
890
+ st.write(f"좜처: {video.get('source', 'μ•Œ 수 μ—†μŒ')}")
891
 
892
  # User input
893
+ query = st.chat_input("농산물 가격, μˆ˜μš” λ˜λŠ” μ‹œμž₯ 동ν–₯ κ΄€λ ¨ μ§ˆλ¬Έμ„ μž…λ ₯ν•˜μ„Έμš”.")
894
  if query:
895
  process_input(query, uploaded_files)
896
 
 
916
  has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
917
 
918
  try:
919
+ status = st.status("μ§ˆλ¬Έμ— λ‹΅λ³€ μ€€λΉ„ 쀑...")
920
+ status.update(label="ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™” 쀑...")
921
 
922
  client = get_openai_client()
923
 
 
925
  video_results = []
926
  news_results = []
927
 
928
+ # 농업 데이터셋 뢄석 κ²°κ³Ό κ°€μ Έμ˜€κΈ°
929
+ status.update(label="농업 데이터셋 뢄석 쀑...")
930
+ with st.spinner("데이터셋 뢄석 쀑..."):
931
+ dataset_analysis = analyze_dataset_for_query(query)
932
+
933
  if use_web_search:
934
+ status.update(label="μ›Ή 검색 μˆ˜ν–‰ 쀑...")
935
+ with st.spinner("μ›Ή 검색 쀑..."):
936
  search_content = do_web_search(keywords(query, top=5))
937
 
938
  try:
939
+ status.update(label="λΉ„λ””μ˜€ 검색 쀑...")
940
  video_results = brave_video_search(query, 2)
941
  news_results = brave_news_search(query, 3)
942
  except Exception as search_err:
943
+ logging.error(f"λ―Έλ””μ–΄ 검색 였λ₯˜: {search_err}")
944
 
945
  file_content = None
946
  if has_uploaded_files:
947
+ status.update(label="μ—…λ‘œλ“œλœ 파일 처리 쀑...")
948
+ with st.spinner("파일 뢄석 쀑..."):
949
  file_content = process_uploaded_files(uploaded_files)
950
 
951
  valid_videos = []
 
954
  if url and url.startswith('http'):
955
  valid_videos.append({
956
  'url': url,
957
+ 'title': vid.get('title', 'λΉ„λ””μ˜€'),
958
  'thumbnail': vid.get('thumbnail_url', ''),
959
+ 'source': vid.get('source', 'λΉ„λ””μ˜€ 좜처')
960
  })
961
 
962
+ status.update(label="μ’…ν•© 뢄석 μ€€λΉ„ 쀑...")
963
  sys_prompt = get_system_prompt(
964
+ mode=st.session_state.analysis_mode,
965
  style=st.session_state.response_style,
966
  include_search_results=use_web_search,
967
  include_uploaded_files=has_uploaded_files
 
972
  ]
973
 
974
  user_content = query
975
+ # 항상 데이터셋 뢄석 κ²°κ³Ό 포함
976
+ user_content += "\n\n" + dataset_analysis
977
+
978
  if search_content:
979
  user_content += "\n\n" + search_content
980
  if file_content:
981
  user_content += "\n\n" + file_content
982
 
983
  if valid_videos:
984
+ user_content += "\n\n# κ΄€λ ¨ λ™μ˜μƒ\n"
985
  for i, vid in enumerate(valid_videos):
986
  user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
987
 
 
1006
  message_placeholder.markdown(full_response, unsafe_allow_html=True)
1007
 
1008
  if valid_videos:
1009
+ st.subheader("κ΄€λ ¨ λΉ„λ””μ˜€")
1010
  for video in valid_videos:
1011
+ video_title = video.get('title', 'κ΄€λ ¨ λΉ„λ””μ˜€')
1012
  video_url = video.get('url', '')
1013
 
1014
  st.markdown(f"🎬 **[{video_title}]({video_url})**")
1015
+ st.write(f"좜처: {video.get('source', 'μ•Œ 수 μ—†μŒ')}")
1016
 
1017
+ status.update(label="응닡 μ™„λ£Œ!", state="complete")
1018
 
1019
  st.session_state.messages.append({
1020
  "role": "assistant",
 
1024
 
1025
  except Exception as api_error:
1026
  error_message = str(api_error)
1027
+ logging.error(f"API 였λ₯˜: {error_message}")
1028
+ status.update(label=f"였λ₯˜: {error_message}", state="error")
1029
+ raise Exception(f"응닡 생성 였λ₯˜: {error_message}")
1030
 
1031
  if st.session_state.generate_image and full_response:
1032
+ with st.spinner("λ§žμΆ€ν˜• 이미지 생성 쀑..."):
1033
  try:
1034
  ip = extract_image_prompt(full_response, query)
1035
  img, cap = generate_image(ip)
1036
  if img:
1037
+ st.subheader("AI 생성 이미지")
1038
  st.image(img, caption=cap, use_container_width=True)
1039
  except Exception as img_error:
1040
+ logging.error(f"이미지 생성 였λ₯˜: {str(img_error)}")
1041
+ st.warning("λ§žμΆ€ν˜• 이미지 생성에 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€.")
1042
 
1043
  if full_response:
1044
+ st.subheader("이 응닡 λ‹€μš΄λ‘œλ“œ")
1045
  c1, c2 = st.columns(2)
1046
  c1.download_button(
1047
+ "λ§ˆν¬λ‹€μš΄",
1048
  data=full_response,
1049
  file_name=f"{query[:30]}.md",
1050
  mime="text/markdown"
 
1062
  with open(fn, "w", encoding="utf-8") as fp:
1063
  json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
1064
  except Exception as e:
1065
+ logging.error(f"μžλ™ μ €μž₯ μ‹€νŒ¨: {e}")
1066
 
1067
  except Exception as e:
1068
  error_message = str(e)
1069
+ placeholder.error(f"였λ₯˜ λ°œμƒ: {error_message}")
1070
+ logging.error(f"μž…λ ₯ 처리 였λ₯˜: {error_message}")
1071
+ ans = f"μš”μ²­ 처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {error_message}"
1072
  st.session_state.messages.append({"role": "assistant", "content": ans})
1073
 
1074
# ──────────────────────────────── main ────────────────────────────────────
def main() -> None:
    """Application entry point: log the startup time, then launch the app."""
    # Capture the timestamp once so the log line reflects a single instant.
    start_stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.write("==== μ• ν”Œλ¦¬μΌ€μ΄μ…˜ μ‹œμž‘ μ‹œκ°„:", start_stamp, "=====")
    agricultural_price_forecast_app()
# Run the Streamlit app only when this file is executed as a script.
if __name__ == "__main__":
    main()