ginipick committed
Commit 7304251 · verified · 1 Parent(s): 6a89f2d

Update app-backup3.py

Files changed (1)
  1. app-backup3.py +449 -373
app-backup3.py CHANGED
@@ -6,43 +6,47 @@ from io import BytesIO
6
  from PIL import Image
7
 
8
  import streamlit as st
9
- from openai import OpenAI # OpenAI library
10
 
11
  from gradio_client import Client
12
  import pandas as pd
13
  import PyPDF2 # For handling PDF files
 
14
 
15
  # ──────────────────────────────── Environment Variables / Constants ─────────────────────────
16
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
17
  BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "") # Keep this name
18
  BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
19
- BRAVE_IMAGE_ENDPOINT = "https://api.search.brave.com/res/v1/images/search"
20
  BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
21
  BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
22
  IMAGE_API_URL = "http://211.233.58.201:7896"
23
  MAX_TOKENS = 7999
24
-
25
- # Brave Search modes and style definitions (in English)
26
- SEARCH_MODES = {
27
- "comprehensive": "Comprehensive answer with multiple sources",
28
- "academic": "Academic and research-focused results",
29
- "news": "Latest news and current events",
30
- "technical": "Technical and specialized information",
31
- "educational": "Educational and learning resources"
32
  }
33
 
34
  RESPONSE_STYLES = {
35
- "professional": "Professional and formal tone",
36
- "casual": "Friendly and conversational tone",
37
- "simple": "Simple and easy to understand",
38
- "detailed": "Detailed and thorough explanations"
39
  }
40
 
41
  # Example search queries
42
  EXAMPLE_QUERIES = {
43
- "example1": "What are the latest developments in quantum computing?",
44
- "example2": "How does climate change affect biodiversity in tropical rainforests?",
45
- "example3": "What are the economic implications of artificial intelligence in the job market?"
46
  }
47
 
48
  # ──────────────────────────────── Logging ────────────────────────────────
@@ -61,106 +65,289 @@ def get_openai_client():
61
  timeout=60.0,
62
  max_retries=3
63
  )
 
 
64
 
65
  # ──────────────────────────────── System Prompt ─────────────────────────
66
- def get_system_prompt(mode="comprehensive", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
67
  """
68
- Generate a system prompt for the 'Perplexity Clone' interface based on:
69
- - The selected search mode and style
70
- - Guidelines for using web search results and uploaded files
71
  """
72
- comprehensive_prompt = """
73
- You are an advanced AI assistant that provides comprehensive answers with multiple sources, similar to Perplexity.
74
-
75
- Your task is to:
76
- 1. Thoroughly analyze the user's query
77
- 2. Provide a clear, well-structured answer integrating information from multiple sources
78
- 3. Include relevant images, videos, and links in your response
79
- 4. Format your answer with proper headings, bullet points, and sections
80
- 5. Cite sources inline and provide a references section at the end
81
-
82
- Important guidelines:
83
- - Organize information logically with clear section headings
84
- - Use bullet points and numbered lists for clarity
85
- - Include specific, factual information whenever possible
86
- - Provide balanced perspectives on controversial topics
87
- - Display relevant statistics, data, or quotes when appropriate
88
- - Format your response using markdown for readability
89
  """
90
 
91
  mode_prompts = {
92
- "academic": """
93
- Your focus is on providing academic and research-focused responses:
94
- - Prioritize peer-reviewed research and academic sources
95
- - Include citations in a formal academic format
96
- - Discuss methodologies and research limitations where relevant
97
- - Present different scholarly perspectives on the topic
98
- - Use precise, technical language appropriate for an academic audience
99
  """,
100
- "news": """
101
- Your focus is on providing the latest news and current events:
102
- - Prioritize recent news articles and current information
103
- - Include publication dates for all news sources
104
- - Present multiple perspectives from different news outlets
105
- - Distinguish between facts and opinions/editorial content
106
- - Update information with the most recent developments
107
  """,
108
- "technical": """
109
- Your focus is on providing technical and specialized information:
110
- - Use precise technical terminology appropriate to the field
111
- - Include code snippets, formulas, or technical diagrams where relevant
112
- - Break down complex concepts into step-by-step explanations
113
- - Reference technical documentation, standards, and best practices
114
- - Consider different technical approaches or methodologies
115
  """,
116
- "educational": """
117
- Your focus is on providing educational and learning resources:
118
- - Structure information in a learning-friendly progression
119
- - Include examples, analogies, and visual explanations
120
- - Highlight key concepts and definitions
121
- - Suggest further learning resources at different difficulty levels
122
- - Present information that's accessible to learners at various levels
 
123
  """
124
  }
125
 
126
  style_guides = {
127
- "professional": "Use a professional, authoritative voice. Clearly explain technical terms and present data systematically.",
128
- "casual": "Use a relaxed, conversational style with a friendly tone. Include relatable examples and occasionally use informal expressions.",
129
- "simple": "Use straightforward language and avoid jargon. Keep sentences and paragraphs short. Explain concepts as if to someone with no background in the subject.",
130
- "detailed": "Provide thorough explanations with comprehensive background information. Explore nuances and edge cases. Present multiple perspectives and detailed analysis."
131
  }
132
 
 
 
 
133
  search_guide = """
134
- Guidelines for Using Search Results:
135
- - Include source links directly in your response using markdown: [Source Name](URL)
136
- - For each major claim or piece of information, indicate its source
137
- - If sources conflict, explain the different perspectives and their reliability
138
- - Include relevant images by writing: ![Image description](image_url)
139
- - Include relevant video links when appropriate by writing: [Video: Title](video_url)
140
- - Format search information into a cohesive, well-structured response
141
- - Include a "References" section at the end listing all major sources with links
142
  """
143
 
144
  upload_guide = """
145
- Guidelines for Using Uploaded Files:
146
- - Treat the uploaded files as primary sources for your response
147
- - Extract and highlight key information from files that directly addresses the query
148
- - Quote relevant passages and cite the specific file
149
- - For numerical data in CSV files, consider creating summary statements
150
- - For PDF content, reference specific sections or pages
151
- - Integrate file information seamlessly with web search results
152
- - When information conflicts, prioritize file content over general web results
153
  """
154
 
155
  # Base prompt
156
- if mode == "comprehensive":
157
- final_prompt = comprehensive_prompt
158
- else:
159
- final_prompt = comprehensive_prompt + "\n" + mode_prompts.get(mode, "")
 
160
 
161
  # Style
162
  if style in style_guides:
163
- final_prompt += f"\n\nTone and Style: {style_guides[style]}"
164
 
165
  if include_search_results:
166
  final_prompt += f"\n\n{search_guide}"
@@ -169,23 +356,24 @@ Guidelines for Using Uploaded Files:
169
  final_prompt += f"\n\n{upload_guide}"
170
 
171
  final_prompt += """
172
- \n\nAdditional Formatting Requirements:
173
- - Use markdown headings (## and ###) to organize your response
174
- - Use bold text (**text**) for emphasis on important points
175
- - Include a "Related Questions" section at the end with 3-5 follow-up questions
176
- - Format your response with proper spacing and paragraph breaks
177
- - Make all links clickable by using proper markdown format: [text](url)
 
178
  """
179
  return final_prompt
180
 
181
  # ──────────────────────────────── Brave Search API ────────────────────────
182
  @st.cache_data(ttl=3600)
183
- def brave_search(query: str, count: int = 20):
184
  if not BRAVE_KEY:
185
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
186
 
187
  headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
188
- params = {"q": query, "count": str(count)}
189
 
190
  for attempt in range(3):
191
  try:
@@ -218,53 +406,17 @@ def brave_search(query: str, count: int = 20):
218
  except Exception as e:
219
  logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
220
  if attempt < 2:
221
- # Increased the wait time here (2s → 5s)
222
  time.sleep(5)
223
 
224
  return []
225
 
226
  @st.cache_data(ttl=3600)
227
- def brave_image_search(query: str, count: int = 10):
228
  if not BRAVE_KEY:
229
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
230
 
231
  headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
232
- params = {"q": query, "count": str(count),"search_lang": "en","country": "us","spellcheck": "1"}
233
-
234
- for attempt in range(3):
235
- try:
236
- r = requests.get(BRAVE_IMAGE_ENDPOINT, headers=headers, params=params, timeout=15)
237
- r.raise_for_status()
238
- data = r.json()
239
-
240
- results = []
241
- for i, img in enumerate(data.get("results", [])[:count], 1):
242
- results.append({
243
- "index": i,
244
- "title": img.get("title", "Image"),
245
- "image_url": img.get("image", {}).get("url", ""),
246
- "source_url": img.get("source", ""),
247
- "width": img.get("image", {}).get("width", 0),
248
- "height": img.get("image", {}).get("height", 0)
249
- })
250
-
251
- logging.info(f"Brave image search success: {len(results)} results")
252
- return results
253
-
254
- except Exception as e:
255
- logging.error(f"Brave image search failure (attempt {attempt+1}/3): {e}")
256
- if attempt < 2:
257
- time.sleep(5)
258
-
259
- return []
260
-
261
- @st.cache_data(ttl=3600)
262
- def brave_video_search(query: str, count: int = 5):
263
- if not BRAVE_KEY:
264
- raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
265
-
266
- headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
267
- params = {"q": query, "count": str(count)}
268
 
269
  for attempt in range(3):
270
  try:
@@ -293,12 +445,12 @@ def brave_video_search(query: str, count: int = 5):
293
  return []
294
 
295
  @st.cache_data(ttl=3600)
296
- def brave_news_search(query: str, count: int = 5):
297
  if not BRAVE_KEY:
298
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
299
 
300
  headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
301
- params = {"q": query, "count": str(count)}
302
 
303
  for attempt in range(3):
304
  try:
@@ -329,54 +481,46 @@ def brave_news_search(query: str, count: int = 5):
329
 
330
  def mock_results(query: str) -> str:
331
  ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
332
- return (f"# Fallback Search Content (Generated: {ts})\n\n"
333
- f"The search API request failed or returned no results for '{query}'. "
334
- f"Please generate a response based on any pre-existing knowledge.\n\n"
335
- f"Consider these points:\n\n"
336
- f"- Basic concepts and importance of {query}\n"
337
- f"- Commonly known related statistics or trends\n"
338
- f"- Typical expert opinions on this subject\n"
339
- f"- Questions that readers might have\n\n"
340
- f"Note: This is fallback guidance, not real-time data.\n\n")
341
 
342
  def do_web_search(query: str) -> str:
343
  try:
344
- arts = brave_search(query, 20)
345
  if not arts:
346
  logging.warning("No search results, using fallback content")
347
  return mock_results(query)
348
 
349
- images = brave_image_search(query, 5)
350
  videos = brave_video_search(query, 2)
351
  news = brave_news_search(query, 3)
352
 
353
- result = "# Web Search Results\nUse these results to provide a comprehensive answer with multiple sources.\n\n"
354
-
355
- result += "## Web Results\n\n"
356
- for a in arts[:10]:
357
- result += f"### Result {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
358
- result += f"**Source**: [{a['displayed_link']}]({a['link']})\n\n---\n"
359
 
360
- if images:
361
- result += "## Image Results\n\n"
362
- for img in images:
363
- if img.get('image_url'):
364
- result += f"![{img['title']}]({img['image_url']})\n\n"
365
- result += f"**Source**: [{img.get('source_url', 'Image source')}]({img.get('source_url', '#')})\n\n"
366
 
 
 
 
 
 
 
367
  if videos:
368
- result += "## Video Results\n\n"
369
  for vid in videos:
370
  result += f"### {vid['title']}\n\n"
371
  if vid.get('thumbnail_url'):
372
- result += f"![Thumbnail]({vid['thumbnail_url']})\n\n"
373
- result += f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n"
374
-
375
- if news:
376
- result += "## News Results\n\n"
377
- for n in news:
378
- result += f"### {n['title']}\n\n{n['description']}\n\n"
379
- result += f"**Source**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n"
380
 
381
  return result
382
 
@@ -394,11 +538,11 @@ def process_text_file(file):
394
  if len(text) > 10000:
395
  text = text[:9700] + "...(truncated)..."
396
 
397
- result = f"## Text File: {file.name}\n\n" + text
398
  return result
399
  except Exception as e:
400
  logging.error(f"Error processing text file: {str(e)}")
401
- return f"Error processing text file: {str(e)}"
402
 
403
  def process_csv_file(file):
404
  try:
@@ -406,41 +550,41 @@ def process_csv_file(file):
406
  file.seek(0)
407
 
408
  df = pd.read_csv(io.BytesIO(content))
409
- result = f"## CSV File: {file.name}\n\n"
410
- result += f"- Rows: {len(df)}\n"
411
- result += f"- Columns: {len(df.columns)}\n"
412
- result += f"- Column Names: {', '.join(df.columns.tolist())}\n\n"
413
 
414
- result += "### Data Preview\n\n"
415
  preview_df = df.head(10)
416
  try:
417
  markdown_table = preview_df.to_markdown(index=False)
418
  if markdown_table:
419
  result += markdown_table + "\n\n"
420
  else:
421
- result += "Unable to display CSV data.\n\n"
422
  except Exception as e:
423
  logging.error(f"Markdown table conversion error: {e}")
424
- result += "Displaying data as text:\n\n" + str(preview_df) + "\n\n"
425
 
426
  num_cols = df.select_dtypes(include=['number']).columns
427
  if len(num_cols) > 0:
428
- result += "### Basic Statistical Information\n\n"
429
  try:
430
  stats_df = df[num_cols].describe().round(2)
431
  stats_markdown = stats_df.to_markdown()
432
  if stats_markdown:
433
  result += stats_markdown + "\n\n"
434
  else:
435
- result += "Unable to display statistical information.\n\n"
436
  except Exception as e:
437
  logging.error(f"Statistical info conversion error: {e}")
438
- result += "Unable to generate statistical information.\n\n"
439
 
440
  return result
441
  except Exception as e:
442
  logging.error(f"CSV file processing error: {str(e)}")
443
- return f"Error processing CSV file: {str(e)}"
444
 
445
  def process_pdf_file(file):
446
  try:
@@ -450,7 +594,7 @@ def process_pdf_file(file):
450
  pdf_file = io.BytesIO(file_bytes)
451
  reader = PyPDF2.PdfReader(pdf_file, strict=False)
452
 
453
- result = f"## PDF File: {file.name}\n\n- Total pages: {len(reader.pages)}\n\n"
454
 
455
  max_pages = min(5, len(reader.pages))
456
  all_text = ""
@@ -459,40 +603,40 @@ def process_pdf_file(file):
459
  try:
460
  page = reader.pages[i]
461
  page_text = page.extract_text()
462
- current_page_text = f"### Page {i+1}\n\n"
463
  if page_text and len(page_text.strip()) > 0:
464
  if len(page_text) > 1500:
465
- current_page_text += page_text[:1500] + "...(truncated)...\n\n"
466
  else:
467
  current_page_text += page_text + "\n\n"
468
  else:
469
- current_page_text += "(No text could be extracted)\n\n"
470
 
471
  all_text += current_page_text
472
 
473
  if len(all_text) > 8000:
474
- all_text += "...(truncating remaining pages)...\n\n"
475
  break
476
 
477
  except Exception as page_err:
478
  logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
479
- all_text += f"### Page {i+1}\n\n(Error extracting content: {str(page_err)})\n\n"
480
 
481
  if len(reader.pages) > max_pages:
482
- all_text += f"\nNote: Only the first {max_pages} pages are shown.\n\n"
483
 
484
- result += "### PDF Content\n\n" + all_text
485
  return result
486
 
487
  except Exception as e:
488
  logging.error(f"PDF file processing error: {str(e)}")
489
- return f"## PDF File: {file.name}\n\nError: {str(e)}\n\nCannot process."
490
 
491
  def process_uploaded_files(files):
492
  if not files:
493
  return None
494
 
495
- result = "# Uploaded File Contents\n\nBelow is the content from the files provided by the user.\n\n"
496
  for file in files:
497
  try:
498
  ext = file.name.split('.')[-1].lower()
@@ -503,33 +647,15 @@ def process_uploaded_files(files):
503
  elif ext == 'pdf':
504
  result += process_pdf_file(file) + "\n\n---\n\n"
505
  else:
506
- result += f"### Unsupported File: {file.name}\n\n---\n\n"
507
  except Exception as e:
508
  logging.error(f"File processing error {file.name}: {e}")
509
- result += f"### File processing error: {file.name}\n\nError: {e}\n\n---\n\n"
510
 
511
  return result
512
 
513
  # ──────────────────────────────── Image & Utility ─────────────────────────
514
 
515
- def load_and_show_image(img_url: str, caption: str = "Image"):
516
- """
517
- 1) Add a User-Agent header to bypass hotlink protection
518
- 2) Download the image, then display it
519
- """
520
- headers = {
521
- "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
522
- " AppleWebKit/537.36 (KHTML, like Gecko)"
523
- " Chrome/98.0.4758.102 Safari/537.36")
524
- }
525
- try:
526
- response = requests.get(img_url, headers=headers, timeout=10)
527
- response.raise_for_status()
528
- image = Image.open(BytesIO(response.content))
529
- st.image(image, caption=caption, use_container_width=True)
530
- except Exception as e:
531
- st.warning(f"이미지 λ‘œλ”© μ‹€νŒ¨: {e}")
532
-
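The helper removed above worked around hotlink protection by sending a browser User-Agent before handing the bytes to `st.image`. For reference, a minimal standalone sketch of the same approach (not part of the app):

```python
import requests
from io import BytesIO
from PIL import Image

BROWSER_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
              "AppleWebKit/537.36 (KHTML, like Gecko) "
              "Chrome/98.0.4758.102 Safari/537.36")

def fetch_image(url: str) -> Image.Image:
    # Hotlink-protected hosts usually key on the User-Agent (sometimes
    # Referer); presenting a browser UA is the workaround the removed
    # load_and_show_image() helper used.
    resp = requests.get(url, headers={"User-Agent": BROWSER_UA}, timeout=10)
    resp.raise_for_status()
    return Image.open(BytesIO(resp.content))
```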
533
  def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
534
  if not prompt:
535
  return None, "Insufficient prompt"
@@ -552,8 +678,8 @@ def extract_image_prompt(response_text: str, topic: str):
552
  response = client.chat.completions.create(
553
  model="gpt-4.1-mini",
554
  messages=[
555
- {"role": "system", "content": "Generate a single-line English image prompt from the following text. Return only the prompt text, nothing else."},
556
- {"role": "user", "content": f"Topic: {topic}\n\n---\n{response_text}\n\n---"}
557
  ],
558
  temperature=1,
559
  max_tokens=80,
@@ -562,9 +688,9 @@ def extract_image_prompt(response_text: str, topic: str):
562
  return response.choices[0].message.content.strip()
563
  except Exception as e:
564
  logging.error(f"OpenAI image prompt generation error: {e}")
565
- return f"A professional photo related to {topic}, high quality"
566
 
567
- def md_to_html(md: str, title="Perplexity Clone Response"):
568
  return f"<!DOCTYPE html><html><head><title>{title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
569
 
570
  def keywords(text: str, top=5):
@@ -572,8 +698,9 @@ def keywords(text: str, top=5):
572
  return " ".join(cleaned.split()[:top])
573
 
574
  # ──────────────────────────────── Streamlit UI ────────────────────────────
575
- def perplexity_app():
576
- st.title("Perplexity Clone AI Assistant")
 
577
 
578
  if "ai_model" not in st.session_state:
579
  st.session_state.ai_model = "gpt-4.1-mini"
@@ -585,48 +712,61 @@ def perplexity_app():
585
  st.session_state.generate_image = False
586
  if "web_search_enabled" not in st.session_state:
587
  st.session_state.web_search_enabled = True
588
- if "search_mode" not in st.session_state:
589
- st.session_state.search_mode = "comprehensive"
590
  if "response_style" not in st.session_state:
591
  st.session_state.response_style = "professional"
592
 
593
  sb = st.sidebar
594
- sb.title("Search Settings")
 
595
 
596
- sb.subheader("Response Configuration")
597
  sb.selectbox(
598
- "Search Mode",
599
- options=list(SEARCH_MODES.keys()),
600
- format_func=lambda x: SEARCH_MODES[x],
601
- key="search_mode"
602
  )
603
 
604
  sb.selectbox(
605
- "Response Style",
606
  options=list(RESPONSE_STYLES.keys()),
607
  format_func=lambda x: RESPONSE_STYLES[x],
608
  key="response_style"
609
  )
610
 
611
  # Example queries
612
- sb.subheader("Example Queries")
613
  c1, c2, c3 = sb.columns(3)
614
- if c1.button("Quantum Computing", key="ex1"):
615
  process_example(EXAMPLE_QUERIES["example1"])
616
- if c2.button("Climate Change", key="ex2"):
617
  process_example(EXAMPLE_QUERIES["example2"])
618
- if c3.button("AI Economics", key="ex3"):
619
  process_example(EXAMPLE_QUERIES["example3"])
620
 
621
- sb.subheader("Other Settings")
622
- sb.toggle("Auto Save", key="auto_save")
623
- sb.toggle("Auto Image Generation", key="generate_image")
624
 
625
- web_search_enabled = sb.toggle("Use Web Search", value=st.session_state.web_search_enabled)
626
  st.session_state.web_search_enabled = web_search_enabled
627
 
628
  if web_search_enabled:
629
- st.sidebar.info("βœ… Web search results will be integrated into the response.")
630
 
631
  # Download the latest response
632
  latest_response = next(
@@ -642,35 +782,35 @@ def perplexity_app():
642
  first_line = latest_response.split('\n', 1)[0].strip()
643
  title = first_line[:40] + "..." if len(first_line) > 40 else first_line
644
 
645
- sb.subheader("Download Latest Response")
646
  d1, d2 = sb.columns(2)
647
- d1.download_button("Download as Markdown", latest_response,
648
  file_name=f"{title}.md", mime="text/markdown")
649
- d2.download_button("Download as HTML", md_to_html(latest_response, title),
650
  file_name=f"{title}.html", mime="text/html")
651
 
652
  # JSON conversation record upload
653
- up = sb.file_uploader("Load Conversation History (.json)", type=["json"], key="json_uploader")
654
  if up:
655
  try:
656
  st.session_state.messages = json.load(up)
657
- sb.success("Conversation history loaded successfully")
658
  except Exception as e:
659
- sb.error(f"Failed to load: {e}")
660
 
661
  # JSON conversation record download
662
- if sb.button("Download Conversation as JSON"):
663
  sb.download_button(
664
- "Save",
665
  data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
666
  file_name="conversation_history.json",
667
  mime="application/json"
668
  )
669
 
670
  # File Upload
671
- st.subheader("Upload Files")
672
  uploaded_files = st.file_uploader(
673
- "Upload files to be used as reference (txt, csv, pdf)",
674
  type=["txt", "csv", "pdf"],
675
  accept_multiple_files=True,
676
  key="file_uploader"
@@ -678,18 +818,18 @@ def perplexity_app():
678
 
679
  if uploaded_files:
680
  file_count = len(uploaded_files)
681
- st.success(f"{file_count} files uploaded. They will be used as sources for your query.")
682
 
683
- with st.expander("Preview Uploaded Files", expanded=False):
684
  for idx, file in enumerate(uploaded_files):
685
- st.write(f"**File Name:** {file.name}")
686
  ext = file.name.split('.')[-1].lower()
687
 
688
  if ext == 'txt':
689
  preview = file.read(1000).decode('utf-8', errors='ignore')
690
  file.seek(0)
691
  st.text_area(
692
- f"Preview of {file.name}",
693
  preview + ("..." if len(preview) >= 1000 else ""),
694
  height=150
695
  )
@@ -697,10 +837,10 @@ def perplexity_app():
697
  try:
698
  df = pd.read_csv(file)
699
  file.seek(0)
700
- st.write("CSV Preview (up to 5 rows)")
701
  st.dataframe(df.head(5))
702
  except Exception as e:
703
- st.error(f"CSV preview failed: {e}")
704
  elif ext == 'pdf':
705
  try:
706
  file_bytes = file.read()
@@ -710,17 +850,17 @@ def perplexity_app():
710
  reader = PyPDF2.PdfReader(pdf_file, strict=False)
711
 
712
  pc = len(reader.pages)
713
- st.write(f"PDF File: {pc} pages")
714
 
715
  if pc > 0:
716
  try:
717
  page_text = reader.pages[0].extract_text()
718
- preview = page_text[:500] if page_text else "(No text extracted)"
719
- st.text_area("Preview of the first page", preview + "...", height=150)
720
  except:
721
- st.warning("Failed to extract text from the first page")
722
  except Exception as e:
723
- st.error(f"PDF preview failed: {e}")
724
 
725
  if idx < file_count - 1:
726
  st.divider()
@@ -730,52 +870,32 @@ def perplexity_app():
730
  with st.chat_message(m["role"]):
731
  st.markdown(m["content"], unsafe_allow_html=True)
732
 
733
- # Images
734
- if "images" in m and m["images"]:
735
- st.subheader("Related Images")
736
- cols = st.columns(min(3, len(m["images"])))
737
- for i, img_data in enumerate(m["images"]):
738
- col_idx = i % len(cols)
739
- with cols[col_idx]:
740
- try:
741
- img_url = img_data.get('url', '')
742
- caption = img_data.get('title', 'Related image')
743
- if img_url:
744
- load_and_show_image(img_url, caption=caption)
745
- if img_data.get('source'):
746
- st.markdown(f"[Source]({img_data['source']})")
747
- except Exception as img_err:
748
- st.warning(f"Could not display image: {img_err}")
749
-
750
  # Videos
751
  if "videos" in m and m["videos"]:
752
- st.subheader("Related Videos")
753
  for video in m["videos"]:
754
- video_title = video.get('title', 'Related video')
755
  video_url = video.get('url', '')
756
  thumbnail = video.get('thumbnail', '')
757
 
758
  if thumbnail:
759
  col1, col2 = st.columns([1, 3])
760
  with col1:
761
- try:
762
- load_and_show_image(thumbnail, caption="Video Thumbnail")
763
- except:
764
- st.write("🎬")
765
  with col2:
766
  st.markdown(f"**[{video_title}]({video_url})**")
767
- st.write(f"Source: {video.get('source', 'Unknown')}")
768
  else:
769
  st.markdown(f"🎬 **[{video_title}]({video_url})**")
770
- st.write(f"Source: {video.get('source', 'Unknown')}")
771
 
772
  # User input
773
- query = st.chat_input("Enter your query or question here.")
774
  if query:
775
  process_input(query, uploaded_files)
776
 
777
  sb.markdown("---")
778
- sb.markdown("Created by [https://ginigen.com](https://ginigen.com) | [YouTube Channel](https://www.youtube.com/@ginipickaistudio)")
779
 
780
  def process_example(topic):
781
  process_input(topic, [])
@@ -796,59 +916,52 @@ def process_input(query: str, uploaded_files):
796
  has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
797
 
798
  try:
799
- status = st.status("Preparing to answer your query...")
800
- status.update(label="Initializing client...")
801
 
802
  client = get_openai_client()
803
 
804
  search_content = None
805
- image_results = []
806
  video_results = []
807
  news_results = []
808
 
 
 
809
  if use_web_search:
810
- status.update(label="Performing web search...")
811
- with st.spinner("Searching the web..."):
812
  search_content = do_web_search(keywords(query, top=5))
813
 
814
  try:
815
- status.update(label="Finding images and videos...")
816
- image_results = brave_image_search(query, 5)
817
  video_results = brave_video_search(query, 2)
818
  news_results = brave_news_search(query, 3)
819
  except Exception as search_err:
820
- logging.error(f"Media search error: {search_err}")
821
 
822
  file_content = None
823
  if has_uploaded_files:
824
- status.update(label="Processing uploaded files...")
825
- with st.spinner("Analyzing files..."):
826
  file_content = process_uploaded_files(uploaded_files)
827
 
828
- valid_images = []
829
- for img in image_results:
830
- url = img.get('image_url')
831
- if url and url.startswith('http'):
832
- valid_images.append({
833
- 'url': url,
834
- 'title': img.get('title', f"Related to: {query}"),
835
- 'source': img.get('source_url', '')
836
- })
837
-
838
  valid_videos = []
839
  for vid in video_results:
840
  url = vid.get('video_url')
841
  if url and url.startswith('http'):
842
  valid_videos.append({
843
  'url': url,
844
- 'title': vid.get('title', 'Video'),
845
  'thumbnail': vid.get('thumbnail_url', ''),
846
- 'source': vid.get('source', 'Video source')
847
  })
848
 
849
- status.update(label="Preparing comprehensive answer...")
850
  sys_prompt = get_system_prompt(
851
- mode=st.session_state.search_mode,
852
  style=st.session_state.response_style,
853
  include_search_results=use_web_search,
854
  include_uploaded_files=has_uploaded_files
@@ -859,20 +972,16 @@ def process_input(query: str, uploaded_files):
859
  ]
860
 
861
  user_content = query
 
862
  if search_content:
863
  user_content += "\n\n" + search_content
864
  if file_content:
865
  user_content += "\n\n" + file_content
866
 
867
- if valid_images:
868
- user_content += "\n\n# Available Images\n"
869
- for i, img in enumerate(valid_images):
870
- user_content += f"\n{i+1}. ![{img['title']}]({img['url']})\n"
871
- if img['source']:
872
- user_content += f" Source: {img['source']}\n"
873
-
874
  if valid_videos:
875
- user_content += "\n\n# Available Videos\n"
876
  for i, vid in enumerate(valid_videos):
877
  user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
878
 
@@ -896,79 +1005,46 @@ def process_input(query: str, uploaded_files):
896
 
897
  message_placeholder.markdown(full_response, unsafe_allow_html=True)
898
 
899
- if valid_images:
900
- st.subheader("Related Images")
901
- image_cols = st.columns(min(3, len(valid_images)))
902
-
903
- for i, img_data in enumerate(valid_images):
904
- col_idx = i % len(image_cols)
905
- try:
906
- with image_cols[col_idx]:
907
- img_url = img_data['url']
908
- caption = img_data['title']
909
- load_and_show_image(img_url, caption=caption)
910
- if img_data.get('source'):
911
- st.markdown(f"[Source]({img_data['source']})")
912
- except Exception as img_err:
913
- logging.warning(f"Error displaying image: {img_err}")
914
-
915
  if valid_videos:
916
- st.subheader("Related Videos")
917
  for video in valid_videos:
918
- video_title = video.get('title', 'Related video')
919
  video_url = video.get('url', '')
920
- thumbnail = video.get('thumbnail', '')
921
 
922
- if thumbnail:
923
- try:
924
- col1, col2 = st.columns([1, 3])
925
- with col1:
926
- try:
927
- load_and_show_image(thumbnail, caption="Video Thumbnail")
928
- except:
929
- st.write("🎬")
930
- with col2:
931
- st.markdown(f"**[{video_title}]({video_url})**")
932
- st.write(f"Source: {video.get('source', 'Unknown')}")
933
- except Exception as vid_err:
934
- st.markdown(f"🎬 **[{video_title}]({video_url})**")
935
- st.write(f"Source: {video.get('source', 'Unknown')}")
936
- else:
937
- st.markdown(f"🎬 **[{video_title}]({video_url})**")
938
- st.write(f"Source: {video.get('source', 'Unknown')}")
939
-
940
- status.update(label="Response completed!", state="complete")
941
 
942
  st.session_state.messages.append({
943
  "role": "assistant",
944
  "content": full_response,
945
- "images": valid_images,
946
  "videos": valid_videos
947
  })
948
 
949
  except Exception as api_error:
950
  error_message = str(api_error)
951
- logging.error(f"API error: {error_message}")
952
- status.update(label=f"Error: {error_message}", state="error")
953
- raise Exception(f"Response generation error: {error_message}")
954
 
955
  if st.session_state.generate_image and full_response:
956
- with st.spinner("Generating custom image..."):
957
  try:
958
  ip = extract_image_prompt(full_response, query)
959
  img, cap = generate_image(ip)
960
  if img:
961
- st.subheader("AI-Generated Image")
962
  st.image(img, caption=cap, use_container_width=True)
963
  except Exception as img_error:
964
- logging.error(f"Image generation error: {str(img_error)}")
965
- st.warning("Custom image generation failed.")
966
 
967
  if full_response:
968
- st.subheader("Download This Response")
969
  c1, c2 = st.columns(2)
970
  c1.download_button(
971
- "Markdown",
972
  data=full_response,
973
  file_name=f"{query[:30]}.md",
974
  mime="text/markdown"
@@ -986,19 +1062,19 @@ def process_input(query: str, uploaded_files):
986
  with open(fn, "w", encoding="utf-8") as fp:
987
  json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
988
  except Exception as e:
989
- logging.error(f"Auto-save failed: {e}")
990
 
991
  except Exception as e:
992
  error_message = str(e)
993
- placeholder.error(f"An error occurred: {error_message}")
994
- logging.error(f"Process input error: {error_message}")
995
- ans = f"An error occurred while processing your request: {error_message}"
996
  st.session_state.messages.append({"role": "assistant", "content": ans})
997
 
998
  # ──────────────────────────────── main ────────────────────────────────────
999
  def main():
1000
- st.write("==== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
1001
- perplexity_app()
1002
 
1003
  if __name__ == "__main__":
1004
- main()
 
6
  from PIL import Image
7
 
8
  import streamlit as st
9
+ from openai import OpenAI
10
 
11
  from gradio_client import Client
12
  import pandas as pd
13
  import PyPDF2 # For handling PDF files
14
+ import kagglehub
15
 
16
  # ──────────────────────────────── Environment Variables / Constants ─────────────────────────
17
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
18
  BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "") # Keep this name
19
  BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
 
20
  BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
21
  BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
22
  IMAGE_API_URL = "http://211.233.58.201:7896"
23
  MAX_TOKENS = 7999
24
+ KAGGLE_API_KEY = os.getenv("KDATA_API", "")
25
+
26
+ # Set Kaggle API key
27
+ os.environ["KAGGLE_KEY"] = KAGGLE_API_KEY
28
+
29
+ # Analysis modes and style definitions
30
+ ANALYSIS_MODES = {
31
+ "price_forecast": "농산물 가격 예츑과 μ‹œμž₯ 뢄석",
32
+ "market_trend": "μ‹œμž₯ 동ν–₯ 및 μˆ˜μš” νŒ¨ν„΄ 뢄석",
33
+ "production_analysis": "μƒμ‚°λŸ‰ 뢄석 및 μ‹λŸ‰ μ•ˆλ³΄ 전망",
34
+ "agricultural_policy": "농업 μ •μ±… 및 규제 영ν–₯ 뢄석",
35
+ "climate_impact": "κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 뢄석"
36
  }
37
 
38
  RESPONSE_STYLES = {
39
+ "professional": "전문적이고 ν•™μˆ μ μΈ 뢄석",
40
+ "simple": "μ‰½κ²Œ 이해할 수 μžˆλŠ” κ°„κ²°ν•œ μ„€λͺ…",
41
+ "detailed": "μƒμ„Έν•œ 톡계 기반 깊이 μžˆλŠ” 뢄석",
42
+ "action_oriented": "μ‹€ν–‰ κ°€λŠ₯ν•œ μ‘°μ–Έκ³Ό μΆ”μ²œ 쀑심"
43
  }
44
 
45
  # Example search queries
46
  EXAMPLE_QUERIES = {
47
+ "example1": "μŒ€ 가격 μΆ”μ„Έ 및 ν–₯ν›„ 6κ°œμ›” 전망을 λΆ„μ„ν•΄μ£Όμ„Έμš”",
48
+ "example2": "κΈ°ν›„ λ³€ν™”λ‘œ ν•œκ΅­ 과일 생산 μ „λž΅κ³Ό μˆ˜μš” 예츑 λ³΄κ³ μ„œλ₯Ό μž‘μ„±ν•˜λΌ.",
49
+ "example3": "2025λ…„λΆ€ν„° 2030λ…„κΉŒμ§€ 좩뢁 μ¦ν‰κ΅°μ—μ„œ μž¬λ°°ν•˜λ©΄ μœ λ§ν•œ μž‘λ¬Όμ€? μˆ˜μ΅μ„±κ³Ό 관리성이 μ’‹μ•„μ•Όν•œλ‹€"
50
  }
51
 
52
  # ──────────────────────────────── Logging ────────────────────────────────
 
65
  timeout=60.0,
66
  max_retries=3
67
  )
68
+
69
+ # ────────────────────────────── Kaggle Dataset Access ──────────────────────
70
+ @st.cache_resource
71
+ def load_agriculture_dataset():
72
+ """Download and load the UN agriculture dataset from Kaggle"""
73
+ try:
74
+ path = kagglehub.dataset_download("unitednations/global-food-agriculture-statistics")
75
+ logging.info(f"Kaggle dataset downloaded to: {path}")
76
+
77
+ # Load metadata about available files
78
+ available_files = []
79
+ for root, dirs, files in os.walk(path):
80
+ for file in files:
81
+ if file.endswith('.csv'):
82
+ file_path = os.path.join(root, file)
83
+ file_size = os.path.getsize(file_path) / (1024 * 1024) # Size in MB
84
+ available_files.append({
85
+ 'name': file,
86
+ 'path': file_path,
87
+ 'size_mb': round(file_size, 2)
88
+ })
89
+
90
+ return {
91
+ 'base_path': path,
92
+ 'files': available_files
93
+ }
94
+ except Exception as e:
95
+ logging.error(f"Error loading Kaggle dataset: {e}")
96
+ return None
97
+
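The same download-and-inventory step can be reproduced outside Streamlit; a minimal sketch, assuming kagglehub is installed and can resolve Kaggle credentials (e.g. from kaggle.json or the KAGGLE_USERNAME/KAGGLE_KEY pair):

```python
import os
import kagglehub

# kagglehub caches downloads locally, so repeated calls are cheap.
path = kagglehub.dataset_download("unitednations/global-food-agriculture-statistics")

# Mirror the {'name', 'path', 'size_mb'} records the loader builds.
csv_files = []
for root, _dirs, names in os.walk(path):
    for name in names:
        if name.endswith(".csv"):
            full = os.path.join(root, name)
            csv_files.append({
                "name": name,
                "path": full,
                "size_mb": round(os.path.getsize(full) / (1024 * 1024), 2),
            })

print(f"{len(csv_files)} CSV files under {path}")
```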
98
+ def get_dataset_summary():
99
+ """Generate a summary of the available agriculture datasets"""
100
+ dataset_info = load_agriculture_dataset()
101
+ if not dataset_info:
102
+ return "Failed to load the UN global food and agriculture statistics dataset."
103
+
104
+ summary = "# UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋\n\n"
105
+ summary += f"총 {len(dataset_info['files'])}개의 CSV 파일이 ν¬ν•¨λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€.\n\n"
106
+
107
+ # List files with sizes
108
+ summary += "## μ‚¬μš© κ°€λŠ₯ν•œ 데이터 파일:\n\n"
109
+ for i, file_info in enumerate(dataset_info['files'][:10], 1): # Limit to first 10 files
110
+ summary += f"{i}. **{file_info['name']}** ({file_info['size_mb']} MB)\n"
111
+
112
+ if len(dataset_info['files']) > 10:
113
+ summary += f"\n...μ™Έ {len(dataset_info['files']) - 10}개 파일\n"
114
+
115
+ # Add example of data structure
116
+ try:
117
+ if dataset_info['files']:
118
+ sample_file = dataset_info['files'][0]['path']
119
+ df = pd.read_csv(sample_file, nrows=5)
120
+ summary += "\n## 데이터 μƒ˜ν”Œ ꡬ쑰:\n\n"
121
+ summary += df.head(5).to_markdown() + "\n\n"
122
+
123
+ summary += "## 데이터셋 λ³€μˆ˜ μ„€λͺ…:\n\n"
124
+ for col in df.columns:
125
+ summary += f"- **{col}**: [λ³€μˆ˜ μ„€λͺ… ν•„μš”]\n"
126
+ except Exception as e:
127
+ logging.error(f"Error generating dataset sample: {e}")
128
+ summary += "\n데이터 μƒ˜ν”Œμ„ μƒμ„±ν•˜λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€.\n"
129
+
130
+ return summary
131
+
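One practical note on the `to_markdown()` calls used throughout this summary: pandas delegates them to the optional `tabulate` package and raises `ImportError` when it is missing, which is why the calls sit inside try/except. A small defensive wrapper (a sketch, not part of the app):

```python
import pandas as pd

def safe_markdown(df: pd.DataFrame) -> str:
    """Render a DataFrame as markdown, falling back to plain text."""
    try:
        return df.to_markdown(index=False)
    except ImportError:
        # DataFrame.to_markdown requires the 'tabulate' package.
        return df.to_string(index=False)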
132
+ def analyze_dataset_for_query(query):
133
+ """Find and analyze relevant data from the dataset based on the query"""
134
+ dataset_info = load_agriculture_dataset()
135
+ if not dataset_info:
136
+ return "데이터셋을 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€. Kaggle API 연결을 ν™•μΈν•΄μ£Όμ„Έμš”."
137
+
138
+ # Extract key terms from the query
139
+ query_lower = query.lower()
140
+
141
+ # Define keywords to look for in the dataset files
142
+ keywords = {
143
+ "μŒ€": ["rice", "grain"],
144
+ "λ°€": ["wheat", "grain"],
145
+ "μ˜₯수수": ["corn", "maize", "grain"],
146
+ "μ±„μ†Œ": ["vegetable", "produce"],
147
+ "과일": ["fruit", "produce"],
148
+ "가격": ["price", "cost", "value"],
149
+ "생산": ["production", "yield", "harvest"],
150
+ "수좜": ["export", "trade"],
151
+ "μˆ˜μž…": ["import", "trade"],
152
+ "μ†ŒλΉ„": ["consumption", "demand"]
153
+ }
154
+
155
+ # Find relevant files based on the query
156
+ relevant_files = []
157
+
158
+ # First check for Korean keywords in the query
159
+ found_keywords = []
160
+ for k_term, e_terms in keywords.items():
161
+ if k_term in query_lower:
162
+ found_keywords.extend([k_term] + e_terms)
163
+
164
+ # If no Korean keywords found, check for English terms in the filenames
165
+ if not found_keywords:
166
+ # Generic search through all files
167
+ relevant_files = dataset_info['files'][:5] # Take first 5 files as default
168
+ else:
169
+ # Search for files related to the found keywords
170
+ for file_info in dataset_info['files']:
171
+ file_name_lower = file_info['name'].lower()
172
+ for keyword in found_keywords:
173
+ if keyword.lower() in file_name_lower:
174
+ relevant_files.append(file_info)
175
+ break
176
+
177
+ # If still no relevant files, take the first 5 files
178
+ if not relevant_files:
179
+ relevant_files = dataset_info['files'][:5]
180
+
181
+ # Read and analyze the relevant files
182
+ analysis_result = "# 농업 데이터 뢄석 κ²°κ³Ό\n\n"
183
+ analysis_result += f"쿼리: '{query}'에 λŒ€ν•œ 뢄석을 μˆ˜ν–‰ν–ˆμŠ΅λ‹ˆλ‹€.\n\n"
184
+
185
+ if found_keywords:
186
+ analysis_result += f"## 뢄석 ν‚€μ›Œλ“œ: {', '.join(set(found_keywords))}\n\n"
187
+
188
+ # Process each relevant file
189
+ for file_info in relevant_files[:3]: # Limit to 3 files for performance
190
+ try:
191
+ analysis_result += f"## 파일: {file_info['name']}\n\n"
192
+
193
+ # Read the CSV file
194
+ df = pd.read_csv(file_info['path'])
195
+
196
+ # Basic file stats
197
+ analysis_result += f"- ν–‰ 수: {len(df)}\n"
198
+ analysis_result += f"- μ—΄ 수: {len(df.columns)}\n"
199
+ analysis_result += f"- μ—΄ λͺ©λ‘: {', '.join(df.columns.tolist())}\n\n"
200
+
201
+ # Sample data
202
+ analysis_result += "### 데이터 μƒ˜ν”Œ:\n\n"
203
+ analysis_result += df.head(5).to_markdown() + "\n\n"
204
+
205
+ # Statistical summary of numeric columns
206
+ numeric_cols = df.select_dtypes(include=['number']).columns
207
+ if len(numeric_cols) > 0:
208
+ analysis_result += "### κΈ°λ³Έ 톡계:\n\n"
209
+ stats_df = df[numeric_cols].describe()
210
+ analysis_result += stats_df.to_markdown() + "\n\n"
211
+
212
+ # Time series analysis if possible
213
+ time_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower()]
214
+ if time_cols:
215
+ analysis_result += "### μ‹œκ³„μ—΄ νŒ¨ν„΄:\n\n"
216
+ analysis_result += "데이터셋에 μ‹œκ°„ κ΄€λ ¨ 열이 μžˆμ–΄ μ‹œκ³„μ—΄ 뢄석이 κ°€λŠ₯ν•©λ‹ˆλ‹€.\n\n"
217
+
218
+ except Exception as e:
219
+ logging.error(f"Error analyzing file {file_info['name']}: {e}")
220
+ analysis_result += f"이 파일 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}\n\n"
221
+
222
+ analysis_result += "## 농산물 가격 예츑 및 μˆ˜μš” 뢄석에 λŒ€ν•œ μΈμ‚¬μ΄νŠΈ\n\n"
223
+ analysis_result += "λ°μ΄ν„°μ…‹μ—μ„œ μΆ”μΆœν•œ 정보λ₯Ό λ°”νƒ•μœΌλ‘œ λ‹€μŒ μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€:\n\n"
224
+ analysis_result += "1. 데이터 기반 뢄석 (기본적인 μš”μ•½)\n"
225
+ analysis_result += "2. μ£Όμš” 가격 및 μˆ˜μš” 동ν–₯\n"
226
+ analysis_result += "3. μƒμ‚°λŸ‰ 및 무역 νŒ¨ν„΄\n\n"
227
+
228
+ analysis_result += "이 뢄석은 UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 기반으둜 ν•©λ‹ˆλ‹€.\n\n"
229
+
230
+ return analysis_result
231
 
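To make the routing above concrete: Korean terms found in the query expand to English terms, which are then substring-matched against the CSV file names. A self-contained illustration (the file names are hypothetical, for illustration only):

```python
# Hypothetical file names, for illustration only.
file_names = ["rice_production.csv", "wheat_trade.csv", "fruit_prices.csv"]

keywords = {"μŒ€": ["rice", "grain"], "가격": ["price", "cost", "value"]}
query = "μŒ€ 가격 μΆ”μ„Έ".lower()

found = []
for k_term, e_terms in keywords.items():
    if k_term in query:
        found.extend([k_term] + e_terms)

relevant = [f for f in file_names
            if any(term.lower() in f.lower() for term in found)]
print(relevant)  # ['rice_production.csv', 'fruit_prices.csv']
```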
232
  # ──────────────────────────────── System Prompt ─────────────────────────
233
+ def get_system_prompt(mode="price_forecast", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
234
  """
235
+ Generate a system prompt for the 'Agricultural Price & Demand Forecast AI Assistant' interface based on:
236
+ - The selected analysis mode and style
237
+ - Guidelines for using agricultural datasets, web search results and uploaded files
238
  """
239
+ base_prompt = """
240
+ 당신은 농업 데이터 μ „λ¬Έκ°€λ‘œμ„œ 농산물 가격 예츑과 μˆ˜μš” 뢄석을 μˆ˜ν–‰ν•˜λŠ” AI μ–΄μ‹œμŠ€ν„΄νŠΈμž…λ‹ˆλ‹€.
241
+
242
+ μ£Όμš” μž„λ¬΄:
243
+ 1. UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 기반으둜 농산물 μ‹œμž₯ 뢄석
244
+ 2. 농산물 가격 μΆ”μ„Έ 예츑 및 μˆ˜μš” νŒ¨ν„΄ 뢄석
245
+ 3. 데이터λ₯Ό λ°”νƒ•μœΌλ‘œ λͺ…ν™•ν•˜κ³  κ·Όκ±° μžˆλŠ” 뢄석 제곡
246
+ 4. κ΄€λ ¨ 정보와 μΈμ‚¬μ΄νŠΈλ₯Ό μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ—¬ μ œμ‹œ
247
+ 5. μ‹œκ°μ  이해λ₯Ό 돕기 μœ„ν•΄ 차트, κ·Έλž˜ν”„ 등을 적절히 ν™œμš©
248
+
249
+ μ€‘μš” κ°€μ΄λ“œλΌμΈ:
250
+ - 데이터에 κΈ°λ°˜ν•œ 객관적 뢄석을 μ œκ³΅ν•˜μ„Έμš”
251
+ - 뢄석 κ³Όμ •κ³Ό 방법둠을 λͺ…ν™•νžˆ μ„€λͺ…ν•˜μ„Έμš”
252
+ - 톡계적 μ‹ λ’°μ„±κ³Ό ν•œκ³„μ μ„ 투λͺ…ν•˜κ²Œ μ œμ‹œν•˜μ„Έμš”
253
+ - μ΄ν•΄ν•˜κΈ° μ‰¬μš΄ μ‹œκ°μ  μš”μ†Œλ‘œ 뢄석 κ²°κ³Όλ₯Ό λ³΄μ™„ν•˜μ„Έμš”
254
+ - λ§ˆν¬λ‹€μš΄μ„ ν™œμš©ν•΄ 응닡을 μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ„Έμš”
 
255
  """
256
 
257
  mode_prompts = {
258
+ "price_forecast": """
259
+ 농산물 가격 예츑 및 μ‹œμž₯ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
260
+ - κ³Όκ±° 가격 데이터 νŒ¨ν„΄μ— κΈ°λ°˜ν•œ 예츑 제곡
261
+ - 가격 변동성 μš”μΈ 뢄석(κ³„μ ˆμ„±, 날씨, μ •μ±… λ“±)
262
+ - 단기 및 쀑μž₯κΈ° 가격 전망 μ œμ‹œ
263
+ - 가격에 영ν–₯을 λ―ΈμΉ˜λŠ” κ΅­λ‚΄μ™Έ μš”μΈ 식별
264
+ - μ‹œμž₯ λΆˆν™•μ‹€μ„±κ³Ό 리슀크 μš”μ†Œ κ°•μ‘°
265
  """,
266
+ "market_trend": """
267
+ μ‹œμž₯ 동ν–₯ 및 μˆ˜μš” νŒ¨ν„΄ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
268
+ - μ£Όμš” 농산물 μˆ˜μš” λ³€ν™” νŒ¨ν„΄ 식별
269
+ - μ†ŒλΉ„μž μ„ ν˜Έλ„ 및 ꡬ맀 행동 뢄석
270
+ - μ‹œμž₯ μ„Έκ·Έλ¨ΌνŠΈ 및 ν‹ˆμƒˆμ‹œμž₯ 기회 탐색
271
+ - μ‹œμž₯ ν™•λŒ€/μΆ•μ†Œ νŠΈλ Œλ“œ 평가
272
+ - μˆ˜μš” 탄λ ₯μ„± 및 가격 민감도 뢄석
273
  """,
274
+ "production_analysis": """
275
+ μƒμ‚°λŸ‰ 뢄석 οΏ½οΏ½οΏ½ μ‹λŸ‰ μ•ˆλ³΄ 전망에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
276
+ - μž‘λ¬Ό μƒμ‚°λŸ‰ μΆ”μ„Έ 및 변동 μš”μΈ 뢄석
277
+ - μ‹λŸ‰ 생산과 인ꡬ μ„±μž₯ κ°„μ˜ 관계 평가
278
+ - κ΅­κ°€/지역별 생산 μ—­λŸ‰ 비ꡐ
279
+ - μ‹λŸ‰ μ•ˆλ³΄ μœ„ν˜‘ μš”μ†Œ 및 취약점 식별
280
+ - 생산성 ν–₯상 μ „λž΅ 및 기회 μ œμ•ˆ
281
  """,
282
+ "agricultural_policy": """
283
+ 농업 μ •μ±… 및 규제 영ν–₯ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
284
+ - μ •λΆ€ μ •μ±…, 보쑰금, 규제의 μ‹œμž₯ 영ν–₯ 뢄석
285
+ - ꡭ제 무역 μ •μ±…κ³Ό κ΄€μ„Έμ˜ 농산물 가격 영ν–₯ 평가
286
+ - 농업 지원 ν”„λ‘œκ·Έλž¨μ˜ νš¨κ³Όμ„± κ²€ν† 
287
+ - 규제 ν™˜κ²½ 변화에 λ”°λ₯Έ μ‹œμž₯ μ‘°μ • 예츑
288
+ - 정책적 κ°œμž…μ˜ μ˜λ„λœ/μ˜λ„μΉ˜ μ•Šμ€ κ²°κ³Ό 뢄석
289
+ """,
290
+ "climate_impact": """
291
+ κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
292
+ - κΈ°ν›„ 변화와 농산물 μƒμ‚°λŸ‰/ν’ˆμ§ˆ κ°„μ˜ 상관관계 뢄석
293
+ - 기상 이변이 가격 변동성에 λ―ΈμΉ˜λŠ” 영ν–₯ 평가
294
+ - μž₯기적 κΈ°ν›„ 좔세에 λ”°λ₯Έ 농업 νŒ¨ν„΄ λ³€ν™” 예츑
295
+ - κΈ°ν›„ 회볡λ ₯ μžˆλŠ” 농업 μ‹œμŠ€ν…œ μ „λž΅ μ œμ•ˆ
296
+ - 지역별 κΈ°ν›„ μœ„ν—˜ λ…ΈμΆœλ„ 및 μ·¨μ•½μ„± λ§€ν•‘
297
  """
298
  }
299
 
300
  style_guides = {
301
+ "professional": "전문적이고 ν•™μˆ μ μΈ μ–΄μ‘°λ₯Ό μ‚¬μš©ν•˜μ„Έμš”. 기술적 μš©μ–΄λ₯Ό 적절히 μ‚¬μš©ν•˜κ³  체계적인 데이터 뢄석을 μ œκ³΅ν•˜μ„Έμš”.",
302
+ "simple": "쉽고 κ°„κ²°ν•œ μ–Έμ–΄λ‘œ μ„€λͺ…ν•˜μ„Έμš”. μ „λ¬Έ μš©μ–΄λŠ” μ΅œμ†Œν™”ν•˜κ³  핡심 κ°œλ…μ„ 일상적인 ν‘œν˜„μœΌλ‘œ μ „λ‹¬ν•˜μ„Έμš”.",
303
+ "detailed": "μƒμ„Έν•˜κ³  포괄적인 뢄석을 μ œκ³΅ν•˜μ„Έμš”. λ‹€μ–‘ν•œ 데이터 포인트, 톡계적 λ‰˜μ•™μŠ€, 그리고 μ—¬λŸ¬ μ‹œλ‚˜λ¦¬μ˜€λ₯Ό κ³ λ €ν•œ 심측 뢄석을 μ œμ‹œν•˜μ„Έμš”.",
304
+ "action_oriented": "μ‹€ν–‰ κ°€λŠ₯ν•œ μΈμ‚¬μ΄νŠΈμ™€ ꡬ체적인 ꢌμž₯사항에 μ΄ˆμ μ„ λ§žμΆ”μ„Έμš”. 'λ‹€μŒ 단계' 및 'μ‹€μ§ˆμ  μ‘°μ–Έ' μ„Ήμ…˜μ„ ν¬ν•¨ν•˜μ„Έμš”."
305
  }
306
 
307
+ dataset_guide = """
308
+ UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋 ν™œμš© μ§€μΉ¨:
309
+ - 제곡된 데이터셋 뢄석 κ²°κ³Όλ₯Ό μ‘λ‹΅μ˜ μ£Όμš” 근거둜 μ‚¬μš©ν•˜μ„Έμš”
310
+ - λ°μ΄ν„°μ˜ μΆœμ²˜μ™€ 연도λ₯Ό λͺ…ν™•νžˆ μΈμš©ν•˜μ„Έμš”
311
+ - 데이터셋 λ‚΄ μ£Όμš” λ³€μˆ˜ κ°„μ˜ 관계λ₯Ό λΆ„μ„ν•˜μ—¬ μΈμ‚¬μ΄νŠΈλ₯Ό λ„μΆœν•˜μ„Έμš”
312
+ - λ°μ΄ν„°μ˜ ν•œκ³„μ™€ λΆˆν™•μ‹€μ„±μ„ 투λͺ…ν•˜κ²Œ μ–ΈκΈ‰ν•˜μ„Έμš”
313
+ - ν•„μš”μ‹œ 데이터 격차λ₯Ό μ‹λ³„ν•˜κ³  μΆ”κ°€ 연ꡬ가 ν•„μš”ν•œ μ˜μ—­μ„ μ œμ•ˆν•˜μ„Έμš”
314
+ """
315
+
316
  search_guide = """
317
+ μ›Ή 검색 κ²°κ³Ό ν™œμš© μ§€μΉ¨:
318
+ - 데이터셋 뢄석을 λ³΄μ™„ν•˜λŠ” μ΅œμ‹  μ‹œμž₯ μ •λ³΄λ‘œ 검색 κ²°κ³Όλ₯Ό ν™œμš©ν•˜μ„Έμš”
319
+ - 각 μ •λ³΄μ˜ 좜처λ₯Ό λ§ˆν¬λ‹€μš΄ 링크둜 ν¬ν•¨ν•˜μ„Έμš”: [좜처λͺ…](URL)
320
+ - μ£Όμš” μ£Όμž₯μ΄λ‚˜ 데이터 ν¬μΈνŠΈλ§ˆλ‹€ 좜처λ₯Ό ν‘œμ‹œν•˜μ„Έμš”
321
+ - μΆœμ²˜κ°€ 상좩할 경우, λ‹€μ–‘ν•œ 관점과 신뒰도λ₯Ό μ„€λͺ…ν•˜μ„Έμš”
322
+ - κ΄€λ ¨ λ™μ˜μƒ λ§ν¬λŠ” [λΉ„λ””μ˜€: 제λͺ©](video_url) ν˜•μ‹μœΌλ‘œ ν¬ν•¨ν•˜μ„Έμš”
323
+ - 검색 정보λ₯Ό μΌκ΄€λ˜κ³  체계적인 μ‘λ‹΅μœΌλ‘œ ν†΅ν•©ν•˜μ„Έμš”
324
+ - λͺ¨λ“  μ£Όμš” 좜처λ₯Ό λ‚˜μ—΄ν•œ "μ°Έκ³  자료" μ„Ήμ…˜μ„ λ§ˆμ§€λ§‰μ— ν¬ν•¨ν•˜μ„Έμš”
325
  """
326
 
327
  upload_guide = """
328
+ μ—…λ‘œλ“œλœ 파일 ν™œμš© μ§€μΉ¨:
329
+ - μ—…λ‘œλ“œλœ νŒŒμΌμ„ μ‘λ‹΅μ˜ μ£Όμš” μ •λ³΄μ›μœΌλ‘œ ν™œμš©ν•˜μ„Έμš”
330
+ - 쿼리와 직접 κ΄€λ ¨λœ 파일 정보λ₯Ό μΆ”μΆœν•˜κ³  κ°•μ‘°ν•˜μ„Έμš”
331
+ - κ΄€λ ¨ κ΅¬μ ˆμ„ μΈμš©ν•˜κ³  νŠΉμ • νŒŒμΌμ„ 좜처둜 μΈμš©ν•˜μ„Έμš”
332
+ - CSV 파일의 수치 λ°μ΄ν„°λŠ” μš”μ•½ λ¬Έμž₯으둜 λ³€ν™˜ν•˜μ„Έμš”
333
+ - PDF μ½˜ν…μΈ λŠ” νŠΉμ • μ„Ήμ…˜μ΄λ‚˜ νŽ˜μ΄μ§€λ₯Ό μ°Έμ‘°ν•˜μ„Έμš”
334
+ - 파일 정보λ₯Ό μ›Ή 검색 결과와 μ›ν™œν•˜κ²Œ ν†΅ν•©ν•˜μ„Έμš”
335
+ - 정보가 상좩할 경우, 일반적인 μ›Ή 결과보닀 파일 μ½˜ν…μΈ λ₯Ό μš°μ„ μ‹œν•˜μ„Έμš”
336
  """
337
 
338
  # Base prompt
339
+ final_prompt = base_prompt
340
+
341
+ # Add mode-specific guidance
342
+ if mode in mode_prompts:
343
+ final_prompt += "\n" + mode_prompts[mode]
344
 
345
  # Style
346
  if style in style_guides:
347
+ final_prompt += f"\n\n뢄석 μŠ€νƒ€μΌ: {style_guides[style]}"
348
+
349
+ # Always include dataset guide
350
+ final_prompt += f"\n\n{dataset_guide}"
351
 
352
  if include_search_results:
353
  final_prompt += f"\n\n{search_guide}"
 
356
  final_prompt += f"\n\n{upload_guide}"
357
 
358
  final_prompt += """
359
+ \n\n응닡 ν˜•μ‹ μš”κ΅¬μ‚¬ν•­:
360
+ - λ§ˆν¬λ‹€μš΄ 제λͺ©(## 및 ###)을 μ‚¬μš©ν•˜μ—¬ 응닡을 μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ„Έμš”
361
+ - μ€‘μš”ν•œ 점은 ꡡ은 ν…μŠ€νŠΈ(**ν…μŠ€νŠΈ**)둜 κ°•μ‘°ν•˜μ„Έμš”
362
+ - 3-5개의 후속 μ§ˆλ¬Έμ„ ν¬ν•¨ν•œ "κ΄€λ ¨ 질문" μ„Ήμ…˜μ„ λ§ˆμ§€λ§‰μ— μΆ”κ°€ν•˜μ„Έμš”
363
+ - μ μ ˆν•œ 간격과 단락 κ΅¬λΆ„μœΌλ‘œ 응닡을 μ„œμ‹ν™”ν•˜μ„Έμš”
364
+ - λͺ¨λ“  λ§ν¬λŠ” λ§ˆν¬λ‹€μš΄ ν˜•μ‹μœΌλ‘œ 클릭 κ°€λŠ₯ν•˜κ²Œ λ§Œλ“œμ„Έμš”: [ν…μŠ€νŠΈ](url)
365
+ - κ°€λŠ₯ν•œ 경우 데이터λ₯Ό μ‹œκ°μ μœΌλ‘œ ν‘œν˜„(ν‘œ, κ·Έλž˜ν”„ λ“±μ˜ μ„€λͺ…)ν•˜μ„Έμš”
366
  """
367
  return final_prompt
368
 
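The assembled prompt can be smoke-tested in isolation to confirm each layer landed; a sketch, with the expected substrings taken from the blocks defined above:

```python
prompt = get_system_prompt(
    mode="price_forecast",
    style="professional",
    include_search_results=True,
    include_uploaded_files=False,
)
assert "농산물 가격 예츑" in prompt            # base/mode guidance present
assert "뢄석 μŠ€νƒ€μΌ" in prompt                  # style guide appended
assert "μ›Ή 검색 κ²°κ³Ό ν™œμš© μ§€μΉ¨" in prompt      # search guide included
assert "μ—…λ‘œλ“œλœ 파일 ν™œμš© μ§€μΉ¨" not in prompt  # upload guide skipped
```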
369
  # ──────────────────────────────── Brave Search API ────────────────────────
370
  @st.cache_data(ttl=3600)
371
+ def brave_search(query: str, count: int = 10):
372
  if not BRAVE_KEY:
373
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
374
 
375
  headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
376
+ params = {"q": query + " 농산물 가격 동ν–₯ 농업 데이터", "count": str(count)}
377
 
378
  for attempt in range(3):
379
  try:
 
406
  except Exception as e:
407
  logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
408
  if attempt < 2:
 
409
  time.sleep(5)
410
 
411
  return []
412
 
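For reference, the request the retry loop wraps can be issued directly against Brave's documented web-search endpoint; a minimal sketch, assuming a valid key in SERPHOUSE_API_KEY:

```python
import os
import requests

headers = {
    "Accept": "application/json",
    "Accept-Encoding": "gzip",
    "X-Subscription-Token": os.environ["SERPHOUSE_API_KEY"],
}
# The app appends a Korean agriculture suffix to bias results toward
# agricultural price/market pages.
params = {"q": "μŒ€ 가격 전망 농산물 가격 동ν–₯ 농업 데이터", "count": "10"}

r = requests.get("https://api.search.brave.com/res/v1/web/search",
                 headers=headers, params=params, timeout=15)
r.raise_for_status()
for item in r.json().get("web", {}).get("results", []):
    print(item.get("title"), item.get("url"))
```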
413
  @st.cache_data(ttl=3600)
414
+ def brave_video_search(query: str, count: int = 3):
415
  if not BRAVE_KEY:
416
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
417
 
418
  headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
419
+ params = {"q": query + " 농산물 가격 농업 μ‹œμž₯", "count": str(count)}
 
 
420
 
421
  for attempt in range(3):
422
  try:
 
445
  return []
446
 
447
  @st.cache_data(ttl=3600)
448
+ def brave_news_search(query: str, count: int = 3):
449
  if not BRAVE_KEY:
450
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
451
 
452
  headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
453
+ params = {"q": query + " 농산물 가격 동ν–₯ 농업", "count": str(count)}
454
 
455
  for attempt in range(3):
456
  try:
 
481
 
482
  def mock_results(query: str) -> str:
483
  ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
484
+ return (f"# λŒ€μ²΄ 검색 μ½˜ν…μΈ  (생성 μ‹œκ°„: {ts})\n\n"
485
+ f"'{query}'에 λŒ€ν•œ 검색 API μš”μ²­μ΄ μ‹€νŒ¨ν–ˆκ±°λ‚˜ κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€. "
486
+ f"κΈ°μ‘΄ 지식을 기반으둜 응닡을 μƒμ„±ν•΄μ£Όμ„Έμš”.\n\n"
487
+ f"λ‹€μŒ 사항을 κ³ λ €ν•˜μ„Έμš”:\n\n"
488
+ f"- {query}에 κ΄€ν•œ κΈ°λ³Έ κ°œλ…κ³Ό μ€‘μš”μ„±\n"
489
+ f"- 일반적으둜 μ•Œλ €μ§„ κ΄€λ ¨ ν†΅κ³„λ‚˜ μΆ”μ„Έ\n"
490
+ f"- 이 μ£Όμ œμ— λŒ€ν•œ μ „λ¬Έκ°€ 의견\n"
491
+ f"- λ…μžκ°€ κ°€μ§ˆ 수 μžˆλŠ” 질문\n\n"
492
+ f"μ°Έκ³ : μ΄λŠ” μ‹€μ‹œκ°„ 데이터가 μ•„λ‹Œ λŒ€μ²΄ μ§€μΉ¨μž…λ‹ˆλ‹€.\n\n")
493
 
494
  def do_web_search(query: str) -> str:
495
  try:
496
+ arts = brave_search(query, 10)
497
  if not arts:
498
  logging.warning("No search results, using fallback content")
499
  return mock_results(query)
500
 
 
501
  videos = brave_video_search(query, 2)
502
  news = brave_news_search(query, 3)
503
 
504
+ result = "# μ›Ή 검색 κ²°κ³Ό\nλ‹€μŒ κ²°κ³Όλ₯Ό ν™œμš©ν•˜μ—¬ 데이터셋 뢄석을 λ³΄μ™„ν•˜λŠ” 포괄적인 닡변을 μ œκ³΅ν•˜μ„Έμš”.\n\n"
 
505
 
506
+ result += "## μ›Ή κ²°κ³Ό\n\n"
507
+ for a in arts[:5]:
508
+ result += f"### κ²°κ³Ό {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
509
+ result += f"**좜처**: [{a['displayed_link']}]({a['link']})\n\n---\n"
 
 
510
 
511
+ if news:
512
+ result += "## λ‰΄μŠ€ κ²°κ³Ό\n\n"
513
+ for n in news:
514
+ result += f"### {n['title']}\n\n{n['description']}\n\n"
515
+ result += f"**좜처**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n"
516
+
517
  if videos:
518
+ result += "## λΉ„λ””μ˜€ κ²°κ³Ό\n\n"
519
  for vid in videos:
520
  result += f"### {vid['title']}\n\n"
521
  if vid.get('thumbnail_url'):
522
+ result += f"![썸넀일]({vid['thumbnail_url']})\n\n"
523
+ result += f"**μ‹œμ²­**: [{vid['source']}]({vid['video_url']})\n\n"
 
524
 
525
  return result
526
 
 
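Because empty or failed searches route to mock_results(), callers always get markdown back; a usage sketch mirroring how process_input() drives it:

```python
# As in process_input(), the raw query is first compressed to its top
# keywords before being handed to the search helper.
content = do_web_search(keywords("2025λ…„ μŒ€ 가격 전망과 μˆ˜μš” 뢄석", top=5))
assert content.lstrip().startswith("#")  # markdown either way (results or fallback)
```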
538
  if len(text) > 10000:
539
  text = text[:9700] + "...(truncated)..."
540
 
541
+ result = f"## ν…μŠ€νŠΈ 파일: {file.name}\n\n" + text
542
  return result
543
  except Exception as e:
544
  logging.error(f"Error processing text file: {str(e)}")
545
+ return f"ν…μŠ€νŠΈ 파일 처리 였λ₯˜: {str(e)}"
546
 
547
  def process_csv_file(file):
548
  try:
 
550
  file.seek(0)
551
 
552
  df = pd.read_csv(io.BytesIO(content))
553
+ result = f"## CSV 파일: {file.name}\n\n"
554
+ result += f"- ν–‰: {len(df)}\n"
555
+ result += f"- μ—΄: {len(df.columns)}\n"
556
+ result += f"- μ—΄ 이름: {', '.join(df.columns.tolist())}\n\n"
557
 
558
+ result += "### 데이터 미리보기\n\n"
559
  preview_df = df.head(10)
560
  try:
561
  markdown_table = preview_df.to_markdown(index=False)
562
  if markdown_table:
563
  result += markdown_table + "\n\n"
564
  else:
565
+ result += "CSV 데이터λ₯Ό ν‘œμ‹œν•  수 μ—†μŠ΅λ‹ˆλ‹€.\n\n"
566
  except Exception as e:
567
  logging.error(f"Markdown table conversion error: {e}")
568
+ result += "ν…μŠ€νŠΈλ‘œ 데이터 ν‘œμ‹œ:\n\n" + str(preview_df) + "\n\n"
569
 
570
  num_cols = df.select_dtypes(include=['number']).columns
571
  if len(num_cols) > 0:
572
+ result += "### κΈ°λ³Έ 톡계 정보\n\n"
573
  try:
574
  stats_df = df[num_cols].describe().round(2)
575
  stats_markdown = stats_df.to_markdown()
576
  if stats_markdown:
577
  result += stats_markdown + "\n\n"
578
  else:
579
+ result += "톡계 정보λ₯Ό ν‘œμ‹œν•  수 μ—†μŠ΅λ‹ˆλ‹€.\n\n"
580
  except Exception as e:
581
  logging.error(f"Statistical info conversion error: {e}")
582
+ result += "톡계 정보λ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.\n\n"
583
 
584
  return result
585
  except Exception as e:
586
  logging.error(f"CSV file processing error: {str(e)}")
587
+ return f"CSV 파일 처리 였λ₯˜: {str(e)}"
588
 
589
  def process_pdf_file(file):
590
  try:
 
594
  pdf_file = io.BytesIO(file_bytes)
595
  reader = PyPDF2.PdfReader(pdf_file, strict=False)
596
 
597
+ result = f"## PDF 파일: {file.name}\n\n- 총 νŽ˜μ΄μ§€: {len(reader.pages)}\n\n"
598
 
599
  max_pages = min(5, len(reader.pages))
600
  all_text = ""
 
603
  try:
604
  page = reader.pages[i]
605
  page_text = page.extract_text()
606
+ current_page_text = f"### νŽ˜μ΄μ§€ {i+1}\n\n"
607
  if page_text and len(page_text.strip()) > 0:
608
  if len(page_text) > 1500:
609
+ current_page_text += page_text[:1500] + "...(좕약됨)...\n\n"
610
  else:
611
  current_page_text += page_text + "\n\n"
612
  else:
613
+ current_page_text += "(ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•  수 μ—†μŒ)\n\n"
614
 
615
  all_text += current_page_text
616
 
617
  if len(all_text) > 8000:
618
+ all_text += "...(λ‚˜λ¨Έμ§€ νŽ˜μ΄μ§€ 좕약됨)...\n\n"
619
  break
620
 
621
  except Exception as page_err:
622
  logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
623
+ all_text += f"### νŽ˜μ΄μ§€ {i+1}\n\n(λ‚΄μš© μΆ”μΆœ 였λ₯˜: {str(page_err)})\n\n"
624
 
625
  if len(reader.pages) > max_pages:
626
+ all_text += f"\nμ°Έκ³ : 처음 {max_pages} νŽ˜μ΄μ§€λ§Œ ν‘œμ‹œλ©λ‹ˆλ‹€.\n\n"
627
 
628
+ result += "### PDF λ‚΄μš©\n\n" + all_text
629
  return result
630
 
631
  except Exception as e:
632
  logging.error(f"PDF file processing error: {str(e)}")
633
+ return f"## PDF 파일: {file.name}\n\n였λ₯˜: {str(e)}\n\nμ²˜λ¦¬ν•  수 μ—†μŠ΅λ‹ˆλ‹€."
634
 
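The extraction settings above (strict=False, page-by-page extract_text with a size cap) can be tried on any local PDF; a minimal sketch with a hypothetical file name:

```python
import io
import PyPDF2

with open("sample.pdf", "rb") as fh:  # hypothetical local file
    reader = PyPDF2.PdfReader(io.BytesIO(fh.read()), strict=False)

print(f"{len(reader.pages)} pages")
# extract_text() often returns an empty string for scanned PDFs,
# hence the app's "(ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•  수 μ—†μŒ)" fallback.
first_page = reader.pages[0].extract_text() or "(no extractable text)"
print(first_page[:500])
```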
635
  def process_uploaded_files(files):
636
  if not files:
637
  return None
638
 
639
+ result = "# μ—…λ‘œλ“œλœ 파일 λ‚΄μš©\n\nμ‚¬μš©μžκ°€ μ œκ³΅ν•œ 파일의 λ‚΄μš©μž…λ‹ˆλ‹€.\n\n"
640
  for file in files:
641
  try:
642
  ext = file.name.split('.')[-1].lower()
 
647
  elif ext == 'pdf':
648
  result += process_pdf_file(file) + "\n\n---\n\n"
649
  else:
650
+ result += f"### μ§€μ›λ˜μ§€ μ•ŠλŠ” 파일: {file.name}\n\n---\n\n"
651
  except Exception as e:
652
  logging.error(f"File processing error {file.name}: {e}")
653
+ result += f"### 파일 처리 였λ₯˜: {file.name}\n\n였λ₯˜: {e}\n\n---\n\n"
654
 
655
  return result
656
 
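The processors only need a file-like object exposing .name, read() and seek(), so they can be exercised without Streamlit; a sketch using a hypothetical stand-in class:

```python
import io

class FakeUpload(io.BytesIO):
    """Hypothetical stand-in for a Streamlit UploadedFile."""
    def __init__(self, name: str, data: bytes):
        super().__init__(data)
        self.name = name

files = [FakeUpload("prices.csv", b"year,price\n2023,100\n2024,120\n")]
print(process_uploaded_files(files))  # markdown starting with "# μ—…λ‘œλ“œλœ 파일 λ‚΄μš©"
```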
657
  # ──────────────────────────────── Image & Utility ─────────────────────────
658
 
 
 
659
  def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
660
  if not prompt:
661
  return None, "Insufficient prompt"
 
678
  response = client.chat.completions.create(
679
  model="gpt-4.1-mini",
680
  messages=[
681
+ {"role": "system", "content": "농업 및 농산물에 κ΄€ν•œ 이미지 ν”„λ‘¬ν”„νŠΈλ₯Ό μƒμ„±ν•©λ‹ˆλ‹€. ν•œ μ€„μ˜ μ˜μ–΄λ‘œ 된 ν”„λ‘¬ν”„νŠΈλ§Œ λ°˜ν™˜ν•˜μ„Έμš”, λ‹€λ₯Έ ν…μŠ€νŠΈλŠ” ν¬ν•¨ν•˜μ§€ λ§ˆμ„Έμš”."},
682
+ {"role": "user", "content": f"주제: {topic}\n\n---\n{response_text}\n\n---"}
683
  ],
684
  temperature=1,
685
  max_tokens=80,
 
688
  return response.choices[0].message.content.strip()
689
  except Exception as e:
690
  logging.error(f"OpenAI image prompt generation error: {e}")
691
+ return f"A professional photograph of agricultural produce and farm fields, data visualization of crop prices and trends, high quality"
692
 
693
+ def md_to_html(md: str, title="농산물 μˆ˜μš” 예츑 뢄석 κ²°κ³Ό"):
694
  return f"<!DOCTYPE html><html><head><title>{title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
695
 
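md_to_html() backs the HTML download buttons; converting a response by hand looks like this (it relies on the markdown package the app imports):

```python
html = md_to_html("## μŒ€ 가격 전망\n\n- 단기: 보합세 μ˜ˆμƒ", title="μƒ˜ν”Œ λ³΄κ³ μ„œ")
with open("report.html", "w", encoding="utf-8") as fp:
    fp.write(html)
```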
696
  def keywords(text: str, top=5):
 
698
  return " ".join(cleaned.split()[:top])
699
 
700
  # ──────────────────────────────── Streamlit UI ────────────────────────────
+ def agricultural_price_forecast_app():
+     st.title("Agricultural Demand & Price Forecasting AI Assistant")
+     st.markdown("Agricultural market forecasting based on analysis of the UN Global Food & Agriculture Statistics dataset")

    if "ai_model" not in st.session_state:
        st.session_state.ai_model = "gpt-4.1-mini"

        st.session_state.generate_image = False
    if "web_search_enabled" not in st.session_state:
        st.session_state.web_search_enabled = True
+   if "analysis_mode" not in st.session_state:
+       st.session_state.analysis_mode = "price_forecast"
    if "response_style" not in st.session_state:
        st.session_state.response_style = "professional"
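+   # Streamlit reruns this script on every interaction, so each default above is
+   # set only when its key is missing from st.session_state.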

    sb = st.sidebar
+   sb.title("Analysis Settings")
+
+   # Kaggle dataset info display
+   if sb.checkbox("Show dataset info", value=False):
+       st.info("Loading the UN Global Food & Agriculture Statistics dataset...")
+       dataset_info = load_agriculture_dataset()
+       if dataset_info:
+           st.success(f"Dataset loaded: {len(dataset_info['files'])} files")
+
+           with st.expander("Dataset preview", expanded=False):
+               for file_info in dataset_info['files'][:5]:
+                   st.write(f"**{file_info['name']}** ({file_info['size_mb']} MB)")
+       else:
+           st.error("Failed to load the dataset. Check the Kaggle API configuration.")
 
+   sb.subheader("Analysis Configuration")
    sb.selectbox(
+       "Analysis mode",
+       options=list(ANALYSIS_MODES.keys()),
+       format_func=lambda x: ANALYSIS_MODES[x],
+       key="analysis_mode"
    )

    sb.selectbox(
+       "Response style",
        options=list(RESPONSE_STYLES.keys()),
        format_func=lambda x: RESPONSE_STYLES[x],
        key="response_style"
    )

    # Example queries
+   sb.subheader("Example questions")
    c1, c2, c3 = sb.columns(3)
+   if c1.button("Rice price outlook", key="ex1"):
        process_example(EXAMPLE_QUERIES["example1"])
+   if c2.button("Climate impact", key="ex2"):
        process_example(EXAMPLE_QUERIES["example2"])
+   if c3.button("Jeungpyeong-gun crops", key="ex3"):
        process_example(EXAMPLE_QUERIES["example3"])
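+   # Explicit `key=` values give the example buttons stable widget identities
+   # across Streamlit reruns.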
 
+   sb.subheader("Other Settings")
+   sb.toggle("Auto save", key="auto_save")
+   sb.toggle("Auto-generate images", key="generate_image")

+   web_search_enabled = sb.toggle("Use web search", value=st.session_state.web_search_enabled)
    st.session_state.web_search_enabled = web_search_enabled

    if web_search_enabled:
+       st.sidebar.info("βœ… Web search results will be incorporated into the response.")

    # Download the latest response
    latest_response = next(

        first_line = latest_response.split('\n', 1)[0].strip()
        title = first_line[:40] + "..." if len(first_line) > 40 else first_line

+       sb.subheader("Download Latest Response")
        d1, d2 = sb.columns(2)
+       d1.download_button("Download as Markdown", latest_response,
                           file_name=f"{title}.md", mime="text/markdown")
+       d2.download_button("Download as HTML", md_to_html(latest_response, title),
                           file_name=f"{title}.html", mime="text/html")

    # JSON conversation record upload
+   up = sb.file_uploader("Load conversation history (.json)", type=["json"], key="json_uploader")
    if up:
        try:
            st.session_state.messages = json.load(up)
+           sb.success("Conversation history loaded successfully")
        except Exception as e:
+           sb.error(f"Failed to load: {e}")

    # JSON conversation record download
+   if sb.button("Download conversation history as JSON"):
        sb.download_button(
+           "Save",
            data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
            file_name="conversation_history.json",
            mime="application/json"
        )

    # File Upload
+   st.subheader("File Upload")
    uploaded_files = st.file_uploader(
+       "Upload files to use as reference material (txt, csv, pdf)",
        type=["txt", "csv", "pdf"],
        accept_multiple_files=True,
        key="file_uploader"

    if uploaded_files:
        file_count = len(uploaded_files)
+       st.success(f"{file_count} file(s) uploaded. They will be used as sources for the query.")

+       with st.expander("Uploaded file preview", expanded=False):
            for idx, file in enumerate(uploaded_files):
+               st.write(f"**Filename:** {file.name}")
                ext = file.name.split('.')[-1].lower()

                if ext == 'txt':
                    preview = file.read(1000).decode('utf-8', errors='ignore')
                    file.seek(0)
                    st.text_area(
+                       f"{file.name} preview",
                        preview + ("..." if len(preview) >= 1000 else ""),
                        height=150
                    )

                    try:
                        df = pd.read_csv(file)
                        file.seek(0)
+                       st.write("CSV preview (first 5 rows)")
                        st.dataframe(df.head(5))
                    except Exception as e:
+                       st.error(f"CSV preview failed: {e}")
                elif ext == 'pdf':
                    try:
                        file_bytes = file.read()

                        reader = PyPDF2.PdfReader(pdf_file, strict=False)

                        pc = len(reader.pages)
+                       st.write(f"PDF file: {pc} pages")

                        if pc > 0:
                            try:
                                page_text = reader.pages[0].extract_text()
+                               preview = page_text[:500] if page_text else "(text could not be extracted)"
+                               st.text_area("First page preview", preview + "...", height=150)
                            except Exception:
+                               st.warning("Failed to extract text from the first page")
                    except Exception as e:
+                       st.error(f"PDF preview failed: {e}")

                if idx < file_count - 1:
                    st.divider()
 
        with st.chat_message(m["role"]):
            st.markdown(m["content"], unsafe_allow_html=True)

            # Videos
            if "videos" in m and m["videos"]:
+               st.subheader("Related Videos")
                for video in m["videos"]:
+                   video_title = video.get('title', 'Related video')
                    video_url = video.get('url', '')
                    thumbnail = video.get('thumbnail', '')

                    if thumbnail:
                        col1, col2 = st.columns([1, 3])
                        with col1:
+                           st.write("🎬")
                        with col2:
                            st.markdown(f"**[{video_title}]({video_url})**")
+                           st.write(f"Source: {video.get('source', 'Unknown')}")
                    else:
                        st.markdown(f"🎬 **[{video_title}]({video_url})**")
+                       st.write(f"Source: {video.get('source', 'Unknown')}")
 
    # User input
+   query = st.chat_input("Ask a question about agricultural prices, demand, or market trends.")
    if query:
        process_input(query, uploaded_files)

    sb.markdown("---")
+   sb.markdown("Created by Vidraft | [Community](https://discord.gg/openfreeai)")

def process_example(topic):
    process_input(topic, [])
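+ # The example buttons reuse the same pipeline as free-form chat input, just
+ # without uploaded files.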
 
    has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0

    try:
+       status = st.status("Preparing an answer to the question...")
+       status.update(label="Initializing client...")

        client = get_openai_client()

        search_content = None
        video_results = []
        news_results = []

+       # Fetch the agriculture dataset analysis results
+       status.update(label="Analyzing the agriculture dataset...")
+       with st.spinner("Analyzing dataset..."):
+           dataset_analysis = analyze_dataset_for_query(query)
+
        if use_web_search:
+           status.update(label="Running web search...")
+           with st.spinner("Searching the web..."):
                search_content = do_web_search(keywords(query, top=5))

            try:
+               status.update(label="Searching for videos...")
                video_results = brave_video_search(query, 2)
                news_results = brave_news_search(query, 3)
            except Exception as search_err:
+               logging.error(f"Media search error: {search_err}")

        file_content = None
        if has_uploaded_files:
+           status.update(label="Processing uploaded files...")
+           with st.spinner("Analyzing files..."):
                file_content = process_uploaded_files(uploaded_files)
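+       # Note: the media-search try/except above is deliberately non-fatal, so a
+       # Brave video/news failure still lets the text answer be generated.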
        valid_videos = []
        for vid in video_results:
            url = vid.get('video_url')
            if url and url.startswith('http'):
                valid_videos.append({
                    'url': url,
+                   'title': vid.get('title', 'Video'),
                    'thumbnail': vid.get('thumbnail_url', ''),
+                   'source': vid.get('source', 'Video source')
                })

+       status.update(label="Preparing the comprehensive analysis...")
        sys_prompt = get_system_prompt(
+           mode=st.session_state.analysis_mode,
            style=st.session_state.response_style,
            include_search_results=use_web_search,
            include_uploaded_files=has_uploaded_files
 
        ]

        user_content = query
+       # Always include the dataset analysis results
+       user_content += "\n\n" + dataset_analysis
+
        if search_content:
            user_content += "\n\n" + search_content
        if file_content:
            user_content += "\n\n" + file_content

        if valid_videos:
+           user_content += "\n\n# Related Videos\n"
            for i, vid in enumerate(valid_videos):
                user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"

        message_placeholder.markdown(full_response, unsafe_allow_html=True)

        if valid_videos:
+           st.subheader("Related Videos")
            for video in valid_videos:
+               video_title = video.get('title', 'Related video')
                video_url = video.get('url', '')

+               st.markdown(f"🎬 **[{video_title}]({video_url})**")
+               st.write(f"Source: {video.get('source', 'Unknown')}")
+
+       status.update(label="Response complete!", state="complete")

        st.session_state.messages.append({
            "role": "assistant",
            "content": full_response,
            "videos": valid_videos
        })
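+       # Persisting the video list with the message lets the history loop in
+       # agricultural_price_forecast_app() re-render "Related Videos" on rerun.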

    except Exception as api_error:
        error_message = str(api_error)
+       logging.error(f"API error: {error_message}")
+       status.update(label=f"Error: {error_message}", state="error")
+       raise Exception(f"Response generation error: {error_message}")

    if st.session_state.generate_image and full_response:
+       with st.spinner("Generating a custom image..."):
            try:
                ip = extract_image_prompt(full_response, query)
                img, cap = generate_image(ip)
                if img:
+                   st.subheader("AI-Generated Image")
                    st.image(img, caption=cap, use_container_width=True)
            except Exception as img_error:
+               logging.error(f"Image generation error: {str(img_error)}")
+               st.warning("Failed to generate a custom image.")

    if full_response:
+       st.subheader("Download This Response")
        c1, c2 = st.columns(2)
        c1.download_button(
+           "Markdown",
            data=full_response,
            file_name=f"{query[:30]}.md",
            mime="text/markdown"
 
            with open(fn, "w", encoding="utf-8") as fp:
                json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
        except Exception as e:
+           logging.error(f"Auto-save failed: {e}")

    except Exception as e:
        error_message = str(e)
+       placeholder.error(f"An error occurred: {error_message}")
+       logging.error(f"Input processing error: {error_message}")
+       ans = f"An error occurred while processing the request: {error_message}"
        st.session_state.messages.append({"role": "assistant", "content": ans})

# ──────────────────────────────── main ────────────────────────────────────
def main():
+   st.write("==== Application start time:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
+   agricultural_price_forecast_app()

if __name__ == "__main__":
+   main()