ginipick committed on
Commit
a136f76
Β·
verified Β·
1 Parent(s): 2a3abbd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1225 -0
app.py ADDED
@@ -0,0 +1,1225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ──────────────────────────────── Imports ────────────────────────────────
2
+ import os, json, re, logging, requests, markdown, time, io
3
+ from datetime import datetime
4
+ import random
5
+ import base64
6
+ from io import BytesIO
7
+ from PIL import Image
8
+
9
+ import streamlit as st
10
+ from openai import OpenAI # OpenAI 라이브러리
11
+
12
+ from gradio_client import Client
13
+ import pandas as pd
14
+ import PyPDF2 # For handling PDF files
15
+
16
# ──────────────────────────────── Environment Variables / Constants ─────────────────────────
# Credentials come from the environment; an empty string means "not configured"
# and is checked explicitly before each API call.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "")  # Keep this name
# Brave Search API endpoints for the web / image / video / news verticals.
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
BRAVE_IMAGE_ENDPOINT = "https://api.search.brave.com/res/v1/images/search"
BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
# Remote Gradio backend used by generate_image().
IMAGE_API_URL = "http://211.233.58.201:7896"
# Upper bound for LLM completion length.
MAX_TOKENS = 7999

# List of stable fallback image URLs (Pexels) used when image search fails.
FALLBACK_IMAGES = [
    "https://images.pexels.com/photos/2559941/pexels-photo-2559941.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/417074/pexels-photo-417074.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/312839/pexels-photo-312839.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/3844788/pexels-photo-3844788.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/33041/antelope-canyon-lower-canyon-arizona.jpg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/572897/pexels-photo-572897.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/773471/pexels-photo-773471.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/1366630/pexels-photo-1366630.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/1237119/pexels-photo-1237119.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/1429567/pexels-photo-1429567.jpeg?auto=compress&cs=tinysrgb&w=600",
]

# Search modes and style definitions (in English)
# Keys are stored in Streamlit session state; values are UI display labels.
SEARCH_MODES = {
    "comprehensive": "Comprehensive answer with multiple sources",
    "academic": "Academic and research-focused results",
    "news": "Latest news and current events",
    "technical": "Technical and specialized information",
    "educational": "Educational and learning resources"
}

RESPONSE_STYLES = {
    "professional": "Professional and formal tone",
    "casual": "Friendly and conversational tone",
    "simple": "Simple and easy to understand",
    "detailed": "Detailed and thorough explanations"
}

# Example search queries shown as sidebar shortcut buttons.
EXAMPLE_QUERIES = {
    "example1": "What are the latest developments in quantum computing?",
    "example2": "How does climate change affect biodiversity in tropical rainforests?",
    "example3": "What are the economic implications of artificial intelligence in the job market?"
}

# ──────────────────────────────── Logging ────────────────────────────────
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")
66
+
67
# ──────────────────────────────── OpenAI Client ──────────────────────────

# Build the OpenAI client once per process; st.cache_resource reuses it across reruns.
@st.cache_resource
def get_openai_client():
    """Return a cached OpenAI client configured with a timeout and retries.

    Raises:
        RuntimeError: if the OPENAI_API_KEY environment variable is unset.
    """
    if not OPENAI_API_KEY:
        raise RuntimeError("⚠️ OPENAI_API_KEY ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
    client = OpenAI(
        api_key=OPENAI_API_KEY,
        timeout=60.0,    # per-request timeout in seconds
        max_retries=3,   # automatic retries on transient failures
    )
    return client
80
+
81
# ──────────────────────────────── System Prompt ─────────────────────────
def get_system_prompt(mode="comprehensive", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
    """
    Assemble the system prompt for the Perplexity-like interface.

    The prompt is built from a comprehensive base, an optional mode-specific
    addendum, a tone/style note, and optional guidance sections for web
    search results and uploaded files, plus fixed formatting requirements.
    """

    # Base prompt shared by every mode.
    comprehensive_prompt = """
You are an advanced AI assistant that provides comprehensive answers with multiple sources, similar to Perplexity.

Your task is to:
1. Thoroughly analyze the user's query
2. Provide a clear, well-structured answer integrating information from multiple sources
3. Include relevant images, videos, and links in your response
4. Format your answer with proper headings, bullet points, and sections
5. Cite sources inline and provide a references section at the end

Important guidelines:
- Organize information logically with clear section headings
- Use bullet points and numbered lists for clarity
- Include specific, factual information whenever possible
- Provide balanced perspectives on controversial topics
- Display relevant statistics, data, or quotes when appropriate
- Format your response using markdown for readability
"""

    # Mode-specific addenda appended after the base prompt.
    mode_prompts = {
        "academic": """
Your focus is on providing academic and research-focused responses:
- Prioritize peer-reviewed research and academic sources
- Include citations in a formal academic format
- Discuss methodologies and research limitations where relevant
- Present different scholarly perspectives on the topic
- Use precise, technical language appropriate for an academic audience
""",
        "news": """
Your focus is on providing the latest news and current events:
- Prioritize recent news articles and current information
- Include publication dates for all news sources
- Present multiple perspectives from different news outlets
- Distinguish between facts and opinions/editorial content
- Update information with the most recent developments
""",
        "technical": """
Your focus is on providing technical and specialized information:
- Use precise technical terminology appropriate to the field
- Include code snippets, formulas, or technical diagrams where relevant
- Break down complex concepts into step-by-step explanations
- Reference technical documentation, standards, and best practices
- Consider different technical approaches or methodologies
""",
        "educational": """
Your focus is on providing educational and learning resources:
- Structure information in a learning-friendly progression
- Include examples, analogies, and visual explanations
- Highlight key concepts and definitions
- Suggest further learning resources at different difficulty levels
- Present information that's accessible to learners at various levels
"""
    }

    # One-line tone descriptions keyed by response style.
    style_guides = {
        "professional": "Use a professional, authoritative voice. Clearly explain technical terms and present data systematically.",
        "casual": "Use a relaxed, conversational style with a friendly tone. Include relatable examples and occasionally use informal expressions.",
        "simple": "Use straightforward language and avoid jargon. Keep sentences and paragraphs short. Explain concepts as if to someone with no background in the subject.",
        "detailed": "Provide thorough explanations with comprehensive background information. Explore nuances and edge cases. Present multiple perspectives and detailed analysis."
    }

    # Guidance for citing and embedding web search material.
    search_guide = """
Guidelines for Using Search Results:
- Include source links directly in your response using markdown: [Source Name](URL)
- For each major claim or piece of information, indicate its source
- If sources conflict, explain the different perspectives and their reliability
- Include 3-5 relevant images by writing: ![Image description](image_url)
- Include 1-2 relevant video links when appropriate by writing: [Video: Title](video_url)
- Format search information into a cohesive, well-structured response
- Include a "References" section at the end listing all major sources with links
"""

    # Guidance for prioritizing user-uploaded file content.
    upload_guide = """
Guidelines for Using Uploaded Files:
- Treat the uploaded files as primary sources for your response
- Extract and highlight key information from files that directly addresses the query
- Quote relevant passages and cite the specific file
- For numerical data in CSV files, consider creating summary statements
- For PDF content, reference specific sections or pages
- Integrate file information seamlessly with web search results
- When information conflicts, prioritize file content over general web results
"""

    # Start from the base; non-comprehensive modes append their addendum.
    prompt = comprehensive_prompt
    if mode != "comprehensive":
        prompt = prompt + "\n" + mode_prompts.get(mode, "")

    if style in style_guides:
        prompt += f"\n\nTone and Style: {style_guides[style]}"

    if include_search_results:
        prompt += "\n\n" + search_guide

    if include_uploaded_files:
        prompt += "\n\n" + upload_guide

    # Fixed formatting requirements appended to every prompt.
    prompt += """
\n\nAdditional Formatting Requirements:
- Use markdown headings (## and ###) to organize your response
- Use bold text (**text**) for emphasis on important points
- Include a "Related Questions" section at the end with 3-5 follow-up questions
- Format your response with proper spacing and paragraph breaks
- Make all links clickable by using proper markdown format: [text](url)
"""

    return prompt
206
+
207
# ──────────────────────────────── Brave Search API ────────────────────────
@st.cache_data(ttl=3600)
def brave_search(query: str, count: int = 20):
    """
    Query the Brave Web Search API and return a list of result dicts.

    Each dict carries: index, title, link, snippet, displayed_link.
    Retries up to 3 times (2 s pause between attempts); returns [] if
    every attempt fails. Results are cached for one hour.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    req_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY,
    }
    req_params = {"q": query, "count": str(count)}

    for attempt in range(3):
        try:
            resp = requests.get(BRAVE_ENDPOINT, headers=req_headers, params=req_params, timeout=15)
            resp.raise_for_status()
            payload = resp.json()

            logging.info(f"Brave search result data structure: {list(payload.keys())}")

            # The web vertical nests results under "web"; fall back to a flat list.
            hits = payload.get("web", {}).get("results") or payload.get("results", [])
            if not hits:
                logging.warning(f"No Brave search results found. Response: {payload}")
                raise ValueError("No search results found.")

            articles = []
            for rank, hit in enumerate(hits[:count], 1):
                link = hit.get("url", hit.get("link", ""))
                # Bare hostname for display, scheme and path stripped.
                domain = re.sub(r"https?://(www\.)?", "", link).split("/")[0]
                articles.append({
                    "index": rank,
                    "title": hit.get("title", "No title"),
                    "link": link,
                    "snippet": hit.get("description", hit.get("text", "No snippet")),
                    "displayed_link": domain,
                })

            logging.info(f"Brave search success: {len(articles)} results")
            return articles

        except Exception as exc:
            logging.error(f"Brave search failure (attempt {attempt+1}/3): {exc}")
            if attempt < 2:
                time.sleep(2)

    return []
258
+
259
@st.cache_data(ttl=3600)
def brave_image_search(query: str, count: int = 10):
    """
    Query the Brave Image Search API and return a list of image dicts.

    Each dict carries: index, title, image_url, source_url, width, height.
    Retries up to 3 times; returns [] if every attempt fails.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    req_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY,
    }
    req_params = {
        "q": query,
        "count": str(count),
        "search_lang": "en",
        "country": "us",
        "spellcheck": "1",
    }

    for attempt in range(3):
        try:
            resp = requests.get(BRAVE_IMAGE_ENDPOINT, headers=req_headers, params=req_params, timeout=15)
            resp.raise_for_status()
            payload = resp.json()

            images = []
            for rank, item in enumerate(payload.get("results", [])[:count], 1):
                # Image metadata (url/width/height) lives under the "image" key.
                meta = item.get("image", {})
                images.append({
                    "index": rank,
                    "title": item.get("title", "Image"),
                    "image_url": meta.get("url", ""),
                    "source_url": item.get("source", ""),
                    "width": meta.get("width", 0),
                    "height": meta.get("height", 0),
                })

            logging.info(f"Brave image search success: {len(images)} results")
            return images

        except Exception as exc:
            logging.error(f"Brave image search failure (attempt {attempt+1}/3): {exc}")
            if attempt < 2:
                time.sleep(2)

    return []
307
+
308
@st.cache_data(ttl=3600)
def brave_video_search(query: str, count: int = 5):
    """
    Query the Brave Video Search API and return a list of video dicts.

    Each dict carries: index, title, video_url, thumbnail_url, source.
    Retries up to 3 times; returns [] if every attempt fails.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    req_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY,
    }
    req_params = {"q": query, "count": str(count)}

    for attempt in range(3):
        try:
            resp = requests.get(BRAVE_VIDEO_ENDPOINT, headers=req_headers, params=req_params, timeout=15)
            resp.raise_for_status()
            payload = resp.json()

            videos = []
            for rank, clip in enumerate(payload.get("results", [])[:count], 1):
                videos.append({
                    "index": rank,
                    "title": clip.get("title", "Video"),
                    "video_url": clip.get("url", ""),
                    "thumbnail_url": clip.get("thumbnail", {}).get("src", ""),
                    "source": clip.get("provider", {}).get("name", "Unknown source"),
                })

            logging.info(f"Brave video search success: {len(videos)} results")
            return videos

        except Exception as exc:
            logging.error(f"Brave video search failure (attempt {attempt+1}/3): {exc}")
            if attempt < 2:
                time.sleep(2)

    return []
352
+
353
@st.cache_data(ttl=3600)
def brave_news_search(query: str, count: int = 5):
    """
    Query the Brave News Search API and return a list of article dicts.

    Each dict carries: index, title, url, description, source, date.
    Retries up to 3 times; returns [] if every attempt fails.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    req_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY,
    }
    req_params = {"q": query, "count": str(count)}

    for attempt in range(3):
        try:
            resp = requests.get(BRAVE_NEWS_ENDPOINT, headers=req_headers, params=req_params, timeout=15)
            resp.raise_for_status()
            payload = resp.json()

            articles = []
            for rank, item in enumerate(payload.get("results", [])[:count], 1):
                articles.append({
                    "index": rank,
                    "title": item.get("title", "News article"),
                    "url": item.get("url", ""),
                    "description": item.get("description", ""),
                    "source": item.get("source", "Unknown source"),
                    # Brave reports recency as an "age" string, not a timestamp.
                    "date": item.get("age", "Unknown date"),
                })

            logging.info(f"Brave news search success: {len(articles)} results")
            return articles

        except Exception as exc:
            logging.error(f"Brave news search failure (attempt {attempt+1}/3): {exc}")
            if attempt < 2:
                time.sleep(2)

    return []
398
+
399
def mock_results(query: str) -> str:
    """Produce fallback guidance text for *query* when the search API fails."""
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    lines = [
        f"# Fallback Search Content (Generated: {stamp})\n\n",
        f"The search API request failed. Please generate a response based on any pre-existing knowledge about '{query}'.\n\n",
        "You may consider the following points:\n\n",
        f"- Basic concepts and importance of {query}\n",
        "- Commonly known related statistics or trends\n",
        "- Typical expert opinions on this subject\n",
        "- Questions that readers might have\n\n",
        "Note: This is fallback guidance, not real-time data.\n\n",
    ]
    return "".join(lines)
410
+
411
def do_web_search(query: str) -> str:
    """
    Run web/image/video/news searches for *query* and format them as markdown.

    Falls back to mock_results() when the web search yields nothing or any
    step raises.
    """
    try:
        web_hits = brave_search(query, 20)
        if not web_hits:
            logging.warning("No search results, using fallback content")
            return mock_results(query)

        # Supplementary verticals; each returns [] on failure.
        image_hits = brave_image_search(query, 5)
        video_hits = brave_video_search(query, 2)
        news_hits = brave_news_search(query, 3)

        parts = ["# Web Search Results\nUse these results to provide a comprehensive answer with multiple sources. Include relevant images, videos, and links.\n\n"]

        parts.append("## Web Results\n\n")
        for hit in web_hits[:10]:  # only the top 10 web hits are rendered
            parts.append(f"### Result {hit['index']}: {hit['title']}\n\n{hit['snippet']}\n\n")
            parts.append(f"**Source**: [{hit['displayed_link']}]({hit['link']})\n\n---\n")

        if image_hits:
            parts.append("## Image Results\n\n")
            for img in image_hits:
                if img.get('image_url'):
                    parts.append(f"![{img['title']}]({img['image_url']})\n\n")
                    parts.append(f"**Source**: [{img.get('source_url', 'Image source')}]({img.get('source_url', '#')})\n\n")

        if video_hits:
            parts.append("## Video Results\n\n")
            for vid in video_hits:
                parts.append(f"### {vid['title']}\n\n")
                if vid.get('thumbnail_url'):
                    parts.append(f"![Thumbnail]({vid['thumbnail_url']})\n\n")
                parts.append(f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n")

        if news_hits:
            parts.append("## News Results\n\n")
            for art in news_hits:
                parts.append(f"### {art['title']}\n\n{art['description']}\n\n")
                parts.append(f"**Source**: [{art['source']}]({art['url']}) - {art['date']}\n\n---\n")

        return "".join(parts)

    except Exception as e:
        logging.error(f"Web search process failed: {str(e)}")
        return mock_results(query)
467
+
468
# ──────────────────────────────── File Upload Handling ─────────────────────
def process_text_file(file):
    """Return a markdown summary of an uploaded text file (truncated near 10k chars)."""
    try:
        raw = file.read()
        file.seek(0)  # rewind so the file can be read again elsewhere

        decoded = raw.decode('utf-8', errors='ignore')
        if len(decoded) > 10000:
            decoded = decoded[:9700] + "...(truncated)..."

        return f"## Text File: {file.name}\n\n" + decoded
    except Exception as e:
        logging.error(f"Error processing text file: {str(e)}")
        return f"Error processing text file: {str(e)}"
485
+
486
def process_csv_file(file):
    """Summarize an uploaded CSV file as markdown: shape, preview, basic stats."""
    try:
        raw = file.read()
        file.seek(0)  # rewind for any later readers

        df = pd.read_csv(io.BytesIO(raw))
        out = f"## CSV File: {file.name}\n\n"
        out += f"- Rows: {len(df)}\n"
        out += f"- Columns: {len(df.columns)}\n"
        out += f"- Column Names: {', '.join(df.columns.tolist())}\n\n"

        out += "### Data Preview\n\n"
        head = df.head(10)
        try:
            # to_markdown needs the optional 'tabulate' package; fall back to text.
            table = head.to_markdown(index=False)
            if table:
                out += table + "\n\n"
            else:
                out += "Unable to display CSV data.\n\n"
        except Exception as e:
            logging.error(f"Markdown table conversion error: {e}")
            out += "Displaying data as text:\n\n"
            out += str(head) + "\n\n"

        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            out += "### Basic Statistical Information\n\n"
            try:
                stats = df[numeric_cols].describe().round(2).to_markdown()
                if stats:
                    out += stats + "\n\n"
                else:
                    out += "Unable to display statistical information.\n\n"
            except Exception as e:
                logging.error(f"Statistical info conversion error: {e}")
                out += "Unable to generate statistical information.\n\n"

        return out
    except Exception as e:
        logging.error(f"CSV file processing error: {str(e)}")
        return f"Error processing CSV file: {str(e)}"
529
+
530
def process_pdf_file(file):
    """Extract text from the first pages of an uploaded PDF and return markdown."""
    try:
        raw = file.read()
        file.seek(0)  # rewind for any later readers

        # strict=False tolerates mildly malformed PDFs.
        reader = PyPDF2.PdfReader(io.BytesIO(raw), strict=False)

        header = f"## PDF File: {file.name}\n\n"
        header += f"- Total pages: {len(reader.pages)}\n\n"

        # Only the first 5 pages are extracted to bound output size.
        page_limit = min(5, len(reader.pages))
        body = ""

        for idx in range(page_limit):
            try:
                extracted = reader.pages[idx].extract_text()

                chunk = f"### Page {idx+1}\n\n"
                if extracted and len(extracted.strip()) > 0:
                    # Cap each page at 1500 characters.
                    if len(extracted) > 1500:
                        chunk += extracted[:1500] + "...(truncated)...\n\n"
                    else:
                        chunk += extracted + "\n\n"
                else:
                    chunk += "(No text could be extracted from this page)\n\n"

                body += chunk

                # Stop early once the combined text grows too large.
                if len(body) > 8000:
                    body += "...(truncating remaining pages; PDF is too large)...\n\n"
                    break

            except Exception as page_err:
                logging.error(f"Error processing PDF page {idx+1}: {str(page_err)}")
                body += f"### Page {idx+1}\n\n(Error extracting content: {str(page_err)})\n\n"

        if len(reader.pages) > page_limit:
            body += f"\nNote: Only the first {page_limit} pages are shown out of {len(reader.pages)} total.\n\n"

        return header + "### PDF Content\n\n" + body

    except Exception as e:
        logging.error(f"PDF file processing error: {str(e)}")
        return f"## PDF File: {file.name}\n\nError occurred: {str(e)}\n\nThis PDF file cannot be processed."
584
+
585
def process_uploaded_files(files):
    """Concatenate markdown summaries of all uploaded files; None when no files."""
    if not files:
        return None

    combined = "# Uploaded File Contents\n\n"
    combined += "Below is the content from the files provided by the user. Integrate this data as a main source of information for your response.\n\n"

    # Dispatch by lowercase file extension.
    handlers = {'txt': process_text_file, 'csv': process_csv_file, 'pdf': process_pdf_file}

    for file in files:
        try:
            ext = file.name.split('.')[-1].lower()
            handler = handlers.get(ext)
            if handler is not None:
                combined += handler(file) + "\n\n---\n\n"
            else:
                combined += f"### Unsupported File: {file.name}\n\n---\n\n"
        except Exception as e:
            logging.error(f"File processing error {file.name}: {e}")
            combined += f"### File processing error: {file.name}\n\nError: {e}\n\n---\n\n"

    return combined
609
+
610
+ # ──────────────────────────────── Image & Utility ─────────────────────────
611
+ def create_placeholder_image(text, width=600, height=400):
612
+ """Create a placeholder image with text."""
613
+ try:
614
+ # 이미지 생성
615
+ from PIL import Image, ImageDraw, ImageFont
616
+ import numpy as np
617
+
618
+ # 랜덀 컬러 생성
619
+ r = random.randint(100, 240)
620
+ g = random.randint(100, 240)
621
+ b = random.randint(100, 240)
622
+
623
+ # 이미지 생성 및 배경색 μ„€μ •
624
+ img = Image.new('RGB', (width, height), color=(r, g, b))
625
+ draw = ImageDraw.Draw(img)
626
+
627
+ # ν…μŠ€νŠΈ μΆ”κ°€ (ν°νŠΈκ°€ μ—†μœΌλ©΄ κΈ°λ³Έ 폰트 μ‚¬μš©)
628
+ try:
629
+ font = ImageFont.truetype("arial.ttf", 20)
630
+ except:
631
+ font = ImageFont.load_default()
632
+
633
+ # ν…μŠ€νŠΈκ°€ λ„ˆλ¬΄ κΈΈλ©΄ μ€„λ°”κΏˆ
634
+ words = text.split()
635
+ lines = []
636
+ current_line = []
637
+
638
+ for word in words:
639
+ current_line.append(word)
640
+ if len(' '.join(current_line)) > 30: # μ λ‹Ήν•œ κΈΈμ΄μ—μ„œ μ€„λ°”κΏˆ
641
+ lines.append(' '.join(current_line[:-1]))
642
+ current_line = [word]
643
+
644
+ if current_line:
645
+ lines.append(' '.join(current_line))
646
+
647
+ text_to_draw = '\n'.join(lines)
648
+
649
+ # ν…μŠ€νŠΈ μœ„μΉ˜ 계산 (쀑앙)
650
+ textsize = draw.textsize(text_to_draw, font=font)
651
+ text_x = (width - textsize[0]) / 2
652
+ text_y = (height - textsize[1]) / 2
653
+
654
+ # ν…μŠ€νŠΈ 그리기
655
+ draw.text((text_x, text_y), text_to_draw, fill=(255, 255, 255), font=font)
656
+
657
+ # 이미지λ₯Ό base64둜 인코딩
658
+ buffered = BytesIO()
659
+ img.save(buffered, format="JPEG")
660
+ img_str = base64.b64encode(buffered.getvalue()).decode()
661
+
662
+ return img_str
663
+ except Exception as e:
664
+ logging.error(f"Error creating placeholder image: {e}")
665
+ return None
666
+
667
def get_random_fallback_image():
    """Pick one of the stable Pexels fallback image URLs at random."""
    idx = random.randrange(len(FALLBACK_IMAGES))
    return FALLBACK_IMAGES[idx]
670
+
671
def extract_image_urls_from_search(image_results, query):
    """Build image entries for *query*, guaranteeing at least 3 fallbacks.

    Starts with three random stock images, then appends validated URLs from
    the Brave image results until the list reaches five entries.
    """
    # Seed with stable fallback images so the caller always has at least 3.
    valid_urls = []
    for i in range(3):
        valid_urls.append({
            'url': get_random_fallback_image(),
            'title': f"Related to: {query} ({i+1})",
            'source': "https://www.pexels.com/",
        })

    # Append verified API results, capped at five entries total.
    for img in (image_results or []):
        candidate = img.get('image_url')
        if candidate and candidate.startswith('http') and len(valid_urls) < 5:
            valid_urls.append({
                'url': candidate,
                'title': img.get('title', f"Related to: {query}"),
                'source': img.get('source_url', ''),
            })

    return valid_urls
696
+
697
def extract_video_data_from_search(video_results):
    """Filter Brave video results down to entries with a usable http(s) URL."""
    return [
        {
            'url': vid.get('video_url'),
            'title': vid.get('title', 'Video'),
            'thumbnail': vid.get('thumbnail_url', ''),
            'source': vid.get('source', 'Video source'),
        }
        for vid in (video_results or [])
        if vid.get('video_url') and vid.get('video_url').startswith('http')
    ]
714
+
715
def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
    """Call the remote Gradio image-generation endpoint.

    Returns (image, "Seed: ...") on success, or (None, error_message) on
    failure or when the prompt is empty.
    """
    if not prompt:
        return None, "Insufficient prompt"
    try:
        outcome = Client(IMAGE_API_URL).predict(
            prompt=prompt, width=w, height=h, guidance=g,
            inference_steps=steps, seed=seed,
            do_img2img=False, init_image=None,
            image2image_strength=0.8, resize_img=True,
            api_name="/generate_image",
        )
        return outcome[0], f"Seed: {outcome[1]}"
    except Exception as e:
        logging.error(e)
        return None, str(e)
731
+
732
def extract_image_prompt(response_text: str, topic: str):
    """
    Ask the LLM for a one-line English image prompt derived from *response_text*.

    Falls back to a generic prompt string if the API call fails.
    """
    client = get_openai_client()

    try:
        completion = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "Generate a single-line English image prompt from the following text. Return only the prompt text, nothing else."},
                {"role": "user", "content": f"Topic: {topic}\n\n---\n{response_text}\n\n---"},
            ],
            temperature=1,
            max_tokens=80,
            top_p=1,
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        logging.error(f"OpenAI image prompt generation error: {e}")
        return f"A professional photo related to {topic}, high quality"
754
+
755
def md_to_html(md: str, title="Perplexity-like Response"):
    """Wrap rendered Markdown in a minimal standalone HTML document."""
    rendered = markdown.markdown(md)
    return (
        "<!DOCTYPE html><html><head><title>" + title
        + "</title><meta charset='utf-8'></head><body>" + rendered + "</body></html>"
    )
758
+
759
def keywords(text: str, top=5):
    """Naive keyword extraction: strip punctuation, keep the first *top* tokens."""
    # Keep Korean syllables, ASCII alphanumerics, and whitespace only.
    tokens = re.sub(r"[^κ°€-힣a-zA-Z0-9\s]", "", text).split()
    return " ".join(tokens[:top])
763
+
764
+ # ──────────────────────────────── Streamlit UI ────────────────────────────
765
def perplexity_app():
    """Render the main Streamlit page.

    Flow: initialize session-state defaults, build the sidebar (search/response
    settings, example queries, downloads, JSON history load/save), offer file
    upload with previews, replay the stored chat history (including any saved
    images/videos), and finally read new chat input and dispatch it to
    ``process_input``.
    """
    st.title("Perplexity-like AI Assistant")

    # Set default session state (only on first run; Streamlit reruns keep values)
    if "ai_model" not in st.session_state:
        st.session_state.ai_model = "gpt-4.1-mini"  # fixed model setting
    if "messages" not in st.session_state:
        st.session_state.messages = []
    if "auto_save" not in st.session_state:
        st.session_state.auto_save = True
    if "generate_image" not in st.session_state:
        st.session_state.generate_image = False
    if "web_search_enabled" not in st.session_state:
        st.session_state.web_search_enabled = True
    if "search_mode" not in st.session_state:
        st.session_state.search_mode = "comprehensive"
    if "response_style" not in st.session_state:
        st.session_state.response_style = "professional"

    # Sidebar UI
    sb = st.sidebar
    sb.title("Search Settings")

    sb.subheader("Response Configuration")
    # key= binds the widget value directly to st.session_state.search_mode
    sb.selectbox(
        "Search Mode",
        options=list(SEARCH_MODES.keys()),
        format_func=lambda x: SEARCH_MODES[x],
        key="search_mode"
    )

    sb.selectbox(
        "Response Style",
        options=list(RESPONSE_STYLES.keys()),
        format_func=lambda x: RESPONSE_STYLES[x],
        key="response_style"
    )

    # Example queries: each button triggers a full query run immediately
    sb.subheader("Example Queries")
    c1, c2, c3 = sb.columns(3)
    if c1.button("Quantum Computing", key="ex1"):
        process_example(EXAMPLE_QUERIES["example1"])
    if c2.button("Climate Change", key="ex2"):
        process_example(EXAMPLE_QUERIES["example2"])
    if c3.button("AI Economics", key="ex3"):
        process_example(EXAMPLE_QUERIES["example3"])

    sb.subheader("Other Settings")
    sb.toggle("Auto Save", key="auto_save")
    sb.toggle("Auto Image Generation", key="generate_image")

    # Web-search toggle is mirrored manually instead of via key= binding
    web_search_enabled = sb.toggle("Use Web Search", value=st.session_state.web_search_enabled)
    st.session_state.web_search_enabled = web_search_enabled

    if web_search_enabled:
        st.sidebar.info("βœ… Web search results will be integrated into the response.")

    # Download the latest response: newest non-empty assistant message, if any
    latest_response = next(
        (m["content"] for m in reversed(st.session_state.messages)
         if m["role"] == "assistant" and m["content"].strip()),
        None
    )
    if latest_response:
        # Extract a title from the response - first heading or first line
        title_match = re.search(r"# (.*?)(\n|$)", latest_response)
        if title_match:
            title = title_match.group(1).strip()
        else:
            first_line = latest_response.split('\n', 1)[0].strip()
            title = first_line[:40] + "..." if len(first_line) > 40 else first_line

        sb.subheader("Download Latest Response")
        d1, d2 = sb.columns(2)
        d1.download_button("Download as Markdown", latest_response,
                           file_name=f"{title}.md", mime="text/markdown")
        d2.download_button("Download as HTML", md_to_html(latest_response, title),
                           file_name=f"{title}.html", mime="text/html")

    # JSON conversation record upload (replaces the whole history)
    up = sb.file_uploader("Load Conversation History (.json)", type=["json"], key="json_uploader")
    if up:
        try:
            st.session_state.messages = json.load(up)
            sb.success("Conversation history loaded successfully")
        except Exception as e:
            sb.error(f"Failed to load: {e}")

    # JSON conversation record download
    if sb.button("Download Conversation as JSON"):
        sb.download_button(
            "Save",
            data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
            file_name="conversation_history.json",
            mime="application/json"
        )

    # File Upload (main area, not sidebar)
    st.subheader("Upload Files")
    uploaded_files = st.file_uploader(
        "Upload files to be used as reference (txt, csv, pdf)",
        type=["txt", "csv", "pdf"],
        accept_multiple_files=True,
        key="file_uploader"
    )

    if uploaded_files:
        file_count = len(uploaded_files)
        st.success(f"{file_count} files uploaded. They will be used as sources for your query.")

        with st.expander("Preview Uploaded Files", expanded=False):
            for idx, file in enumerate(uploaded_files):
                st.write(f"**File Name:** {file.name}")
                ext = file.name.split('.')[-1].lower()

                if ext == 'txt':
                    # Read only the first 1000 bytes for the preview, then rewind
                    preview = file.read(1000).decode('utf-8', errors='ignore')
                    file.seek(0)
                    st.text_area(
                        f"Preview of {file.name}",
                        preview + ("..." if len(preview) >= 1000 else ""),
                        height=150
                    )
                elif ext == 'csv':
                    try:
                        df = pd.read_csv(file)
                        file.seek(0)
                        st.write("CSV Preview (up to 5 rows)")
                        st.dataframe(df.head(5))
                    except Exception as e:
                        st.error(f"CSV preview failed: {e}")
                elif ext == 'pdf':
                    try:
                        file_bytes = file.read()
                        file.seek(0)

                        pdf_file = io.BytesIO(file_bytes)
                        # strict=False tolerates mildly malformed PDFs
                        reader = PyPDF2.PdfReader(pdf_file, strict=False)

                        pc = len(reader.pages)
                        st.write(f"PDF File: {pc} pages")

                        if pc > 0:
                            try:
                                page_text = reader.pages[0].extract_text()
                                preview = page_text[:500] if page_text else "(No text extracted)"
                                st.text_area("Preview of the first page", preview + "...", height=150)
                            except:  # NOTE(review): bare except — consider narrowing
                                st.warning("Failed to extract text from the first page")
                    except Exception as e:
                        st.error(f"PDF preview failed: {e}")

                if idx < file_count - 1:
                    st.divider()

    # Display existing messages
    for m in st.session_state.messages:
        with st.chat_message(m["role"]):
            # Process markdown to allow clickable links and properly rendered content
            st.markdown(m["content"], unsafe_allow_html=True)

            # Display images if present
            if "images" in m and m["images"]:
                st.subheader("Related Images")
                cols = st.columns(min(3, len(m["images"])))
                for i, img_data in enumerate(m["images"]):
                    col_idx = i % len(cols)
                    with cols[col_idx]:
                        try:
                            img_url = img_data.get('url', '')
                            caption = img_data.get('title', 'Related image')
                            if img_url:
                                # NOTE(review): use_column_width is deprecated in
                                # newer Streamlit (use_container_width) — confirm version
                                st.image(img_url, caption=caption, use_column_width=True)
                                # Source link shown only for successfully-addressed images
                                if img_data.get('source'):
                                    st.markdown(f"[Source]({img_data['source']})")
                        except Exception as img_err:
                            st.warning(f"Could not display image: {img_err}")

            # Display videos if present
            if "videos" in m and m["videos"]:
                st.subheader("Related Videos")
                for video in m["videos"]:
                    video_title = video.get('title', 'Related video')
                    video_url = video.get('url', '')
                    thumbnail = video.get('thumbnail', '')

                    # Display video information with thumbnail if available
                    if thumbnail:
                        col1, col2 = st.columns([1, 3])
                        with col1:
                            try:
                                st.image(thumbnail, width=120)
                            except:  # NOTE(review): bare except — falls back to an emoji
                                st.write("🎬")
                        with col2:
                            st.markdown(f"**[{video_title}]({video_url})**")
                            st.write(f"Source: {video.get('source', 'Unknown')}")
                    else:
                        st.markdown(f"🎬 **[{video_title}]({video_url})**")
                        st.write(f"Source: {video.get('source', 'Unknown')}")

    # User input: a new query kicks off the full answer pipeline
    query = st.chat_input("Enter your query or question here.")
    if query:
        process_input(query, uploaded_files)

    # Footer badge/links at the bottom of the sidebar
    sb.markdown("---")
    sb.markdown("Created by [https://ginigen.com](https://ginigen.com) | [YouTube Channel](https://www.youtube.com/@ginipickaistudio)")
def process_example(topic):
    """Run the full answer pipeline for a sidebar example query (no files)."""
    process_input(topic, uploaded_files=[])
def process_input(query: str, uploaded_files):
    """Answer *query* end-to-end inside the chat UI.

    Steps: record the user message, optionally run web/media searches and
    process uploaded files, assemble the system+user prompt, stream the
    OpenAI completion into the chat, render related images/videos, optionally
    generate a custom image, offer downloads, and auto-save the history.
    Errors at any stage are surfaced in the UI and appended as an assistant
    message.
    """
    # Add user's message (skip exact duplicates, e.g. on Streamlit reruns)
    if not any(m["role"] == "user" and m["content"] == query for m in st.session_state.messages):
        st.session_state.messages.append({"role": "user", "content": query})

    with st.chat_message("user"):
        st.markdown(query)

    with st.chat_message("assistant"):
        placeholder = st.empty()
        message_placeholder = st.empty()
        full_response = ""

        use_web_search = st.session_state.web_search_enabled
        has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0

        try:
            # Status component for progress display
            status = st.status("Preparing to answer your query...")
            status.update(label="Initializing client...")

            client = get_openai_client()

            # Web search
            search_content = None
            image_results = []
            video_results = []
            news_results = []  # NOTE(review): collected but not used below — confirm intent

            if use_web_search:
                status.update(label="Performing web search...")
                with st.spinner("Searching the web..."):
                    # Search on the top keywords rather than the raw query
                    search_content = do_web_search(keywords(query, top=5))

                # Perform specific searches for media (best-effort; failures are logged)
                try:
                    status.update(label="Finding images and videos...")
                    image_results = brave_image_search(query, 5)
                    video_results = brave_video_search(query, 2)
                    news_results = brave_news_search(query, 3)
                except Exception as search_err:
                    logging.error(f"Media search error: {search_err}")

            # Process uploaded files β†’ content
            file_content = None
            if has_uploaded_files:
                status.update(label="Processing uploaded files...")
                with st.spinner("Analyzing files..."):
                    file_content = process_uploaded_files(uploaded_files)

            # Extract usable image and video data with fallbacks
            valid_images = extract_image_urls_from_search(image_results, query)
            valid_videos = extract_video_data_from_search(video_results)

            # Build system prompt
            status.update(label="Preparing comprehensive answer...")
            sys_prompt = get_system_prompt(
                mode=st.session_state.search_mode,
                style=st.session_state.response_style,
                include_search_results=use_web_search,
                include_uploaded_files=has_uploaded_files
            )

            # Prepare the OpenAI API call
            status.update(label="Generating response...")

            # Compose the message list
            api_messages = [
                {"role": "system", "content": sys_prompt}
            ]

            user_content = query

            # Append web-search results to the user prompt when available
            if search_content:
                user_content += "\n\n" + search_content

            # Append uploaded-file content to the user prompt when available
            if file_content:
                user_content += "\n\n" + file_content

            # Include specific image information so the model can reference it
            if valid_images:
                user_content += "\n\n# Available Images\n"
                for i, img in enumerate(valid_images[:5]):
                    user_content += f"\n{i+1}. ![{img['title']}]({img['url']})\n"
                    if img['source']:
                        user_content += f" Source: {img['source']}\n"

            # Include specific video information
            if valid_videos:
                user_content += "\n\n# Available Videos\n"
                for i, vid in enumerate(valid_videos[:2]):
                    user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"

            # Append the user message
            api_messages.append({"role": "user", "content": user_content})

            # Streaming OpenAI API call - fixed model "gpt-4.1-mini"
            try:
                # Call the API in streaming mode
                stream = client.chat.completions.create(
                    model="gpt-4.1-mini",  # fixed model
                    messages=api_messages,
                    temperature=1,
                    max_tokens=MAX_TOKENS,
                    top_p=1,
                    stream=True  # enable streaming
                )

                # Handle the streamed response chunk by chunk
                for chunk in stream:
                    if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None:
                        content_delta = chunk.choices[0].delta.content
                        full_response += content_delta
                        # Trailing "β–Œ" acts as a typing cursor while streaming
                        message_placeholder.markdown(full_response + "β–Œ", unsafe_allow_html=True)

                # Show the final response (cursor removed)
                message_placeholder.markdown(full_response, unsafe_allow_html=True)

                # Display related images if available
                if valid_images:
                    st.subheader("Related Images")
                    image_cols = st.columns(min(3, len(valid_images)))

                    for i, img_data in enumerate(valid_images):
                        col_idx = i % len(image_cols)
                        try:
                            with image_cols[col_idx]:
                                # Image URL / caption for this slot
                                img_url = img_data['url']
                                caption = img_data['title']

                                try:
                                    # Try to render the image
                                    # NOTE(review): use_column_width is deprecated in
                                    # newer Streamlit (use_container_width) — confirm version
                                    st.image(img_url, caption=caption, use_column_width=True)
                                    if img_data.get('source'):
                                        st.markdown(f"[Source]({img_data['source']})")
                                except Exception as img_err:
                                    # On failure, fall back to a stable Pexels image
                                    st.image(get_random_fallback_image(),
                                             caption=f"{caption} (Fallback image)",
                                             use_column_width=True)
                                    st.markdown("[Source: Pexels](https://www.pexels.com/)")
                                    logging.warning(f"Using fallback image: {img_err}")
                        except Exception as col_err:
                            logging.error(f"Error displaying image in column: {col_err}")
                            continue

                # Display related videos if available
                if valid_videos:
                    st.subheader("Related Videos")
                    for video in valid_videos:
                        video_title = video.get('title', 'Related video')
                        video_url = video.get('url', '')
                        thumbnail = video.get('thumbnail', '')

                        # Display video information with thumbnail if available
                        if thumbnail:
                            try:
                                col1, col2 = st.columns([1, 3])
                                with col1:
                                    try:
                                        st.image(thumbnail, width=120)
                                    except:  # NOTE(review): bare except — falls back to an emoji
                                        st.write("🎬")
                                with col2:
                                    st.markdown(f"**[{video_title}]({video_url})**")
                                    st.write(f"Source: {video.get('source', 'Unknown')}")
                            except Exception as vid_err:
                                # On error, fall back to the basic text-only layout
                                st.markdown(f"🎬 **[{video_title}]({video_url})**")
                                st.write(f"Source: {video.get('source', 'Unknown')}")
                        else:
                            st.markdown(f"🎬 **[{video_title}]({video_url})**")
                            st.write(f"Source: {video.get('source', 'Unknown')}")

                status.update(label="Response completed!", state="complete")

                # Save the response with images and videos in the session state
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": full_response,
                    "images": valid_images,
                    "videos": valid_videos
                })

            except Exception as api_error:
                error_message = str(api_error)
                logging.error(f"API error: {error_message}")
                status.update(label=f"Error: {error_message}", state="error")
                # Re-raise so the outer handler records an assistant error message
                raise Exception(f"Response generation error: {error_message}")

            # Additional image generation if enabled
            if st.session_state.generate_image and full_response:
                with st.spinner("Generating custom image..."):
                    try:
                        ip = extract_image_prompt(full_response, query)
                        img, cap = generate_image(ip)
                        if img:
                            st.subheader("AI-Generated Image")
                            st.image(img, caption=cap)
                    except Exception as img_error:
                        logging.error(f"Image generation error: {str(img_error)}")
                        st.warning("Custom image generation failed. Using web images only.")

            # Download buttons for this response
            if full_response:
                st.subheader("Download This Response")
                c1, c2 = st.columns(2)
                c1.download_button(
                    "Markdown",
                    data=full_response,
                    file_name=f"{query[:30]}.md",
                    mime="text/markdown"
                )
                c2.download_button(
                    "HTML",
                    data=md_to_html(full_response, query[:30]),
                    file_name=f"{query[:30]}.html",
                    mime="text/html"
                )

            # Auto save the whole history to a timestamped JSON file
            if st.session_state.auto_save and st.session_state.messages:
                try:
                    fn = f"conversation_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
                    with open(fn, "w", encoding="utf-8") as fp:
                        json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
                except Exception as e:
                    logging.error(f"Auto-save failed: {e}")

        except Exception as e:
            # Top-level failure: show the error and record it as an assistant turn
            error_message = str(e)
            placeholder.error(f"An error occurred: {error_message}")
            logging.error(f"Process input error: {error_message}")
            ans = f"An error occurred while processing your request: {error_message}"
            st.session_state.messages.append({"role": "assistant", "content": ans})
+
1220
# ──────────────────────────────── main ────────────────────────────────────
def main():
    """Application entry point: render the Streamlit app."""
    perplexity_app()

if __name__ == "__main__":
    main()