openfree committed on
Commit
727526c
·
verified ·
1 Parent(s): a342866

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -49
app.py CHANGED
@@ -14,6 +14,9 @@ import numpy as np
14
  import soundfile as sf
15
  import subprocess
16
  import shutil
 
 
 
17
  from dataclasses import dataclass
18
  from typing import List, Tuple, Dict, Optional
19
  from pathlib import Path
@@ -72,6 +75,9 @@ except:
72
 
73
  load_dotenv()
74
 
 
 
 
75
 
76
  @dataclass
77
  class ConversationConfig:
@@ -87,6 +93,70 @@ class ConversationConfig:
87
  max_new_tokens: int = 8000 # 4000์—์„œ 8000์œผ๋กœ ์ฆ๊ฐ€
88
 
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  class UnifiedAudioConverter:
91
  def __init__(self, config: ConversationConfig):
92
  self.config = config
@@ -238,10 +308,10 @@ class UnifiedAudioConverter:
238
  else:
239
  return MessagesFormatterType.LLAMA_3
240
 
241
- def _build_prompt(self, text: str, language: str = "English") -> str:
242
- """Build prompt for conversation generation"""
243
  if language == "Korean":
244
- # ๊ฐ•ํ™”๋œ ํ•œ๊ตญ์–ด ํ”„๋กฌํ”„ํŠธ
245
  template = """
246
  {
247
  "conversation": [
@@ -252,22 +322,41 @@ class UnifiedAudioConverter:
252
  ]
253
  }
254
  """
255
- return (
256
- f"{text}\n\n"
 
 
257
  f"์œ„ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ 30๋Œ€ ํ•œ๊ตญ์ธ ๋‘ ๋ช…์ด ์ง„ํ–‰ํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ํฅ๋ฏธ๋กœ์šด ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”.\n\n"
258
- f"ํ•„์ˆ˜ ์ง€์นจ:\n"
259
- f"1. ์ค€์ˆ˜(์ง„ํ–‰์ž): ์นœ๊ทผํ•˜๊ณ  ํ˜ธ๊ธฐ์‹ฌ ๋งŽ์€ ์„ฑ๊ฒฉ, ์ฒญ์ทจ์ž์˜ ๊ถ๊ธˆ์ฆ์„ ๋Œ€๋ณ€\n"
260
- f"2. ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€): ํ•ด๋‹น ์ฃผ์ œ์— ๋Œ€ํ•œ ๊นŠ์€ ์ง€์‹์„ ๊ฐ€์ง„ ์ „๋ฌธ๊ฐ€, ์‰ฝ๊ฒŒ ์„ค๋ช…ํ•˜๋Š” ๋Šฅ๋ ฅ\n"
261
- f"3. ํ•œ๊ตญ์ธ์ด ์‹ค์ œ๋กœ ์‚ฌ์šฉํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„๊ณผ ๊ฐํƒ„์‚ฌ ์‚ฌ์šฉ ('์•„~', '๊ทธ๋ ‡๊ตฌ๋‚˜', '์™€~', '์ง„์งœ์š”?')\n"
262
- f"4. ์ ์ ˆํ•œ ์กด๋Œ“๋ง๊ณผ ํŽธ์•ˆํ•œ ๋ฐ˜๋ง์„ ์„ž์–ด ์นœ๋ฐ€๊ฐ ์กฐ์„ฑ\n"
263
- f"5. ํ•œ๊ตญ ๋ฌธํ™”์™€ ์ผ์ƒ์— ๋งž๋Š” ๊ตฌ์ฒด์ ์ธ ์˜ˆ์‹œ์™€ ๋น„์œ  ์‚ฌ์šฉ\n"
264
- f"6. ๊ฐ ๋Œ€ํ™”๋Š” ์ถฉ๋ถ„ํžˆ ๊ธธ๊ณ  ์ƒ์„ธํ•˜๊ฒŒ (์ตœ์†Œ 3-4๋ฌธ์žฅ ์ด์ƒ)\n"
265
- f"7. ์ „์ฒด ๋Œ€ํ™”๋Š” ์ตœ์†Œ 10ํšŒ ์ด์ƒ์˜ ์ฃผ๊ณ ๋ฐ›๊ธฐ๋กœ ๊ตฌ์„ฑ\n"
266
- f"8. ์ฒญ์ทจ์ž๊ฐ€ '๋‚˜๋„ ๊ถ๊ธˆํ–ˆ๋˜ ๋‚ด์šฉ์ด์•ผ'๋ผ๊ณ  ๊ณต๊ฐํ•  ์ˆ˜ ์žˆ๋Š” ์งˆ๋ฌธ ํฌํ•จ\n"
267
- f"9. ํ•ต์‹ฌ ์ •๋ณด๋ฅผ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์ „๋‹ฌํ•˜๋ฉด์„œ๋„ ์ง€๋ฃจํ•˜์ง€ ์•Š๊ฒŒ ๊ตฌ์„ฑ\n"
268
- f"10. ๋งˆ๋ฌด๋ฆฌ๋Š” ํ•ต์‹ฌ ๋‚ด์šฉ ์š”์•ฝ๊ณผ ์ฒญ์ทจ์ž์—๊ฒŒ ๋„์›€์ด ๋˜๋Š” ์‹ค์šฉ์  ์กฐ์–ธ\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  f"๋‹ค์Œ JSON ํ˜•์‹์œผ๋กœ๋งŒ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”:\n{template}"
270
  )
 
 
 
271
  else:
272
  template = """
273
  {
@@ -279,8 +368,10 @@ class UnifiedAudioConverter:
279
  ]
280
  }
281
  """
282
- return (
283
- f"{text}\n\n"
 
 
284
  f"Convert the provided text into an engaging, natural podcast conversation between two experts.\n\n"
285
  f"Guidelines:\n"
286
  f"1. Alex (Host): Curious, engaging personality representing audience questions\n"
@@ -291,37 +382,59 @@ class UnifiedAudioConverter:
291
  f"6. Create at least 10 back-and-forth exchanges\n"
292
  f"7. Address common questions and misconceptions\n"
293
  f"8. Maintain an informative yet entertaining tone\n"
294
- f"9. End with key takeaways and practical advice\n\n"
 
295
  f"Return ONLY the JSON in this format:\n{template}"
296
  )
 
 
297
 
298
- def _build_messages_for_local(self, text: str, language: str = "English") -> List[Dict]:
299
- """Build messages for local LLM"""
300
  if language == "Korean":
301
  system_message = (
302
- "๋‹น์‹ ์€ ํ•œ๊ตญ ์ตœ๊ณ ์˜ ํŒŸ์บ์ŠคํŠธ ๋Œ€๋ณธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
303
- "ํ•œ๊ตญ๏ฟฝ๏ฟฝ์˜ ์ •์„œ์™€ ๋ฌธํ™”๋ฅผ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , ์ฒญ์ทจ์ž๋“ค์ด ๋๊นŒ์ง€ ์ง‘์ค‘ํ•  ์ˆ˜ ์žˆ๋Š” "
304
- "๋งค๋ ฅ์ ์ด๊ณ  ์œ ์ตํ•œ ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด๋ƒ…๋‹ˆ๋‹ค. "
305
- "์‹ค์ œ ํ•œ๊ตญ์ธ๋“ค์ด ์ผ์ƒ์—์„œ ์‚ฌ์šฉํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„๊ณผ "
306
- "์ ์ ˆํ•œ ๊ฐ์ • ํ‘œํ˜„์„ ํ†ตํ•ด ์ƒ๋™๊ฐ ์žˆ๋Š” ๋Œ€ํ™”๋ฅผ ๊ตฌ์„ฑํ•ฉ๋‹ˆ๋‹ค."
 
 
 
 
 
307
  )
308
  else:
309
  system_message = (
310
  "You are an expert podcast scriptwriter who creates engaging, "
311
  "natural conversations that keep listeners hooked. "
312
  "You understand how to balance information with entertainment, "
313
- "using real conversational patterns and authentic reactions."
 
314
  )
315
 
316
  return [
317
  {"role": "system", "content": system_message},
318
- {"role": "user", "content": self._build_prompt(text, language)}
319
  ]
320
 
321
  @spaces.GPU(duration=120)
322
  def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
323
- """Extract conversation using new local LLM (primary method)"""
324
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  # ๋จผ์ € ์ƒˆ๋กœ์šด ๋กœ์ปฌ LLM ์‹œ๋„
326
  self.initialize_local_mode()
327
 
@@ -334,9 +447,11 @@ class UnifiedAudioConverter:
334
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
335
  "ํ•œ๊ตญ ์ฒญ์ทจ์ž๋“ค์˜ ๋ฌธํ™”์  ๋งฅ๋ฝ๊ณผ ์–ธ์–ด์  ํŠน์„ฑ์„ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , "
336
  "์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๋Œ€๋ณธ์„ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. "
 
 
337
  "์‹ค์ œ ํ•œ๊ตญ์ธ์ด ๋Œ€ํ™”ํ•˜๋Š” ๊ฒƒ์ฒ˜๋Ÿผ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„, ์ ์ ˆํ•œ ๊ฐํƒ„์‚ฌ, "
338
  "๋ฌธํ™”์ ์œผ๋กœ ์ ํ•ฉํ•œ ์˜ˆ์‹œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ฒญ์ทจ์ž๊ฐ€ ๊ณต๊ฐํ•˜๊ณ  ๋ชฐ์ž…ํ•  ์ˆ˜ ์žˆ๋Š” "
339
- "๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”. JSON ํ˜•์‹์œผ๋กœ๋งŒ ์‘๋‹ตํ•˜์„ธ์š”."
340
  )
341
  else:
342
  system_message = (
@@ -344,6 +459,7 @@ class UnifiedAudioConverter:
344
  "engaging, natural conversations that captivate listeners. "
345
  "You excel at transforming complex information into accessible, "
346
  "entertaining dialogue while maintaining authenticity and educational value. "
 
347
  "Respond only in JSON format."
348
  )
349
 
@@ -364,7 +480,7 @@ class UnifiedAudioConverter:
364
 
365
  messages = BasicChatHistory()
366
 
367
- prompt = self._build_prompt(text, language)
368
  response = agent.get_chat_response(
369
  prompt,
370
  llm_sampling_settings=settings,
@@ -384,10 +500,10 @@ class UnifiedAudioConverter:
384
 
385
  except Exception as e:
386
  print(f"Local LLM failed: {e}, falling back to legacy local method")
387
- return self.extract_conversation_legacy_local(text, language, progress)
388
 
389
  @spaces.GPU(duration=120)
390
- def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None) -> Dict:
391
  """Extract conversation using legacy local model (fallback)"""
392
  try:
393
  self.initialize_legacy_local_mode()
@@ -397,17 +513,20 @@ class UnifiedAudioConverter:
397
  system_message = (
398
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
399
  "30๋Œ€ ํ•œ๊ตญ์ธ ์ฒญ์ทจ์ž๋ฅผ ๋Œ€์ƒ์œผ๋กœ ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ํฅ๋ฏธ๋กœ์šด ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”. "
400
- "์‹ค์ œ ์‚ฌ์šฉํ•˜๋Š” ํ•œ๊ตญ์–ด ํ‘œํ˜„๊ณผ ๋ฌธํ™”์  ๋งฅ๋ฝ์„ ๋ฐ˜์˜ํ•˜์—ฌ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”."
 
 
401
  )
402
  else:
403
  system_message = (
404
  "You are an expert podcast scriptwriter. "
405
- "Create natural, engaging conversations that inform and entertain listeners."
 
406
  )
407
 
408
  chat = [
409
  {"role": "system", "content": system_message},
410
- {"role": "user", "content": self._build_prompt(text, language)}
411
  ]
412
 
413
  terminators = [
@@ -450,14 +569,14 @@ class UnifiedAudioConverter:
450
 
451
  except Exception as e:
452
  print(f"Legacy local model also failed: {e}")
453
- # Return default template with Korean male names
454
  if language == "Korean":
455
  return {
456
  "conversation": [
457
- {"speaker": "์ค€์ˆ˜", "text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์—ฌ๋Ÿฌ๋ถ„! ์˜ค๋Š˜๋„ ์ €ํฌ ํŒŸ์บ์ŠคํŠธ๋ฅผ ์ฐพ์•„์ฃผ์…”์„œ ์ •๋ง ๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค."},
458
- {"speaker": "๋ฏผํ˜ธ", "text": "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜์€ ์ •๋ง ํฅ๋ฏธ๋กœ์šด ์ฃผ์ œ๋ฅผ ์ค€๋น„ํ–ˆ๋Š”๋ฐ์š”, ํ•จ๊ป˜ ์ด์•ผ๊ธฐ ๋‚˜๋ˆ ๋ณด์‹œ์ฃ ."},
459
- {"speaker": "์ค€์ˆ˜", "text": "๋„ค, ์ •๋ง ๊ธฐ๋Œ€๋˜๋Š”๋ฐ์š”. ์ฒญ์ทจ์ž ์—ฌ๋Ÿฌ๋ถ„๋“ค๋„ ๊ถ๊ธˆํ•ดํ•˜์‹ค ๊ฒƒ ๊ฐ™์•„์š”."},
460
- {"speaker": "๋ฏผํ˜ธ", "text": "๋งž์•„์š”. ๊ทธ๋Ÿผ ๋ณธ๊ฒฉ์ ์œผ๋กœ ์‹œ์ž‘ํ•ด๋ณผ๊นŒ์š”?"}
461
  ]
462
  }
463
  else:
@@ -471,30 +590,44 @@ class UnifiedAudioConverter:
471
  }
472
 
473
  def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
474
- """Extract conversation using API (fallback method)"""
475
  if not self.llm_client:
476
  raise RuntimeError("API mode not initialized")
477
 
478
  try:
 
 
 
 
 
 
 
 
 
 
 
 
479
  # ๊ฐ•ํ™”๋œ ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
480
  if language == "Korean":
481
  system_message = (
482
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
483
  "ํ•œ๊ตญ ์ฒญ์ทจ์ž๋“ค์˜ ๋ฌธํ™”์  ๋งฅ๋ฝ๊ณผ ์–ธ์–ด์  ํŠน์„ฑ์„ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , "
484
  "์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๋Œ€๋ณธ์„ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. "
485
- "์ค€์ˆ˜(์ง„ํ–‰์ž)์™€ ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€)๋ผ๋Š” ๋‘ ๋ช…์˜ 30๋Œ€ ๋‚จ์„ฑ์ด ๋Œ€ํ™”ํ•˜๋Š” ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•˜์„ธ์š”."
 
486
  )
487
  else:
488
  system_message = (
489
  "You are an expert podcast scriptwriter who creates engaging, "
490
  "natural conversations between Alex (host) and Jordan (expert). "
491
- "Create informative yet entertaining dialogue that keeps listeners engaged."
 
492
  )
493
 
494
  chat_completion = self.llm_client.chat.completions.create(
495
  messages=[
496
  {"role": "system", "content": system_message},
497
- {"role": "user", "content": self._build_prompt(text, language)}
498
  ],
499
  model=self.config.api_model_name,
500
  )
@@ -905,6 +1038,7 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
905
  - **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
906
  - **Status**: {"โœ… Llama CPP Available" if LLAMA_CPP_AVAILABLE else "โŒ Llama CPP Not Available - Install llama-cpp-python"}
907
  - **Max Tokens**: {converter.config.max_tokens} (Extended for longer conversations)
 
908
  """)
909
 
910
  with gr.Row():
@@ -968,7 +1102,8 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
968
 
969
  **ํ•œ๊ตญ์–ด ์ง€์›:**
970
  - ๐Ÿ‡ฐ๐Ÿ‡ท ํ•œ๊ตญ์–ด ์„ ํƒ ์‹œ Edge-TTS๋งŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค
971
- - ๐Ÿ‘จโ€๐Ÿ‘จ ํ•œ๊ตญ์–ด ๋Œ€ํ™”๋Š” ์ค€์ˆ˜(์ง„ํ–‰์ž)์™€ ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€) ๋‘ ๋‚จ์„ฑ์ด ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค
 
972
  """)
973
 
974
  convert_btn = gr.Button("๐ŸŽฏ Generate Conversation / ๋Œ€ํ™” ์ƒ์„ฑ", variant="primary", size="lg")
@@ -977,10 +1112,10 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
977
  with gr.Column():
978
  conversation_output = gr.Textbox(
979
  label="Generated Conversation (Editable) / ์ƒ์„ฑ๋œ ๋Œ€ํ™” (ํŽธ์ง‘ ๊ฐ€๋Šฅ)",
980
- lines=20, # ๋” ๊ธด ๋Œ€ํ™”๋ฅผ ์œ„ํ•ด ์ฆ๊ฐ€
981
- max_lines=40,
982
  interactive=True,
983
- placeholder="Generated conversation will appear here. You can edit it before generating audio.\n์ƒ์„ฑ๋œ ๋Œ€ํ™”๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค. ์˜ค๋””์˜ค ์ƒ์„ฑ ์ „์— ํŽธ์ง‘ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.",
984
  info="Edit the conversation as needed. Format: 'Speaker Name: Text' / ํ•„์š”์— ๋”ฐ๋ผ ๋Œ€ํ™”๋ฅผ ํŽธ์ง‘ํ•˜์„ธ์š”. ํ˜•์‹: 'ํ™”์ž ์ด๋ฆ„: ํ…์ŠคํŠธ'"
985
  )
986
 
 
14
  import soundfile as sf
15
  import subprocess
16
  import shutil
17
+ import requests
18
+ import logging
19
+ from datetime import datetime, timedelta
20
  from dataclasses import dataclass
21
  from typing import List, Tuple, Dict, Optional
22
  from pathlib import Path
 
75
 
76
  load_dotenv()
77
 
78
+ # Brave Search API configuration
79
+ BRAVE_KEY = os.getenv("BSEARCH_API")
80
+ BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
81
 
82
  @dataclass
83
  class ConversationConfig:
 
93
  max_new_tokens: int = 8000 # 4000์—์„œ 8000์œผ๋กœ ์ฆ๊ฐ€
94
 
95
 
96
def brave_search(query: str, count: int = 8, freshness_days: int | None = None):
    """Query the Brave Web Search API and return simplified result rows.

    Args:
        query: Search terms sent as the ``q`` parameter.
        count: Maximum number of results to request and to return.
        freshness_days: When truthy, restrict results to the window that
            starts this many days ago and ends today (UTC).

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``snippet`` and
        ``host``. An empty list is returned when ``BRAVE_KEY`` is not set,
        or when the request or the response parsing fails (the error is
        logged rather than raised).
    """
    # Imported locally so the helper does not depend on names that may not
    # be imported at the top of the file.
    import re
    from datetime import timezone

    if not BRAVE_KEY:
        return []

    params = {"q": query, "count": str(count)}
    if freshness_days:
        today = datetime.now(timezone.utc).date()
        since = today - timedelta(days=freshness_days)
        # Brave accepts a custom window only as "YYYY-MM-DDtoYYYY-MM-DD";
        # a lone start date is not a valid `freshness` value.
        params["freshness"] = f"{since.isoformat()}to{today.isoformat()}"

    try:
        response = requests.get(
            BRAVE_ENDPOINT,
            headers={"Accept": "application/json", "X-Subscription-Token": BRAVE_KEY},
            params=params,
            timeout=15,
        )
        # Surface 4xx/5xx responses in the log instead of silently parsing
        # an error payload into an empty result list.
        response.raise_for_status()
        payload = response.json()
        raw = (payload.get("web") or {}).get("results") or []

        results = []
        for item in raw[:count]:
            url = item.get("url", item.get("link", ""))
            results.append({
                "title": item.get("title", ""),
                "url": url,
                "snippet": item.get("description", item.get("text", "")),
                # Host = URL with scheme and leading "www." removed, up to
                # the first slash.
                "host": re.sub(r"https?://(www\.)?", "", url).split("/")[0],
            })
        return results
    except Exception as e:
        # Search is best-effort: callers continue without search context.
        logging.error(f"Brave search error: {e}")
        return []
121
+
122
+
123
def format_search_results(query: str, count: int = 6, freshness_days: int = 3) -> str:
    """Run a web search for ``query`` and render the results as Markdown.

    Args:
        query: Search terms passed to ``brave_search``.
        count: Maximum number of results to include (default 6).
        freshness_days: Look-back window in days passed to ``brave_search``
            and echoed in the header (default 3).

    Returns:
        A Markdown string: a ``# [Web-Search]`` header followed by one
        bullet per result (title, host, snippet, link). When the search
        yields nothing, a single header line saying so is returned.
    """
    rows = brave_search(query, count, freshness_days=freshness_days)

    # Single-quoted f-strings let the query be wrapped in double quotes
    # without terminating the literal.
    if not rows:
        return f'# [Web-Search] No live results for "{query}".\n'

    header = (
        f'# [Web-Search] Top results for "{query}" '
        f"(last {freshness_days} days)\n\n"
    )
    body = "\n".join(
        f"- **{row['title']}** ({row['host']})\n {row['snippet']}\n [link]({row['url']})"
        for row in rows
    )
    return header + body + "\n"
134
+
135
+
136
def extract_keywords_for_search(text: str, language: str = "English") -> List[str]:
    """Pick up to three search keywords from the opening lines of ``text``.

    This is a deliberately simple heuristic, not real keyword extraction.

    Args:
        text: Source content; only its first five lines are inspected.
        language: ``"Korean"`` selects runs of two or more Hangul
            syllables; any other value selects words longer than three
            characters that start with a capital letter or are all caps.

    Returns:
        At most three unique keywords, in order of first appearance.
        An empty list when nothing matches.
    """
    # Imported locally (as the original did) because a file-level import
    # of `re` is not guaranteed.
    import re

    sample = " ".join(text.split("\n")[:5])

    if language == "Korean":
        # U+AC00..U+D7A3 is the Hangul Syllables block; written as escapes
        # so the pattern survives any re-encoding of this file.
        candidates = re.findall(r"[\uac00-\ud7a3]{2,}", sample)
    else:
        # Strip surrounding punctuation BEFORE filtering so that the
        # length and capitalisation checks see the actual word
        # ("The," is 3 letters, "(OpenAI)" starts with a capital).
        stripped = (token.strip(".,!?;:\"'()[]{}") for token in sample.split())
        candidates = [
            word for word in stripped
            if len(word) > 3 and (word[0].isupper() or word.isupper())
        ]

    # dict.fromkeys removes duplicates while preserving first-seen order.
    return list(dict.fromkeys(candidates))[:3]
158
+
159
+
160
  class UnifiedAudioConverter:
161
  def __init__(self, config: ConversationConfig):
162
  self.config = config
 
308
  else:
309
  return MessagesFormatterType.LLAMA_3
310
 
311
+ def _build_prompt(self, text: str, language: str = "English", search_context: str = "") -> str:
312
+ """Build prompt for conversation generation with search context"""
313
  if language == "Korean":
314
+ # ๊ฐ•ํ™”๋œ ํ•œ๊ตญ์–ด ํ”„๋กฌํ”„ํŠธ (์กด๋Œ“๋ง ๊ฐ•ํ™” ๋ฐ ํ•œ๊ตญ์  ํŠน์„ฑ ๋ฐ˜์˜)
315
  template = """
316
  {
317
  "conversation": [
 
322
  ]
323
  }
324
  """
325
+
326
+ base_prompt = (
327
+ f"# ์›๋ณธ ์ฝ˜ํ…์ธ :\n{text}\n\n"
328
+ f"# ์ตœ์‹  ๊ด€๋ จ ์ •๋ณด:\n{search_context}\n\n" if search_context else f"# ์›๋ณธ ์ฝ˜ํ…์ธ :\n{text}\n\n"
329
  f"์œ„ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ 30๋Œ€ ํ•œ๊ตญ์ธ ๋‘ ๋ช…์ด ์ง„ํ–‰ํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ํฅ๋ฏธ๋กœ์šด ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”.\n\n"
330
+ f"## ํ•„์ˆ˜ ์ง€์นจ:\n\n"
331
+ f"### ๐Ÿ‘ฅ ์บ๋ฆญํ„ฐ ์„ค์ •:\n"
332
+ f"- **์ค€์ˆ˜(์ง„ํ–‰์ž)**: ์นœ๊ทผํ•˜๊ณ  ํ˜ธ๊ธฐ์‹ฌ ๋งŽ์€ ์„ฑ๊ฒฉ, ์ฒญ์ทจ์ž์˜ ๊ถ๊ธˆ์ฆ์„ ๋Œ€๋ณ€ํ•˜๋Š” 30๋Œ€ ๋‚จ์„ฑ\n"
333
+ f"- **๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€)**: ํ•ด๋‹น ์ฃผ์ œ์— ๋Œ€ํ•œ ๊นŠ์€ ์ง€์‹์„ ๊ฐ€์ง„ ์ „๋ฌธ๊ฐ€, ์‰ฝ๊ฒŒ ์„ค๋ช…ํ•˜๋Š” ๋Šฅ๋ ฅ์„ ๊ฐ€์ง„ 30๋Œ€ ๋‚จ์„ฑ\n\n"
334
+ f"### ๐Ÿ—ฃ๏ธ ์–ธ์–ด ์Šคํƒ€์ผ (์ค‘์š”!):\n"
335
+ f"- **์กด๋Œ“๋ง ํ•„์ˆ˜**: ๋‘ ํ™”์ž๋Š” ์„œ๋กœ์—๊ฒŒ ์ตœ์†Œํ•œ์˜ ์กด๋Œ“๋ง์„ ์‚ฌ์šฉํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค ('~์Šต๋‹ˆ๋‹ค', '~์„ธ์š”', '~๊ฑฐ๋“ ์š”')\n"
336
+ f"- **๋ฐ˜๋ง ์ ˆ๋Œ€ ๊ธˆ์ง€**: '~์•ผ', '~๋‹ค', '~ํ•ด' ๋“ฑ์˜ ๋ฐ˜๋ง์€ ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”\n"
337
+ f"- **์ž์—ฐ์Šค๋Ÿฌ์šด ์กด๋Œ“๋ง**: ๋”ฑ๋”ฑํ•˜์ง€ ์•Š๊ณ  ์นœ๊ทผํ•œ ์กด๋Œ“๋ง ์‚ฌ์šฉ ('๊ทธ๋ ‡๊ตฐ์š”', '๋งž์œผ์„ธ์š”', '๊ทธ๋Ÿฐ๋ฐ์š”')\n"
338
+ f"- **๊ฐํƒ„์‚ฌ ํ™œ์šฉ**: '์•„~', '๊ทธ๋ ‡๊ตฌ๋‚˜์š”', '์™€~', '์ง„์งœ์š”?', '์–ด๋จธ๋‚˜' ๋“ฑ ์ž์—ฐ์Šค๋Ÿฌ์šด ๋ฐ˜์‘\n\n"
339
+ f"### ๐Ÿ“ ๋Œ€ํ™” ๊ตฌ์„ฑ:\n"
340
+ f"1. **ํ•œ๊ตญ ๋ฌธํ™” ๋งž์ถค**: ํ•œ๊ตญ์ธ์˜ ์ •์„œ์™€ ์ผ์ƒ์— ๋งž๋Š” ๊ตฌ์ฒด์  ์˜ˆ์‹œ์™€ ๋น„์œ  ์‚ฌ์šฉ\n"
341
+ f"2. **๊ณต๊ฐ๋Œ€ ํ˜•์„ฑ**: '์šฐ๋ฆฌ๋‚˜๋ผ์—์„œ๋Š”', 'ํ•œ๊ตญ ์‚ฌ๋žŒ๋“ค์ด', '์š”์ฆ˜ ์‚ฌ๋žŒ๋“ค' ๋“ฑ์˜ ํ‘œํ˜„์œผ๋กœ ์นœ๋ฐ€๊ฐ ์กฐ์„ฑ\n"
342
+ f"3. **์ถฉ๋ถ„ํ•œ ๋ถ„๋Ÿ‰**: ๊ฐ ๋Œ€ํ™”๋Š” ์ตœ์†Œ 3-4๋ฌธ์žฅ ์ด์ƒ, ์ „์ฒด 10ํšŒ ์ด์ƒ ์ฃผ๊ณ ๋ฐ›๊ธฐ\n"
343
+ f"4. **์‹ค์šฉ์  ์กฐ์–ธ**: ์ฒญ์ทจ์ž๊ฐ€ ์‹ค์ œ๋กœ ์ ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ๊ตฌ์ฒด์ ์ด๊ณ  ์œ ์šฉํ•œ ์ •๋ณด ์ œ๊ณต\n"
344
+ f"5. **์ตœ์‹  ์ •๋ณด ๋ฐ˜์˜**: ์ œ๊ณต๋œ ์ตœ์‹  ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ๋Œ€ํ™”์— ํฌํ•จ\n\n"
345
+ f"### ๐ŸŽฏ ํŒŸ์บ์ŠคํŠธ ํ’ˆ์งˆ:\n"
346
+ f"- **์˜คํ”„๋‹**: ๋”ฐ๋œปํ•œ ์ธ์‚ฌ์™€ ์ฃผ์ œ ์†Œ๊ฐœ\n"
347
+ f"- **๋ฉ”์ธ**: ํ•ต์‹ฌ ๋‚ด์šฉ์„ ์žฌ๋ฏธ์žˆ๊ณ  ์ดํ•ดํ•˜๊ธฐ ์‰ฝ๊ฒŒ ์ „๋‹ฌ\n"
348
+ f"- **์ƒํ˜ธ์ž‘์šฉ**: '์ฒญ์ทจ์ž ์—ฌ๋Ÿฌ๋ถ„์€ ์–ด๋–ป๊ฒŒ ์ƒ๊ฐํ•˜์„ธ์š”?' ๊ฐ™์€ ์ฐธ์—ฌ ์œ ๋„\n"
349
+ f"- **ํด๋กœ์ง•**: ํ•ต์‹ฌ ์š”์•ฝ๊ณผ ์‹ค์šฉ์  ์กฐ์–ธ์œผ๋กœ ๋งˆ๋ฌด๋ฆฌ\n\n"
350
+ f"### ๐Ÿ’ก ํ•œ๊ตญ์–ด ํŠนํ™” ์š”์†Œ:\n"
351
+ f"- **ํ˜ธ์นญ**: '์ค€์ˆ˜์”จ', '๋ฏผํ˜ธ์”จ' ๋“ฑ ์ ์ ˆํ•œ ํ˜ธ์นญ ์‚ฌ์šฉ\n"
352
+ f"- **๊ด€์šฉ์–ด๊ตฌ**: ์ž์—ฐ์Šค๋Ÿฌ์šด ํ•œ๊ตญ์–ด ๊ด€์šฉํ‘œํ˜„ ํ™œ์šฉ\n"
353
+ f"- **์ •์„œ์  ์—ฐ๊ฒฐ**: ํ•œ๊ตญ์ธ์˜ '์ •', '๋ˆˆ์น˜', '์ฒด๋ฉด' ๋“ฑ์˜ ๋ฌธํ™”์  ์ฝ”๋“œ ๋ฐ˜์˜\n"
354
+ f"- **๊ณ„์ ˆ๊ฐ**: ํ˜„์žฌ ๊ณ„์ ˆ์ด๋‚˜ ์‹œ๊ธฐ์  ํŠน์„ฑ ๋ฐ˜์˜\n\n"
355
  f"๋‹ค์Œ JSON ํ˜•์‹์œผ๋กœ๋งŒ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”:\n{template}"
356
  )
357
+
358
+ return base_prompt
359
+
360
  else:
361
  template = """
362
  {
 
368
  ]
369
  }
370
  """
371
+
372
+ base_prompt = (
373
+ f"# Original Content:\n{text}\n\n"
374
+ f"# Latest Related Information:\n{search_context}\n\n" if search_context else f"# Original Content:\n{text}\n\n"
375
  f"Convert the provided text into an engaging, natural podcast conversation between two experts.\n\n"
376
  f"Guidelines:\n"
377
  f"1. Alex (Host): Curious, engaging personality representing audience questions\n"
 
382
  f"6. Create at least 10 back-and-forth exchanges\n"
383
  f"7. Address common questions and misconceptions\n"
384
  f"8. Maintain an informative yet entertaining tone\n"
385
+ f"9. Incorporate the latest related information naturally into the conversation\n"
386
+ f"10. End with key takeaways and practical advice\n\n"
387
  f"Return ONLY the JSON in this format:\n{template}"
388
  )
389
+
390
+ return base_prompt
391
 
392
+ def _build_messages_for_local(self, text: str, language: str = "English", search_context: str = "") -> List[Dict]:
393
+ """Build messages for local LLM with enhanced Korean guidelines"""
394
  if language == "Korean":
395
  system_message = (
396
+ "๋‹น์‹ ์€ ํ•œ๊ตญ ์ตœ๊ณ ์˜ ํŒŸ์บ์ŠคํŠธ ๋Œ€๋ณธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
397
+ "ํ•œ๊ตญ์ธ์˜ ์ •์„œ์™€ ๋ฌธํ™”๋ฅผ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , 30๋Œ€ ํ•œ๊ตญ์ธ ์ฒญ์ทจ์ž๋“ค์ด ๋๊นŒ์ง€ ์ง‘์ค‘ํ•  ์ˆ˜ ์žˆ๋Š” "
398
+ "๋งค๋ ฅ์ ์ด๊ณ  ์œ ์ตํ•œ ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด๋ƒ…๋‹ˆ๋‹ค.\n\n"
399
+ "ํ•ต์‹ฌ ์›์น™:\n"
400
+ "1. ๋‘ ํ™”์ž๋Š” ๋ฐ˜๋“œ์‹œ ์„œ๋กœ์—๊ฒŒ ์กด๋Œ“๋ง์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค (๋ฐ˜๋ง ์ ˆ๋Œ€ ๊ธˆ์ง€)\n"
401
+ "2. ํ•œ๊ตญ ๋ฌธํ™”์˜ ์ •์„œ์  ์ฝ”๋“œ์™€ ๊ฐ€์น˜๊ด€์„ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ๋ฐ˜์˜ํ•ฉ๋‹ˆ๋‹ค\n"
402
+ "3. ์‹ค์ œ ํ•œ๊ตญ์ธ๋“ค์ด ์ผ์ƒ์—์„œ ์‚ฌ์šฉํ•˜๋Š” ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„์„ ๊ตฌ์‚ฌํ•ฉ๋‹ˆ๋‹ค\n"
403
+ "4. ์ฒญ์ทจ์ž๊ฐ€ ๊ณต๊ฐํ•˜๊ณ  ์‹ค์šฉ์ ์œผ๋กœ ํ™œ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ๋‚ด์šฉ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค\n"
404
+ "5. ์ตœ์‹  ์ •๋ณด์™€ ํŠธ๋ Œ๋“œ๋ฅผ ์ ์ ˆํžˆ ๋ฐ˜์˜ํ•˜์—ฌ ์‹œ์˜์„ฑ์„ ํ™•๋ณดํ•ฉ๋‹ˆ๋‹ค\n\n"
405
+ "๋‹น์‹ ์˜ ๋Œ€๋ณธ์€ ํ•œ๊ตญ ํŒŸ์บ์ŠคํŠธ ์‹œ์žฅ์—์„œ ์ตœ๊ณ  ์ˆ˜์ค€์˜ ํ’ˆ์งˆ๋กœ ์ธ์ •๋ฐ›๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค."
406
  )
407
  else:
408
  system_message = (
409
  "You are an expert podcast scriptwriter who creates engaging, "
410
  "natural conversations that keep listeners hooked. "
411
  "You understand how to balance information with entertainment, "
412
+ "using real conversational patterns and authentic reactions. "
413
+ "You excel at incorporating current information and trends to make content relevant and timely."
414
  )
415
 
416
  return [
417
  {"role": "system", "content": system_message},
418
+ {"role": "user", "content": self._build_prompt(text, language, search_context)}
419
  ]
420
 
421
  @spaces.GPU(duration=120)
422
  def extract_conversation_local(self, text: str, language: str = "English", progress=None) -> Dict:
423
+ """Extract conversation using new local LLM with search context"""
424
  try:
425
+ # ๊ฒ€์ƒ‰ ์ปจํ…์ŠคํŠธ ์ƒ์„ฑ
426
+ search_context = ""
427
+ if BRAVE_KEY:
428
+ try:
429
+ keywords = extract_keywords_for_search(text, language)
430
+ if keywords:
431
+ # ์ฒซ ๋ฒˆ์งธ ํ‚ค์›Œ๋“œ๋กœ ๊ฒ€์ƒ‰
432
+ search_query = keywords[0] if language == "Korean" else f"{keywords[0]} latest news"
433
+ search_context = format_search_results(search_query)
434
+ print(f"Search context added for: {search_query}")
435
+ except Exception as e:
436
+ print(f"Search failed, continuing without context: {e}")
437
+
438
  # ๋จผ์ € ์ƒˆ๋กœ์šด ๋กœ์ปฌ LLM ์‹œ๋„
439
  self.initialize_local_mode()
440
 
 
447
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
448
  "ํ•œ๊ตญ ์ฒญ์ทจ์ž๋“ค์˜ ๋ฌธํ™”์  ๋งฅ๋ฝ๊ณผ ์–ธ์–ด์  ํŠน์„ฑ์„ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , "
449
  "์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๋Œ€๋ณธ์„ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. "
450
+ "ํŠนํžˆ ๋‘ ํ™”์ž๊ฐ€ ์„œ๋กœ์—๊ฒŒ ์กด๋Œ“๋ง์„ ์‚ฌ์šฉํ•˜๋Š” ๊ฒƒ์ด ํ•„์ˆ˜์ด๋ฉฐ, "
451
+ "๋ฐ˜๋ง์€ ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. "
452
  "์‹ค์ œ ํ•œ๊ตญ์ธ์ด ๋Œ€ํ™”ํ•˜๋Š” ๊ฒƒ์ฒ˜๋Ÿผ ์ž์—ฐ์Šค๋Ÿฌ์šด ํ‘œํ˜„, ์ ์ ˆํ•œ ๊ฐํƒ„์‚ฌ, "
453
  "๋ฌธํ™”์ ์œผ๋กœ ์ ํ•ฉํ•œ ์˜ˆ์‹œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์ฒญ์ทจ์ž๊ฐ€ ๊ณต๊ฐํ•˜๊ณ  ๋ชฐ์ž…ํ•  ์ˆ˜ ์žˆ๋Š” "
454
+ "๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”. ์ตœ์‹  ์ •๋ณด๋„ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ๋ฐ˜์˜ํ•˜์„ธ์š”. JSON ํ˜•์‹์œผ๋กœ๋งŒ ์‘๋‹ตํ•˜์„ธ์š”."
455
  )
456
  else:
457
  system_message = (
 
459
  "engaging, natural conversations that captivate listeners. "
460
  "You excel at transforming complex information into accessible, "
461
  "entertaining dialogue while maintaining authenticity and educational value. "
462
+ "Incorporate current trends and latest information naturally. "
463
  "Respond only in JSON format."
464
  )
465
 
 
480
 
481
  messages = BasicChatHistory()
482
 
483
+ prompt = self._build_prompt(text, language, search_context)
484
  response = agent.get_chat_response(
485
  prompt,
486
  llm_sampling_settings=settings,
 
500
 
501
  except Exception as e:
502
  print(f"Local LLM failed: {e}, falling back to legacy local method")
503
+ return self.extract_conversation_legacy_local(text, language, progress, search_context)
504
 
505
  @spaces.GPU(duration=120)
506
+ def extract_conversation_legacy_local(self, text: str, language: str = "English", progress=None, search_context: str = "") -> Dict:
507
  """Extract conversation using legacy local model (fallback)"""
508
  try:
509
  self.initialize_legacy_local_mode()
 
513
  system_message = (
514
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
515
  "30๋Œ€ ํ•œ๊ตญ์ธ ์ฒญ์ทจ์ž๋ฅผ ๋Œ€์ƒ์œผ๋กœ ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ํฅ๋ฏธ๋กœ์šด ๋Œ€ํ™”๋ฅผ ๋งŒ๋“ค์–ด์ฃผ์„ธ์š”. "
516
+ "๋‘ ํ™”์ž๋Š” ๋ฐ˜๋“œ์‹œ ์„œ๋กœ์—๊ฒŒ ์กด๋Œ“๋ง์„ ์‚ฌ์šฉํ•˜๋ฉฐ, ๋ฐ˜๋ง์€ ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. "
517
+ "์‹ค์ œ ์‚ฌ์šฉํ•˜๋Š” ํ•œ๊ตญ์–ด ํ‘œํ˜„๊ณผ ๋ฌธํ™”์  ๋งฅ๋ฝ์„ ๋ฐ˜์˜ํ•˜์—ฌ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”. "
518
+ "์ตœ์‹  ์ •๋ณด๋„ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ํฌํ•จ์‹œ์ผœ์ฃผ์„ธ์š”."
519
  )
520
  else:
521
  system_message = (
522
  "You are an expert podcast scriptwriter. "
523
+ "Create natural, engaging conversations that inform and entertain listeners. "
524
+ "Incorporate current information and trends naturally."
525
  )
526
 
527
  chat = [
528
  {"role": "system", "content": system_message},
529
+ {"role": "user", "content": self._build_prompt(text, language, search_context)}
530
  ]
531
 
532
  terminators = [
 
569
 
570
  except Exception as e:
571
  print(f"Legacy local model also failed: {e}")
572
+ # Return default template with Korean male names using formal speech
573
  if language == "Korean":
574
  return {
575
  "conversation": [
576
+ {"speaker": "์ค€์ˆ˜", "text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์—ฌ๋Ÿฌ๋ถ„! ์˜ค๋Š˜๋„ ์ €ํฌ ํŒŸ์บ์ŠคํŠธ๋ฅผ ์ฐพ์•„์ฃผ์…”์„œ ์ •๋ง ๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค. ๋ฏผํ˜ธ์”จ, ์˜ค๋Š˜ ์ •๋ง ํฅ๋ฏธ๋กœ์šด ์ฃผ์ œ๋ฅผ ์ค€๋น„ํ•ด์ฃผ์…จ๋‹ค๊ณ  ๋“ค์—ˆ์–ด์š”."},
577
+ {"speaker": "๋ฏผํ˜ธ", "text": "๋„ค, ์•ˆ๋…•ํ•˜์„ธ์š”! ์ค€์ˆ˜์”จ ๋ง์”€์ฒ˜๋Ÿผ ์˜ค๋Š˜์€ ์ •๋ง ํฅ๋ฏธ๋กœ์šด ์ด์•ผ๊ธฐ๋ฅผ ์ค€๋น„ํ–ˆ์Šต๋‹ˆ๋‹ค. ์ฒญ์ทจ์ž ์—ฌ๋Ÿฌ๋ถ„๋“ค๊ป˜์„œ๋„ ๋งŽ์€ ๊ด€์‹ฌ์„ ๊ฐ€์ง€๊ณ  ๊ณ„์‹ค ์ฃผ์ œ์ธ ๊ฒƒ ๊ฐ™์•„์š”."},
578
+ {"speaker": "์ค€์ˆ˜", "text": "์ •๋ง ๊ธฐ๋Œ€๋˜๋Š”๋ฐ์š”. ๊ทธ๋Ÿฐ๋ฐ ๋ฏผํ˜ธ์”จ, ์ด ์ฃผ์ œ๊ฐ€ ์š”์ฆ˜ ์™œ ์ด๋ ‡๊ฒŒ ํ™”์ œ๊ฐ€ ๋˜๊ณ  ์žˆ๋Š” ๊ฑด๊ฐ€์š”? ์ฒญ์ทจ์ž ์—ฌ๋Ÿฌ๋ถ„๋“ค๋„ ๊ถ๊ธˆํ•ดํ•˜์‹ค ๊ฒƒ ๊ฐ™์•„์š”."},
579
+ {"speaker": "๋ฏผํ˜ธ", "text": "์ข‹์€ ์งˆ๋ฌธ์ด์„ธ์š”, ์ค€์ˆ˜์”จ. ์‚ฌ์‹ค ์ตœ๊ทผ์— ์ด ๋ถ„์•ผ์— ๋งŽ์€ ๋ณ€ํ™”๊ฐ€ ์žˆ์—ˆ๊ฑฐ๋“ ์š”. ๊ทธ๋Ÿผ ๋ณธ๊ฒฉ์ ์œผ๋กœ ํ•˜๋‚˜์”ฉ ์ฐจ๊ทผ์ฐจ๊ทผ ์„ค๋ช…ํ•ด๋“œ๋ฆด๊ฒŒ์š”."}
580
  ]
581
  }
582
  else:
 
590
  }
591
 
592
  def extract_conversation_api(self, text: str, language: str = "English") -> Dict:
593
+ """Extract conversation using API with search context"""
594
  if not self.llm_client:
595
  raise RuntimeError("API mode not initialized")
596
 
597
  try:
598
+ # ๊ฒ€์ƒ‰ ์ปจํ…์ŠคํŠธ ์ƒ์„ฑ
599
+ search_context = ""
600
+ if BRAVE_KEY:
601
+ try:
602
+ keywords = extract_keywords_for_search(text, language)
603
+ if keywords:
604
+ search_query = keywords[0] if language == "Korean" else f"{keywords[0]} latest news"
605
+ search_context = format_search_results(search_query)
606
+ print(f"Search context added for: {search_query}")
607
+ except Exception as e:
608
+ print(f"Search failed, continuing without context: {e}")
609
+
610
  # ๊ฐ•ํ™”๋œ ์–ธ์–ด๋ณ„ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
611
  if language == "Korean":
612
  system_message = (
613
  "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ํŒŸ์บ์ŠคํŠธ ์ „๋ฌธ ์ž‘๊ฐ€์ž…๋‹ˆ๋‹ค. "
614
  "ํ•œ๊ตญ ์ฒญ์ทจ์ž๋“ค์˜ ๋ฌธํ™”์  ๋งฅ๋ฝ๊ณผ ์–ธ์–ด์  ํŠน์„ฑ์„ ์™„๋ฒฝํžˆ ์ดํ•ดํ•˜๊ณ , "
615
  "์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๋งค๋ ฅ์ ์ธ ๋Œ€๋ณธ์„ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. "
616
+ "์ค€์ˆ˜(์ง„ํ–‰์ž)์™€ ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€)๋ผ๋Š” ๋‘ ๋ช…์˜ 30๋Œ€ ๋‚จ์„ฑ์ด ์„œ๋กœ์—๊ฒŒ ์กด๋Œ“๋ง์„ ์‚ฌ์šฉํ•˜์—ฌ ๋Œ€ํ™”ํ•˜๋Š” ํ˜•์‹์œผ๋กœ ์ž‘์„ฑํ•˜์„ธ์š”. "
617
+ "๋ฐ˜๋ง์€ ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ์•Š์œผ๋ฉฐ, ์ตœ์‹  ์ •๋ณด๋„ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ๋ฐ˜์˜ํ•˜์„ธ์š”."
618
  )
619
  else:
620
  system_message = (
621
  "You are an expert podcast scriptwriter who creates engaging, "
622
  "natural conversations between Alex (host) and Jordan (expert). "
623
+ "Create informative yet entertaining dialogue that keeps listeners engaged. "
624
+ "Incorporate current trends and latest information naturally."
625
  )
626
 
627
  chat_completion = self.llm_client.chat.completions.create(
628
  messages=[
629
  {"role": "system", "content": system_message},
630
+ {"role": "user", "content": self._build_prompt(text, language, search_context)}
631
  ],
632
  model=self.config.api_model_name,
633
  )
 
1038
  - **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
1039
  - **Status**: {"โœ… Llama CPP Available" if LLAMA_CPP_AVAILABLE else "โŒ Llama CPP Not Available - Install llama-cpp-python"}
1040
  - **Max Tokens**: {converter.config.max_tokens} (Extended for longer conversations)
1041
+ - **Search**: {"โœ… Brave Search Enabled" if BRAVE_KEY else "โŒ Brave Search Not Available - Set BSEARCH_API"}
1042
  """)
1043
 
1044
  with gr.Row():
 
1102
 
1103
  **ํ•œ๊ตญ์–ด ์ง€์›:**
1104
  - ๐Ÿ‡ฐ๐Ÿ‡ท ํ•œ๊ตญ์–ด ์„ ํƒ ์‹œ Edge-TTS๋งŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค
1105
+ - ๐Ÿ‘จโ€๐Ÿ‘จ ํ•œ๊ตญ์–ด ๋Œ€ํ™”๋Š” ์ค€์ˆ˜(์ง„ํ–‰์ž)์™€ ๋ฏผํ˜ธ(์ „๋ฌธ๊ฐ€) ๋‘ ๋‚จ์„ฑ์ด ์กด๋Œ“๋ง๋กœ ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค
1106
+ - ๐Ÿ” **์ตœ์‹  ์ •๋ณด ๋ฐ˜์˜**: Brave Search๋ฅผ ํ†ตํ•ด ์ตœ์‹  ์‹œ์‚ฌ ๋‚ด์šฉ์„ ์ž๋™์œผ๋กœ ๊ฒ€์ƒ‰ํ•˜์—ฌ ๋Œ€๋ณธ์— ๋ฐ˜์˜ํ•ฉ๋‹ˆ๋‹ค
1107
  """)
1108
 
1109
  convert_btn = gr.Button("๐ŸŽฏ Generate Conversation / ๋Œ€ํ™” ์ƒ์„ฑ", variant="primary", size="lg")
 
1112
  with gr.Column():
1113
  conversation_output = gr.Textbox(
1114
  label="Generated Conversation (Editable) / ์ƒ์„ฑ๋œ ๋Œ€ํ™” (ํŽธ์ง‘ ๊ฐ€๋Šฅ)",
1115
+ lines=25, # ๋” ๊ธด ๋Œ€ํ™”๋ฅผ ์œ„ํ•ด ์ฆ๊ฐ€
1116
+ max_lines=50,
1117
  interactive=True,
1118
+ placeholder="Generated conversation will appear here. You can edit it before generating audio.\n์ƒ์„ฑ๋œ ๋Œ€ํ™”๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค. ์˜ค๋””์˜ค ์ƒ์„ฑ ์ „์— ํŽธ์ง‘ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.\n\nํ•œ๊ตญ์–ด ๋Œ€ํ™”๋Š” ์กด๋Œ“๋ง๋กœ ์ง„ํ–‰๋˜๋ฉฐ ์ตœ์‹  ์‹œ์‚ฌ ๋‚ด์šฉ์ด ๋ฐ˜์˜๋ฉ๋‹ˆ๋‹ค.",
1119
  info="Edit the conversation as needed. Format: 'Speaker Name: Text' / ํ•„์š”์— ๋”ฐ๋ผ ๋Œ€ํ™”๋ฅผ ํŽธ์ง‘ํ•˜์„ธ์š”. ํ˜•์‹: 'ํ™”์ž ์ด๋ฆ„: ํ…์ŠคํŠธ'"
1120
  )
1121