openfree committed on
Commit a96c15a · verified · 1 Parent(s): 023a9e4

Update app.py

Files changed (1)
  1. app.py +115 -44
app.py CHANGED
@@ -82,6 +82,9 @@ class ConversationConfig:
     # New local model settings
     local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
     local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
+    # Increased token limits
+    max_tokens: int = 6000       # raised from 2048
+    max_new_tokens: int = 8000   # raised from 4000
 
 
 class UnifiedAudioConverter:
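A minimal sketch of the extended config, assuming ConversationConfig is a dataclass (the typed class attributes with defaults in this hunk suggest one); the two new fields are read by the generation paths changed further down:

from dataclasses import dataclass

@dataclass
class ConversationConfig:
    local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
    local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
    max_tokens: int = 6000       # sampling budget for the llama.cpp path
    max_new_tokens: int = 8000   # generation budget for the transformers fallback

config = ConversationConfig()
print(config.max_tokens, config.max_new_tokens)  # 6000 8000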
@@ -227,8 +230,6 @@ class UnifiedAudioConverter:
             return text
         except Exception as e:
             raise RuntimeError(f"Failed to extract text from PDF: {e}")
-
-
 
     def _get_messages_formatter_type(self, model_name):
         """Get appropriate message formatter for the model"""
@@ -240,41 +241,77 @@ class UnifiedAudioConverter:
     def _build_prompt(self, text: str, language: str = "English") -> str:
         """Build prompt for conversation generation"""
         if language == "Korean":
+            # Enhanced Korean prompt
             template = """
            {
                "conversation": [
-                    {"speaker": "", "text": ""},
-                    {"speaker": "", "text": ""}
+                    {"speaker": "준수", "text": ""},
+                    {"speaker": "민호", "text": ""},
+                    {"speaker": "준수", "text": ""},
+                    {"speaker": "민호", "text": ""}
                ]
            }
            """
             return (
-                f"{text}\n\n제공된 텍스트를 두 명의 전문가 간의 짧고 유익하며 명확한 "
-                f"팟캐스트 대화로 변환해주세요. 톤은 전문적이고 매력적이어야 합니다. "
-                f"다음 형식을 준수하고 JSON만 반환해주세요:\n{template}"
+                f"{text}\n\n"
+                f"위 내용을 바탕으로 30대 한국인 두 명이 진행하는 자연스럽고 흥미로운 한국어 팟캐스트 대화를 만들어주세요.\n\n"
+                f"필수 지침:\n"
+                f"1. 준수(진행자): 친근하고 호기심 많은 성격, 청취자의 궁금증을 대변\n"
+                f"2. 민호(전문가): 해당 주제에 대한 깊은 지식을 가진 전문가, 쉽게 설명하는 능력\n"
+                f"3. 한국인이 실제로 사용하는 자연스러운 표현과 감탄사 사용 ('아~', '그렇구나', '와~', '진짜요?')\n"
+                f"4. 적절한 존댓말과 편안한 반말을 섞어 친밀감 조성\n"
+                f"5. 한국 문화와 일상에 맞는 구체적인 예시와 비유 사용\n"
+                f"6. 각 대화는 충분히 길고 상세하게 (최소 3-4문장 이상)\n"
+                f"7. 전체 대화는 최소 10회 이상의 주고받기로 구성\n"
+                f"8. 청취자가 '나도 궁금했던 내용이야'라고 공감할 수 있는 질문 포함\n"
+                f"9. 핵심 정보를 자연스럽게 전달하면서도 지루하지 않게 구성\n"
+                f"10. 마무리는 핵심 내용 요약과 청취자에게 도움이 되는 실용적 조언\n\n"
+                f"다음 JSON 형식으로만 반환하세요:\n{template}"
             )
         else:
             template = """
            {
                "conversation": [
-                    {"speaker": "", "text": ""},
-                    {"speaker": "", "text": ""}
+                    {"speaker": "Alex", "text": ""},
+                    {"speaker": "Jordan", "text": ""},
+                    {"speaker": "Alex", "text": ""},
+                    {"speaker": "Jordan", "text": ""}
                ]
            }
            """
             return (
-                f"{text}\n\nConvert the provided text into a short, informative and crisp "
-                f"podcast conversation between two experts. The tone should be "
-                f"professional and engaging. Please adhere to the following "
-                f"format and return ONLY the JSON:\n{template}"
+                f"{text}\n\n"
+                f"Convert the provided text into an engaging, natural podcast conversation between two experts.\n\n"
+                f"Guidelines:\n"
+                f"1. Alex (Host): Curious, engaging personality representing audience questions\n"
+                f"2. Jordan (Expert): Knowledgeable but approachable, explains complex topics simply\n"
+                f"3. Use natural conversational English with appropriate reactions ('Wow', 'That's interesting', 'Really?')\n"
+                f"4. Include concrete examples and relatable analogies\n"
+                f"5. Each response should be substantial (minimum 3-4 sentences)\n"
+                f"6. Create at least 10 back-and-forth exchanges\n"
+                f"7. Address common questions and misconceptions\n"
+                f"8. Maintain an informative yet entertaining tone\n"
+                f"9. End with key takeaways and practical advice\n\n"
+                f"Return ONLY the JSON in this format:\n{template}"
             )
 
     def _build_messages_for_local(self, text: str, language: str = "English") -> List[Dict]:
         """Build messages for local LLM"""
         if language == "Korean":
-            system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요."
+            system_message = (
+                "당신은 한국 최고의 팟캐스트 대본 작가입니다. "
+                "한국인의 정서와 문화를 완벽히 이해하고, 청취자들이 끝까지 집중할 수 있는 "
+                "매력적이고 유익한 대화를 만들어냅니다. "
+                "실제 한국인들이 일상에서 사용하는 자연스러운 표현과 "
+                "적절한 감정 표현을 통해 생동감 있는 대화를 구성합니다."
+            )
         else:
-            system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
+            system_message = (
+                "You are an expert podcast scriptwriter who creates engaging, "
+                "natural conversations that keep listeners hooked. "
+                "You understand how to balance information with entertainment, "
+                "using real conversational patterns and authentic reactions."
+            )
 
         return [
            {"role": "system", "content": system_message},
@@ -291,11 +328,24 @@ class UnifiedAudioConverter:
         chat_template = self._get_messages_formatter_type(self.config.local_model_name)
         provider = LlamaCppPythonProvider(self.local_llm)
 
-        # Language-specific system message
+        # Enhanced language-specific system message
         if language == "Korean":
-            system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요. JSON 형식으로만 응답하세요."
+            system_message = (
+                "당신은 한국어 팟캐스트 전문 작가입니다. "
+                "한국 청취자들의 문화적 맥락과 언어적 특성을 완벽히 이해하고, "
+                "자연스럽고 매력적인 대본을 작성합니다. "
+                "실제 한국인이 대화하는 것처럼 자연스러운 표현, 적절한 감탄사, "
+                "문화적으로 적합한 예시를 사용하여 청취자가 공감하고 몰입할 수 있는 "
+                "대화를 만들어주세요. JSON 형식으로만 응답하세요."
+            )
         else:
-            system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations. Respond only in JSON format."
+            system_message = (
+                "You are an expert podcast scriptwriter specializing in creating "
+                "engaging, natural conversations that captivate listeners. "
+                "You excel at transforming complex information into accessible, "
+                "entertaining dialogue while maintaining authenticity and educational value. "
+                "Respond only in JSON format."
+            )
 
         agent = LlamaCppAgent(
             provider,
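Both the prompts and the system messages above demand a JSON-only reply. A hypothetical helper, not part of this commit, showing one defensive way such a response could be parsed downstream; the function name is illustrative:

import json
import re

def parse_conversation(raw: str) -> dict:
    """Extract the first {...} block from a model reply and parse it; fall back to an empty conversation."""
    match = re.search(r"\{.*\}", raw, re.DOTALL)
    if match:
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            pass
    return {"conversation": []}

print(parse_conversation('noise {"conversation": [{"speaker": "Alex", "text": "Hi"}]} noise'))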
@@ -305,10 +355,10 @@ class UnifiedAudioConverter:
         )
 
         settings = provider.get_provider_default_settings()
-        settings.temperature = 0.7
+        settings.temperature = 0.8  # raised slightly for more natural dialogue
         settings.top_k = 40
         settings.top_p = 0.95
-        settings.max_tokens = 2048
+        settings.max_tokens = self.config.max_tokens  # use the increased token budget
         settings.repeat_penalty = 1.1
         settings.stream = False
 
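One practical caveat that this hunk does not address: the larger max_tokens only fits if the llama.cpp context window covers the prompt plus the 6000-token generation budget. A hedged sketch, assuming the local model is loaded as a llama-cpp-python Llama instance (LlamaCppPythonProvider typically wraps one); the n_ctx value is illustrative:

from llama_cpp import Llama

llm = Llama(
    model_path="Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf",  # file name from the config hunk above
    n_ctx=8192,        # illustrative: must cover prompt plus the 6000-token generation budget
    n_gpu_layers=-1,   # offload all layers when a GPU build is available
)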
@@ -342,11 +392,18 @@ class UnifiedAudioConverter:
         try:
             self.initialize_legacy_local_mode()
 
-            # Language-specific system message
+            # Enhanced language-specific system message
             if language == "Korean":
-                system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요."
+                system_message = (
+                    "당신은 한국어 팟캐스트 전문 작가입니다. "
+                    "30대 한국인 청취자를 대상으로 자연스럽고 흥미로운 대화를 만들어주세요. "
+                    "실제 사용하는 한국어 표현과 문화적 맥락을 반영하여 작성해주세요."
+                )
             else:
-                system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
+                system_message = (
+                    "You are an expert podcast scriptwriter. "
+                    "Create natural, engaging conversations that inform and entertain listeners."
+                )
 
             chat = [
                 {"role": "system", "content": system_message},
@@ -370,7 +427,7 @@ class UnifiedAudioConverter:
             generate_kwargs = dict(
                 model_inputs,
                 streamer=streamer,
-                max_new_tokens=4000,
+                max_new_tokens=self.config.max_new_tokens,  # use the increased token budget
                 do_sample=True,
                 temperature=0.9,
                 eos_token_id=terminators,
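For context, a hedged sketch of the transformers streaming pattern this hunk tunes: generate() runs on a worker thread and tokens are read from a TextIteratorStreamer. The model id is a small stand-in for illustration only; the legacy model configured in app.py is not shown in this hunk:

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "gpt2"  # stand-in model, not the one used by app.py
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

model_inputs = tokenizer("Write a two-line podcast intro.", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=64, do_sample=True, temperature=0.9)

Thread(target=model.generate, kwargs=generate_kwargs).start()
print("".join(streamer))  # in app.py the budget would come from config.max_new_tokens (8000)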
@@ -393,19 +450,23 @@ class UnifiedAudioConverter:
 
         except Exception as e:
             print(f"Legacy local model also failed: {e}")
-            # Return default template
+            # Return default template with Korean male names
             if language == "Korean":
                 return {
                     "conversation": [
-                        {"speaker": "진행자", "text": "안녕하세요, 팟캐스트에 오신 것을 환영합니다."},
-                        {"speaker": "게스트", "text": "안녕하세요, 초대해 주셔서 감사합니다."}
+                        {"speaker": "준수", "text": "안녕하세요, 여러분! 오늘도 저희 팟캐스트를 찾아주셔서 정말 감사합니다."},
+                        {"speaker": "민호", "text": "안녕하세요! 오늘은 정말 흥미로운 주제를 준비했는데요, 함께 이야기 나눠보시죠."},
+                        {"speaker": "준수", "text": "네, 정말 기대되는데요. 청취자 여러분들도 궁금해하실 것 같아요."},
+                        {"speaker": "민호", "text": "맞아요. 그럼 본격적으로 시작해볼까요?"}
                     ]
                 }
             else:
                 return {
                     "conversation": [
-                        {"speaker": "Host", "text": "Welcome to our podcast."},
-                        {"speaker": "Guest", "text": "Thank you for having me."}
+                        {"speaker": "Alex", "text": "Welcome everyone to our podcast! We have a fascinating topic to discuss today."},
+                        {"speaker": "Jordan", "text": "Thanks for having me, Alex. I'm excited to dive into this subject with our listeners."},
+                        {"speaker": "Alex", "text": "So let's get started. Can you give us an overview of what we'll be covering?"},
+                        {"speaker": "Jordan", "text": "Absolutely! Today we'll explore some really interesting aspects that I think will surprise many people."}
                     ]
                 }
 
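As a side note, the editable textbox further down expects "Speaker Name: Text" lines. A hypothetical snippet, not from this commit, that flattens a fallback conversation dict into that format:

fallback = {
    "conversation": [
        {"speaker": "Alex", "text": "Welcome everyone to our podcast!"},
        {"speaker": "Jordan", "text": "Thanks for having me, Alex."},
    ]
}
script = "\n".join(f"{turn['speaker']}: {turn['text']}" for turn in fallback["conversation"])
print(script)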
@@ -415,11 +476,20 @@ class UnifiedAudioConverter:
             raise RuntimeError("API mode not initialized")
 
         try:
-            # Language-specific prompt construction
+            # Enhanced language-specific prompt construction
             if language == "Korean":
-                system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요."
+                system_message = (
+                    "당신은 한국어 팟캐스트 전문 작가입니다. "
+                    "한국 청취자들의 문화적 맥락과 언어적 특성을 완벽히 이해하고, "
+                    "자연스럽고 매력적인 대본을 작성합니다. "
+                    "준수(진행자)와 민호(전문가)라는 두 명의 30대 남성이 대화하는 형식으로 작성하세요."
+                )
             else:
-                system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
+                system_message = (
+                    "You are an expert podcast scriptwriter who creates engaging, "
+                    "natural conversations between Alex (host) and Jordan (expert). "
+                    "Create informative yet entertaining dialogue that keeps listeners engaged."
+                )
 
             chat_completion = self.llm_client.chat.completions.create(
                 messages=[
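The call above uses the OpenAI-style chat.completions interface. A hedged, stand-alone sketch of that call shape; the endpoint, key, and model below are placeholders and do not come from this commit:

from openai import OpenAI  # any OpenAI-compatible client exposing chat.completions.create

client = OpenAI(base_url="https://example.com/v1", api_key="YOUR_KEY")  # placeholder values
response = client.chat.completions.create(
    model="your-api-model",  # placeholder for converter.config.api_model_name
    messages=[
        {"role": "system", "content": "You are an expert podcast scriptwriter. Respond only in JSON."},
        {"role": "user", "content": "Return ONLY the JSON conversation for the provided text."},
    ],
)
print(response.choices[0].message.content)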
@@ -460,17 +530,16 @@ class UnifiedAudioConverter:
         filenames = []
 
         try:
-            # Language-specific voice settings
+            # Language-specific voice settings; all male voices for Korean
             if language == "Korean":
                 voices = [
-                    "ko-KR-HyunsuNeural",  # male voice (natural Korean)
-                    "ko-KR-InJoonNeural"   # male voice (natural Korean)
-
+                    "ko-KR-HyunsuNeural",  # male voice 1 (calm and trustworthy)
+                    "ko-KR-InJoonNeural"   # male voice 2 (lively and friendly)
                 ]
             else:
                 voices = [
-                    "en-US-AvaMultilingualNeural",     # female voice
-                    "en-US-AndrewMultilingualNeural"   # male voice
+                    "en-US-AndrewMultilingualNeural",  # male voice 1
+                    "en-US-BrianMultilingualNeural"    # male voice 2
                 ]
 
             for i, turn in enumerate(conversation_json["conversation"]):
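These identifiers are Edge-TTS voice names. A hedged sketch of the pattern this hunk configures, assuming the edge-tts package: alternate the two male voices per conversation turn and save one mp3 per turn; the file naming is illustrative:

import asyncio
import edge_tts

VOICES = ["ko-KR-HyunsuNeural", "ko-KR-InJoonNeural"]

async def synthesize(conversation: list[dict]) -> list[str]:
    filenames = []
    for i, turn in enumerate(conversation):
        voice = VOICES[i % 2]          # alternate speakers between the two voices
        filename = f"turn_{i}.mp3"     # illustrative output name
        await edge_tts.Communicate(turn["text"], voice).save(filename)
        filenames.append(filename)
    return filenames

# asyncio.run(synthesize([{"speaker": "준수", "text": "안녕하세요!"}]))  # requires network access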
@@ -522,13 +591,13 @@ class UnifiedAudioConverter:
             # Create different voice characteristics for different speakers
             if language == "Korean":
                 voice_configs = [
-                    {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 진행자입니다.", "gender": "male"},
-                    {"prompt_text": "안녕하세요, 오늘 게스트로 참여하게 되어 기쁩니다.", "gender": "male"}
+                    {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 준수입니다. 여러분과 함께 흥미로운 이야기를 나눠보겠습니다.", "gender": "male"},
+                    {"prompt_text": "안녕하세요, 저는 오늘 이 주제에 대해 설명드릴 민호입니다. 쉽고 재미있게 설명드릴게요.", "gender": "male"}
                 ]
             else:
                 voice_configs = [
-                    {"prompt_text": "Hello, welcome to our podcast. I'm your host today.", "gender": "female"},
-                    {"prompt_text": "Thank you for having me. I'm excited to be here.", "gender": "male"}
+                    {"prompt_text": "Hello everyone, I'm Alex, your host for today's podcast. Let's explore this fascinating topic together.", "gender": "male"},
+                    {"prompt_text": "Hi, I'm Jordan. I'm excited to share my insights on this subject with you all today.", "gender": "male"}
                 ]
 
             for i, turn in enumerate(conversation_json["conversation"]):
@@ -835,6 +904,7 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
     - **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
     - **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
     - **Status**: {"✅ Llama CPP Available" if LLAMA_CPP_AVAILABLE else "❌ Llama CPP Not Available - Install llama-cpp-python"}
+    - **Max Tokens**: {converter.config.max_tokens} (Extended for longer conversations)
     """)
 
     with gr.Row():
@@ -898,6 +968,7 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
 
             **한국어 지원:**
             - 🇰🇷 한국어 선택 시 Edge-TTS만 사용 가능합니다
+            - 👨👨 한국어 대화는 준수(진행자)와 민호(전문가) 두 남성이 진행합니다
             """)
 
             convert_btn = gr.Button("🎯 Generate Conversation / 대화 생성", variant="primary", size="lg")
@@ -906,8 +977,8 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
         with gr.Column():
             conversation_output = gr.Textbox(
                 label="Generated Conversation (Editable) / 생성된 대화 (편집 가능)",
-                lines=15,
-                max_lines=30,
+                lines=20,       # increased for longer conversations
+                max_lines=40,
                 interactive=True,
                 placeholder="Generated conversation will appear here. You can edit it before generating audio.\n생성된 대화가 여기에 표시됩니다. 오디오 생성 전에 편집할 수 있습니다.",
                 info="Edit the conversation as needed. Format: 'Speaker Name: Text' / 필요에 따라 대화를 편집하세요. 형식: '화자 이름: 텍스트'"
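A minimal stand-alone Gradio sketch mirroring the enlarged textbox in this hunk; it is not the app's actual layout, just the component settings shown above:

import gradio as gr

with gr.Blocks() as demo:
    conversation_output = gr.Textbox(
        label="Generated Conversation (Editable)",
        lines=20,
        max_lines=40,
        interactive=True,
    )

# demo.launch()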
 