openfree committed on
Commit a96c15a · verified · 1 Parent(s): 023a9e4

Update app.py

Files changed (1)
  1. app.py +115 -44
app.py CHANGED
@@ -82,6 +82,9 @@ class ConversationConfig:
     # New local model settings
     local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
     local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
+    # Increased token limits
+    max_tokens: int = 6000       # raised from 2048
+    max_new_tokens: int = 8000   # raised from 4000
 
 
 class UnifiedAudioConverter:
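A minimal sketch of the extended config, assuming ConversationConfig is a dataclass (the typed class attributes with defaults in this hunk suggest one); the two new fields are read by the generation paths changed further down:

from dataclasses import dataclass

@dataclass
class ConversationConfig:
    local_model_name: str = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
    local_model_repo: str = "ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503"
    max_tokens: int = 6000       # sampling budget for the llama.cpp path
    max_new_tokens: int = 8000   # generation budget for the transformers fallback

config = ConversationConfig()
print(config.max_tokens, config.max_new_tokens)  # 6000 8000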
@@ -227,8 +230,6 @@ class UnifiedAudioConverter:
             return text
         except Exception as e:
             raise RuntimeError(f"Failed to extract text from PDF: {e}")
-
-
 
     def _get_messages_formatter_type(self, model_name):
         """Get appropriate message formatter for the model"""
@@ -240,41 +241,77 @@ class UnifiedAudioConverter:
     def _build_prompt(self, text: str, language: str = "English") -> str:
         """Build prompt for conversation generation"""
         if language == "Korean":
+            # Enhanced Korean prompt
             template = """
            {
                "conversation": [
-                    {"speaker": "", "text": ""},
-                    {"speaker": "", "text": ""}
+                    {"speaker": "준수", "text": ""},
+                    {"speaker": "민호", "text": ""},
+                    {"speaker": "준수", "text": ""},
+                    {"speaker": "민호", "text": ""}
                ]
            }
            """
             return (
-                f"{text}\n\n제공된 텍스트를 두 명의 전문가 간의 짧고 유익하며 명확한 "
-                f"팟캐스트 대화로 변환해주세요. 톤은 전문적이고 매력적이어야 합니다. "
-                f"다음 형식을 준수하고 JSON만 반환해주세요:\n{template}"
+                f"{text}\n\n"
+                f"위 내용을 바탕으로 30대 한국인 두 명이 진행하는 자연스럽고 흥미로운 한국어 팟캐스트 대화를 만들어주세요.\n\n"
+                f"필수 지침:\n"
+                f"1. 준수(진행자): 친근하고 호기심 많은 성격, 청취자의 궁금증을 대변\n"
+                f"2. 민호(전문가): 해당 주제에 대한 깊은 지식을 가진 전문가, 쉽게 설명하는 능력\n"
+                f"3. 한국인이 실제로 사용하는 자연스러운 표현과 감탄사 사용 ('아~', '그렇구나', '와~', '진짜요?')\n"
+                f"4. 적절한 존댓말과 편안한 반말을 섞어 친밀감 조성\n"
+                f"5. 한국 문화와 일상에 맞는 구체적인 예시와 비유 사용\n"
+                f"6. 각 대화는 충분히 길고 상세하게 (최소 3-4문장 이상)\n"
+                f"7. 전체 대화는 최소 10회 이상의 주고받기로 구성\n"
+                f"8. 청취자가 '나도 궁금했던 내용이야'라고 공감할 수 있는 질문 포함\n"
+                f"9. 핵심 정보를 자연스럽게 전달하면서도 지루하지 않게 구성\n"
+                f"10. 마무리는 핵심 내용 요약과 청취자에게 도움이 되는 실용적 조언\n\n"
+                f"다음 JSON 형식으로만 반환하세요:\n{template}"
             )
         else:
             template = """
            {
                "conversation": [
-                    {"speaker": "", "text": ""},
-                    {"speaker": "", "text": ""}
+                    {"speaker": "Alex", "text": ""},
+                    {"speaker": "Jordan", "text": ""},
+                    {"speaker": "Alex", "text": ""},
+                    {"speaker": "Jordan", "text": ""}
                ]
            }
            """
             return (
-                f"{text}\n\nConvert the provided text into a short, informative and crisp "
-                f"podcast conversation between two experts. The tone should be "
-                f"professional and engaging. Please adhere to the following "
-                f"format and return ONLY the JSON:\n{template}"
+                f"{text}\n\n"
+                f"Convert the provided text into an engaging, natural podcast conversation between two experts.\n\n"
+                f"Guidelines:\n"
+                f"1. Alex (Host): Curious, engaging personality representing audience questions\n"
+                f"2. Jordan (Expert): Knowledgeable but approachable, explains complex topics simply\n"
+                f"3. Use natural conversational English with appropriate reactions ('Wow', 'That's interesting', 'Really?')\n"
+                f"4. Include concrete examples and relatable analogies\n"
+                f"5. Each response should be substantial (minimum 3-4 sentences)\n"
+                f"6. Create at least 10 back-and-forth exchanges\n"
+                f"7. Address common questions and misconceptions\n"
+                f"8. Maintain an informative yet entertaining tone\n"
+                f"9. End with key takeaways and practical advice\n\n"
+                f"Return ONLY the JSON in this format:\n{template}"
             )
 
     def _build_messages_for_local(self, text: str, language: str = "English") -> List[Dict]:
         """Build messages for local LLM"""
         if language == "Korean":
-            system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요."
+            system_message = (
+                "당신은 한국 최고의 팟캐스트 대본 작가입니다. "
+                "한국인의 정서와 문화를 완벽히 이해하고, 청취자들이 끝까지 집중할 수 있는 "
+                "매력적이고 유익한 대화를 만들어냅니다. "
+                "실제 한국인들이 일상에서 사용하는 자연스러운 표현과 "
+                "적절한 감정 표현을 통해 생동감 있는 대화를 구성합니다."
+            )
         else:
-            system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
+            system_message = (
+                "You are an expert podcast scriptwriter who creates engaging, "
+                "natural conversations that keep listeners hooked. "
+                "You understand how to balance information with entertainment, "
+                "using real conversational patterns and authentic reactions."
+            )
 
         return [
            {"role": "system", "content": system_message},
@@ -291,11 +328,24 @@ class UnifiedAudioConverter:
         chat_template = self._get_messages_formatter_type(self.config.local_model_name)
         provider = LlamaCppPythonProvider(self.local_llm)
 
-        # Language-specific system message
+        # Enhanced language-specific system message
         if language == "Korean":
-            system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요. JSON 형식으로만 응답하세요."
+            system_message = (
+                "당신은 한국어 팟캐스트 전문 작가입니다. "
+                "한국 청취자들의 문화적 맥락과 언어적 특성을 완벽히 이해하고, "
+                "자연스럽고 매력적인 대본을 작성합니다. "
+                "실제 한국인이 대화하는 것처럼 자연스러운 표현, 적절한 감탄사, "
+                "문화적으로 적합한 예시를 사용하여 청취자가 공감하고 몰입할 수 있는 "
+                "대화를 만들어주세요. JSON 형식으로만 응답하세요."
+            )
         else:
-            system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations. Respond only in JSON format."
+            system_message = (
+                "You are an expert podcast scriptwriter specializing in creating "
+                "engaging, natural conversations that captivate listeners. "
+                "You excel at transforming complex information into accessible, "
+                "entertaining dialogue while maintaining authenticity and educational value. "
+                "Respond only in JSON format."
+            )
 
         agent = LlamaCppAgent(
             provider,
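Both the prompts and the system messages above demand a JSON-only reply. A hypothetical helper, not part of this commit, showing one defensive way such a response could be parsed downstream; the function name is illustrative:

import json
import re

def parse_conversation(raw: str) -> dict:
    """Extract the first {...} block from a model reply and parse it; fall back to an empty conversation."""
    match = re.search(r"\{.*\}", raw, re.DOTALL)
    if match:
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            pass
    return {"conversation": []}

print(parse_conversation('noise {"conversation": [{"speaker": "Alex", "text": "Hi"}]} noise'))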
@@ -305,10 +355,10 @@ class UnifiedAudioConverter:
         )
 
         settings = provider.get_provider_default_settings()
-        settings.temperature = 0.7
+        settings.temperature = 0.8  # raised slightly for more natural dialogue
         settings.top_k = 40
         settings.top_p = 0.95
-        settings.max_tokens = 2048
+        settings.max_tokens = self.config.max_tokens  # use the increased token budget
         settings.repeat_penalty = 1.1
         settings.stream = False
 
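One practical caveat that this hunk does not address: the larger max_tokens only fits if the llama.cpp context window covers the prompt plus the 6000-token generation budget. A hedged sketch, assuming the local model is loaded as a llama-cpp-python Llama instance (LlamaCppPythonProvider typically wraps one); the n_ctx value is illustrative:

from llama_cpp import Llama

llm = Llama(
    model_path="Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf",  # file name from the config hunk above
    n_ctx=8192,        # illustrative: must cover prompt plus the 6000-token generation budget
    n_gpu_layers=-1,   # offload all layers when a GPU build is available
)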
@@ -342,11 +392,18 @@ class UnifiedAudioConverter:
         try:
             self.initialize_legacy_local_mode()
 
-            # Language-specific system message
+            # Enhanced language-specific system message
             if language == "Korean":
-                system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요."
+                system_message = (
+                    "당신은 한국어 팟캐스트 전문 작가입니다. "
+                    "30대 한국인 청취자를 대상으로 자연스럽고 흥미로운 대화를 만들어주세요. "
+                    "실제 사용하는 한국어 표현과 문화적 맥락을 반영하여 작성해주세요."
+                )
             else:
-                system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
+                system_message = (
+                    "You are an expert podcast scriptwriter. "
+                    "Create natural, engaging conversations that inform and entertain listeners."
+                )
 
             chat = [
                 {"role": "system", "content": system_message},
@@ -370,7 +427,7 @@ class UnifiedAudioConverter:
             generate_kwargs = dict(
                 model_inputs,
                 streamer=streamer,
-                max_new_tokens=4000,
+                max_new_tokens=self.config.max_new_tokens,  # use the increased token budget
                 do_sample=True,
                 temperature=0.9,
                 eos_token_id=terminators,
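For context, a hedged sketch of the transformers streaming pattern this hunk tunes: generate() runs on a worker thread and tokens are read from a TextIteratorStreamer. The model id is a small stand-in for illustration only; the legacy model configured in app.py is not shown in this hunk:

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "gpt2"  # stand-in model, not the one used by app.py
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

model_inputs = tokenizer("Write a two-line podcast intro.", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=64, do_sample=True, temperature=0.9)

Thread(target=model.generate, kwargs=generate_kwargs).start()
print("".join(streamer))  # in app.py the budget would come from config.max_new_tokens (8000)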
@@ -393,19 +450,23 @@ class UnifiedAudioConverter:
 
         except Exception as e:
             print(f"Legacy local model also failed: {e}")
-            # Return default template
+            # Return default template with Korean male names
             if language == "Korean":
                 return {
                     "conversation": [
-                        {"speaker": "진행자", "text": "안녕하세요, 팟캐스트에 오신 것을 환영합니다."},
-                        {"speaker": "게스트", "text": "안녕하세요, 초대해 주셔서 감사합니다."}
+                        {"speaker": "준수", "text": "안녕하세요, 여러분! 오늘도 저희 팟캐스트를 찾아주셔서 정말 감사합니다."},
+                        {"speaker": "민호", "text": "안녕하세요! 오늘은 정말 흥미로운 주제를 준비했는데요, 함께 이야기 나눠보시죠."},
+                        {"speaker": "준수", "text": "네, 정말 기대되는데요. 청취자 여러분들도 궁금해하실 것 같아요."},
+                        {"speaker": "민호", "text": "맞아요. 그럼 본격적으로 시작해볼까요?"}
                     ]
                 }
             else:
                 return {
                     "conversation": [
-                        {"speaker": "Host", "text": "Welcome to our podcast."},
-                        {"speaker": "Guest", "text": "Thank you for having me."}
+                        {"speaker": "Alex", "text": "Welcome everyone to our podcast! We have a fascinating topic to discuss today."},
+                        {"speaker": "Jordan", "text": "Thanks for having me, Alex. I'm excited to dive into this subject with our listeners."},
+                        {"speaker": "Alex", "text": "So let's get started. Can you give us an overview of what we'll be covering?"},
+                        {"speaker": "Jordan", "text": "Absolutely! Today we'll explore some really interesting aspects that I think will surprise many people."}
                     ]
                 }
 
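As a side note, the editable textbox further down expects "Speaker Name: Text" lines. A hypothetical snippet, not from this commit, that flattens a fallback conversation dict into that format:

fallback = {
    "conversation": [
        {"speaker": "Alex", "text": "Welcome everyone to our podcast!"},
        {"speaker": "Jordan", "text": "Thanks for having me, Alex."},
    ]
}
script = "\n".join(f"{turn['speaker']}: {turn['text']}" for turn in fallback["conversation"])
print(script)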
@@ -415,11 +476,20 @@ class UnifiedAudioConverter:
             raise RuntimeError("API mode not initialized")
 
         try:
-            # Language-specific prompt construction
+            # Enhanced language-specific prompt construction
             if language == "Korean":
-                system_message = "당신은 한국어로 팟캐스트 대화를 생성하는 전문가입니다. 자연스럽고 유익한 한국어 대화를 만들어주세요."
+                system_message = (
+                    "당신은 한국어 팟캐스트 전문 작가입니다. "
+                    "한국 청취자들의 문화적 맥락과 언어적 특성을 완벽히 이해하고, "
+                    "자연스럽고 매력적인 대본을 작성합니다. "
+                    "준수(진행자)와 민호(전문가)라는 두 명의 30대 남성이 대화하는 형식으로 작성하세요."
+                )
             else:
-                system_message = "You are an expert at creating podcast conversations in English. Create natural and informative English conversations."
+                system_message = (
+                    "You are an expert podcast scriptwriter who creates engaging, "
+                    "natural conversations between Alex (host) and Jordan (expert). "
+                    "Create informative yet entertaining dialogue that keeps listeners engaged."
+                )
 
             chat_completion = self.llm_client.chat.completions.create(
                 messages=[
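The call above uses the OpenAI-style chat.completions interface. A hedged, stand-alone sketch of that call shape; the endpoint, key, and model below are placeholders and do not come from this commit:

from openai import OpenAI  # any OpenAI-compatible client exposing chat.completions.create

client = OpenAI(base_url="https://example.com/v1", api_key="YOUR_KEY")  # placeholder values
response = client.chat.completions.create(
    model="your-api-model",  # placeholder for converter.config.api_model_name
    messages=[
        {"role": "system", "content": "You are an expert podcast scriptwriter. Respond only in JSON."},
        {"role": "user", "content": "Return ONLY the JSON conversation for the provided text."},
    ],
)
print(response.choices[0].message.content)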
@@ -460,17 +530,16 @@ class UnifiedAudioConverter:
         filenames = []
 
         try:
-            # Language-specific voice settings
+            # Language-specific voice settings; all male voices for Korean
             if language == "Korean":
                 voices = [
-                    "ko-KR-HyunsuNeural",  # male voice (natural Korean)
-                    "ko-KR-InJoonNeural"   # male voice (natural Korean)
-
+                    "ko-KR-HyunsuNeural",  # male voice 1 (calm and trustworthy)
+                    "ko-KR-InJoonNeural"   # male voice 2 (lively and friendly)
                 ]
             else:
                 voices = [
-                    "en-US-AvaMultilingualNeural",     # female voice
-                    "en-US-AndrewMultilingualNeural"   # male voice
+                    "en-US-AndrewMultilingualNeural",  # male voice 1
+                    "en-US-BrianMultilingualNeural"    # male voice 2
                 ]
 
             for i, turn in enumerate(conversation_json["conversation"]):
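These identifiers are Edge-TTS voice names. A hedged sketch of the pattern this hunk configures, assuming the edge-tts package: alternate the two male voices per conversation turn and save one mp3 per turn; the file naming is illustrative:

import asyncio
import edge_tts

VOICES = ["ko-KR-HyunsuNeural", "ko-KR-InJoonNeural"]

async def synthesize(conversation: list[dict]) -> list[str]:
    filenames = []
    for i, turn in enumerate(conversation):
        voice = VOICES[i % 2]          # alternate speakers between the two voices
        filename = f"turn_{i}.mp3"     # illustrative output name
        await edge_tts.Communicate(turn["text"], voice).save(filename)
        filenames.append(filename)
    return filenames

# asyncio.run(synthesize([{"speaker": "준수", "text": "안녕하세요!"}]))  # requires network access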
@@ -522,13 +591,13 @@ class UnifiedAudioConverter:
             # Create different voice characteristics for different speakers
             if language == "Korean":
                 voice_configs = [
-                    {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 진행자입니다.", "gender": "male"},
-                    {"prompt_text": "안녕하세요, 오늘 게스트로 참여하게 되어 기쁩니다.", "gender": "male"}
+                    {"prompt_text": "안녕하세요, 오늘 팟캐스트 진행을 맡은 준수입니다. 여러분과 함께 흥미로운 이야기를 나눠보겠습니다.", "gender": "male"},
+                    {"prompt_text": "안녕하세요, 저는 오늘 이 주제에 대해 설명드릴 민호입니다. 쉽고 재미있게 설명드릴게요.", "gender": "male"}
                 ]
             else:
                 voice_configs = [
-                    {"prompt_text": "Hello, welcome to our podcast. I'm your host today.", "gender": "female"},
-                    {"prompt_text": "Thank you for having me. I'm excited to be here.", "gender": "male"}
+                    {"prompt_text": "Hello everyone, I'm Alex, your host for today's podcast. Let's explore this fascinating topic together.", "gender": "male"},
+                    {"prompt_text": "Hi, I'm Jordan. I'm excited to share my insights on this subject with you all today.", "gender": "male"}
                 ]
 
             for i, turn in enumerate(conversation_json["conversation"]):
@@ -835,6 +904,7 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
     - **Primary**: Local LLM ({converter.config.local_model_name}) - Runs on your device
     - **Fallback**: API LLM ({converter.config.api_model_name}) - Used when local fails
     - **Status**: {"✅ Llama CPP Available" if LLAMA_CPP_AVAILABLE else "❌ Llama CPP Not Available - Install llama-cpp-python"}
+    - **Max Tokens**: {converter.config.max_tokens} (Extended for longer conversations)
     """)
 
     with gr.Row():
@@ -898,6 +968,7 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
 
             **한국어 지원:**
             - 🇰🇷 한국어 선택 시 Edge-TTS만 사용 가능합니다
+            - 👨👨 한국어 대화는 준수(진행자)와 민호(전문가) 두 남성이 진행합니다
             """)
 
             convert_btn = gr.Button("🎯 Generate Conversation / 대화 생성", variant="primary", size="lg")
@@ -906,8 +977,8 @@ with gr.Blocks(theme='soft', title="URL/PDF to Podcast Converter") as demo:
         with gr.Column():
             conversation_output = gr.Textbox(
                 label="Generated Conversation (Editable) / 생성된 대화 (편집 가능)",
-                lines=15,
-                max_lines=30,
+                lines=20,       # increased for longer conversations
+                max_lines=40,
                 interactive=True,
                 placeholder="Generated conversation will appear here. You can edit it before generating audio.\n생성된 대화가 여기에 표시됩니다. 오디오 생성 전에 편집할 수 있습니다.",
                 info="Edit the conversation as needed. Format: 'Speaker Name: Text' / 필요에 따라 대화를 편집하세요. 형식: '화자 이름: 텍스트'"
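A minimal stand-alone Gradio sketch mirroring the enlarged textbox in this hunk; it is not the app's actual layout, just the component settings shown above:

import gradio as gr

with gr.Blocks() as demo:
    conversation_output = gr.Textbox(
        label="Generated Conversation (Editable)",
        lines=20,
        max_lines=40,
        interactive=True,
    )

# demo.launch()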
 