podcastgen

Sleeping

App Files Files Community

Rausda6 committed 19 days ago

Commit

4b51c12

verified ·

1 Parent(s): 413618e

Update app.py

Browse files

Files changed (1) hide show

app.py +467 -192

app.py CHANGED Viewed

@@ -10,8 +10,11 @@ import time
 import mimetypes
 import torch
 import re
-from typing import List, Dict
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 # Constants
 MAX_FILE_SIZE_MB = 20
@@ -19,36 +22,77 @@ MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
 MODEL_ID = "unsloth/gemma-3-1b-pt"
-# Initialize model with proper error handling
-try:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-    if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-        device_map="auto",
-        trust_remote_code=True
-    ).eval()
-    # Configure generation parameters
-    generation_config = GenerationConfig(
-        max_new_tokens=1024,
-        temperature=0.7,
-        top_p=0.9,
-        do_sample=True,
-        pad_token_id=tokenizer.pad_token_id,
-        eos_token_id=tokenizer.eos_token_id,
-    )
-    print(f"Model loaded successfully on device: {model.device}")
-except Exception as e:
-    print(f"Model initialization error: {e}")
-    model = None
-    tokenizer = None
-    generation_config = None
 class PodcastGenerator:
     def __init__(self):
@@ -56,263 +100,471 @@ class PodcastGenerator:
         self.tokenizer = tokenizer
         self.generation_config = generation_config
-    def extract_json_from_text(self, text: str) -> Dict:
-        """Extract JSON from model output using regex patterns"""
-        # Remove the input prompt from the output
-        # Look for JSON-like structures
-        json_patterns = [
-            r'\{[^{}]*"topic"[^{}]*"podcast"[^{}]*\[.*?\]\s*\}',
             r'\{.*?"topic".*?"podcast".*?\[.*?\].*?\}',
         ]
-        for pattern in json_patterns:
             matches = re.findall(pattern, text, re.DOTALL | re.IGNORECASE)
             for match in matches:
                 try:
-                    # Clean up the match
-                    cleaned_match = match.strip()
-                    return json.loads(cleaned_match)
-                except json.JSONDecodeError:
                     continue
-        # If no valid JSON found, create a fallback structure
         return self.create_fallback_podcast(text)
     def create_fallback_podcast(self, text: str) -> Dict:
-        """Create a basic podcast structure when JSON parsing fails"""
-        # Extract meaningful sentences from the text
-        sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 10]
         if not sentences:
-            sentences = ["Let's discuss this interesting topic.", "That's a great point to consider."]
         podcast_lines = []
-        for i, sentence in enumerate(sentences[:10]):  # Limit to 10 exchanges
             speaker = (i % 2) + 1
             podcast_lines.append({
                 "speaker": speaker,
-                "line": sentence + "." if not sentence.endswith('.') else sentence
             })
-        return {
             "topic": "Generated Discussion",
             "podcast": podcast_lines
         }
     async def generate_script(self, prompt: str, language: str, file_obj=None, progress=None) -> Dict:
-        if not self.model or not self.tokenizer:
-            raise Exception("Model not properly initialized. Please check model loading.")
         example_json = {
-            "topic": "AGI",
             "podcast": [
-                {"speaker": 1, "line": "So, AGI, huh? Seems like everyone's talking about it these days."},
-                {"speaker": 2, "line": "Yeah, it's definitely having a moment, isn't it?"},
-                {"speaker": 1, "line": "It really is. What got you hooked on this topic?"},
-                {"speaker": 2, "line": "The potential implications are fascinating and concerning at the same time."}
             ]
         }
-        if language == "Auto Detect":
-            language_instruction = "Use the same language as the input text"
-        else:
-            language_instruction = f"Generate the podcast in {language} language"
-        # Simplified, more direct prompt
-        system_prompt = f"""Generate a podcast script as valid JSON. {language_instruction}.
 Requirements:
 - Exactly 2 speakers (speaker 1 and 2)
-- Natural, engaging conversation
-- JSON format only
-Example format:
-{json.dumps(example_json, indent=2)}
-Input topic: {prompt}
-Generate JSON:"""
         try:
             if progress:
-                progress(0.3, "Generating podcast script...")
-            # Tokenize with proper attention mask
             inputs = self.tokenizer(
-                system_prompt,
-                return_tensors="pt",
-                padding=True,
                 truncation=True,
-                max_length=2048
             )
             inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
-            # Generate with timeout
             with torch.no_grad():
-                output = self.model.generate(
                     **inputs,
                     generation_config=self.generation_config,
                     pad_token_id=self.tokenizer.pad_token_id,
                 )
-            logs.append("✅ calling llm")
-            # Decode only the new tokens
             generated_text = self.tokenizer.decode(
-                output[0][inputs['input_ids'].shape[1]:],
-                skip_special_tokens=True
             )
-            logs.append("✅ generated text")
-            print(f"Generated text: {generated_text[:500]}...")
             if progress:
-                progress(0.4, "Processing generated script...")
-            # Extract JSON from the generated text
-            result = self.extract_json_from_text(generated_text)
             if progress:
-                progress(0.5, "Script generated successfully!")
             return result
         except Exception as e:
-            print(f"Generation error: {e}")
-            # Return fallback podcast
-            return {
-                "topic": prompt or "Discussion",
-                "podcast": [
-                    {"speaker": 1, "line": f"Welcome to our discussion about {prompt or 'this topic'}."},
-                    {"speaker": 2, "line": "Thanks for having me. This is indeed an interesting subject."},
-                    {"speaker": 1, "line": "Let's dive into the key points and explore different perspectives."},
-                    {"speaker": 2, "line": "Absolutely. There's a lot to unpack here."},
-                    {"speaker": 1, "line": "What aspects do you find most compelling?"},
-                    {"speaker": 2, "line": "The implications and potential applications are fascinating."},
-                    {"speaker": 1, "line": "That's a great point. Thanks for the insightful discussion."},
-                    {"speaker": 2, "line": "Thank you. This has been a valuable conversation."}
-                ]
-            }
     async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
-        """Generate TTS audio with improved error handling"""
         voice = speaker1 if speaker == 1 else speaker2
-        speech = edge_tts.Communicate(text, voice)
-        temp_filename = f"temp_audio_{uuid.uuid4()}.wav"
         max_retries = 3
         for attempt in range(max_retries):
             try:
-                await asyncio.wait_for(speech.save(temp_filename), timeout=30)
-                if os.path.exists(temp_filename) and os.path.getsize(temp_filename) > 0:
                     return temp_filename
                 else:
-                    raise Exception("Generated audio file is empty")
             except asyncio.TimeoutError:
                 if os.path.exists(temp_filename):
                     os.remove(temp_filename)
                 if attempt == max_retries - 1:
                     raise Exception("TTS generation timed out after multiple attempts")
-                await asyncio.sleep(1)  # Brief delay before retry
             except Exception as e:
                 if os.path.exists(temp_filename):
                     os.remove(temp_filename)
                 if attempt == max_retries - 1:
-                    raise Exception(f"TTS generation failed: {str(e)}")
-                await asyncio.sleep(1)
     async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
-        """Combine audio files with silence padding"""
         if progress:
-            progress(0.9, "Combining audio files...")
         try:
             combined_audio = AudioSegment.empty()
-            silence_padding = AudioSegment.silent(duration=500)  # 500ms silence
             for i, audio_file in enumerate(audio_files):
                 try:
                     audio_segment = AudioSegment.from_file(audio_file)
                     combined_audio += audio_segment
                     # Add silence between speakers (except for the last file)
                     if i < len(audio_files) - 1:
                         combined_audio += silence_padding
                 except Exception as e:
-                    print(f"Warning: Could not process audio file {audio_file}: {e}")
                 finally:
                     # Clean up temporary file
-                    if os.path.exists(audio_file):
-                        os.remove(audio_file)
             if len(combined_audio) == 0:
-                raise Exception("No audio content generated")
-            output_filename = f"podcast_output_{uuid.uuid4()}.wav"
             combined_audio.export(output_filename, format="wav")
             if progress:
-                progress(1.0, "Podcast generated successfully!")
             return output_filename
         except Exception as e:
             # Clean up any remaining temp files
             for audio_file in audio_files:
-                if os.path.exists(audio_file):
-                    os.remove(audio_file)
-            raise Exception(f"Audio combination failed: {str(e)}")
     async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, file_obj=None, progress=None) -> str:
-        """Main podcast generation pipeline with improved error handling"""
         try:
             if progress:
-                progress(0.1, "Starting podcast generation...")
             # Generate script
             podcast_json = await self.generate_script(input_text, language, file_obj, progress)
-            if not podcast_json.get('podcast'):
-                raise Exception("No podcast content generated")
-            logs.append("✅ process input 1")
             if progress:
-                progress(0.5, "Converting text to speech...")
-            # Generate TTS with sequential processing to avoid overload
             audio_files = []
             total_lines = len(podcast_json['podcast'])
             for i, item in enumerate(podcast_json['podcast']):
                 try:
                     audio_file = await self.tts_generate(
                         item['line'],
                         item['speaker'],
                         speaker1,
                         speaker2
                     )
                     audio_files.append(audio_file)
                     # Update progress
                     if progress:
                         current_progress = 0.5 + (0.4 * (i + 1) / total_lines)
-                        progress(current_progress, f"Generated speech {i + 1}/{total_lines}")
                 except Exception as e:
-                    print(f"TTS error for line {i}: {e}")
-                    # Continue with remaining lines
                     continue
             if not audio_files:
-                raise Exception("No audio files generated successfully")
             # Combine audio files
             combined_audio = await self.combine_audio_files(audio_files, progress)
             return combined_audio
         except Exception as e:
-            raise Exception(f"Podcast generation failed: {str(e)}")
 # Voice mapping
 VOICE_MAPPING = {
@@ -327,72 +579,107 @@ VOICE_MAPPING = {
 }
 async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, progress=None) -> str:
-    """Process input and generate podcast"""
-    start_time = time.time()
     try:
         if progress:
-            progress(0.05, "Processing input...")
         # Map speaker names to voice IDs
         speaker1_voice = VOICE_MAPPING.get(speaker1, "en-US-AndrewMultilingualNeural")
         speaker2_voice = VOICE_MAPPING.get(speaker2, "en-US-AvaMultilingualNeural")
-        logs.append("✅ process input 1")
         # Validate input
         if not input_text or input_text.strip() == "":
             if input_file is None:
-                raise Exception("Please provide either text input or upload a file")
-            # TODO: Add file processing logic here if needed
         podcast_generator = PodcastGenerator()
         result = await podcast_generator.generate_podcast(
             input_text, language, speaker1_voice, speaker2_voice, input_file, progress
         )
-        logs.append("✅ process input 2")
-        end_time = time.time()
-        print(f"Total generation time: {end_time - start_time:.2f} seconds")
         return result
     except Exception as e:
-        error_msg = str(e)
-        print(f"Processing error: {error_msg}")
-        raise Exception(f"Generation failed: {error_msg}")
 def generate_podcast_gradio(input_text, input_file, language, speaker1, speaker2):
     try:
         # Validate inputs
         if not input_text and input_file is None:
-            return None
         if input_text and len(input_text.strip()) == 0:
             input_text = None
-        logs.append("✅ File processing 1")
-        # Create a simple progress tracker
-        progress_history = []
         def progress_callback(value, text):
-            progress_history.append(f"{value:.1%}: {text}")
-            print(f"Progress: {value:.1%} - {text}")
-        logs.append("✅ File processing 2")
-        # Run the async function
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
         try:
-            result = loop.run_until_complete(
                 process_input(input_text, input_file, language, speaker1, speaker2, progress_callback)
             )
-            return result
-        finally:
-            loop.close()
     except Exception as e:
-        print(f"Gradio function error: {e}")
-        raise gr.Error(f"Failed to generate podcast: {str(e)}")
 def create_interface():
-    """Create the Gradio interface with proper component configuration"""
     language_options = [
         "Auto Detect", "English", "German", "French", "Spanish", "Italian",
         "Portuguese", "Dutch", "Russian", "Chinese", "Japanese", "Korean"
@@ -409,6 +696,12 @@ def create_interface():
         gr.Markdown("# 🎙️ PodcastGen 2")
         gr.Markdown("Generate professional 2-speaker podcasts from text input!")
         with gr.Row():
             with gr.Column(scale=2):
                 input_text = gr.Textbox(
@@ -423,7 +716,7 @@ def create_interface():
                     label="Upload File (Optional)",
                     file_types=[".pdf", ".txt"],
                     type="filepath",
-                    #info=f"Max size: {MAX_FILE_SIZE_MB}MB"
                 )
         with gr.Row():
@@ -449,13 +742,15 @@ def create_interface():
         generate_btn = gr.Button(
             "🎙️ Generate Podcast",
             variant="primary",
-            size="lg"
         )
         log_output = gr.Textbox(
             label="🪵 Debug & Transcript Log",
             lines=15,
-            interactive=False
         )
         output_audio = gr.Audio(
@@ -469,32 +764,12 @@ def create_interface():
         generate_btn.click(
             fn=generate_podcast_gradio,
             inputs=[input_text, input_file, language, speaker1, speaker2],
-            outputs=[output_audio],
             show_progress=True
         )
         # Add usage instructions
-        with gr.Accordion("Usage Instructions", open=False):
             gr.Markdown("""
             ### How to use:
-            1. **Input**: Enter your topic or text in the text box, or upload a PDF/TXT file
-            2. **Language**: Choose the output language (Auto Detect recommended)
-            3. **Voices**: Select different voices for Speaker 1 and Speaker 2
-            4. **Generate**: Click the button and wait for processing
-            ### Tips:
-            - Provide clear, specific topics for better results
-            - The AI will create a natural conversation between two speakers
-            - Generation may take 1-3 minutes depending on text length
-            """)
-    return demo
-if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True,
-        share=False
-    )

 import mimetypes
 import torch
 import re
+from typing import List, Dict, Optional
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+import PyPDF2
+import io
+import traceback
 # Constants
 MAX_FILE_SIZE_MB = 20
 MODEL_ID = "unsloth/gemma-3-1b-pt"
+# Global logging system - CRITICAL FIX #1
+# In-memory buffer that add_log() appends to.
+# NOTE(review): nothing visible here trims or clears this list, so it appears
+# to grow for the lifetime of the process - confirm whether a caller resets it.
+logs = []
+def add_log(message):
+    """Append a timestamped entry to the global ``logs`` list and print the message.
+
+    The stored entry is prefixed with the local time as ``[HH:MM:SS]``; the
+    printed copy is the raw message without that prefix.
+
+    NOTE(review): no lock is taken here, so any thread-safety rests on
+    ``list.append`` itself - confirm before relying on it across threads.
+    """
+    logs.append(f"[{time.strftime('%H:%M:%S')}] {message}")
+    print(message)
+# Initialize model with comprehensive error handling - CRITICAL FIX #2
+# Module-level handles; they stay None until initialize_model() assigns them.
+model = None
+tokenizer = None
+generation_config = None
+def initialize_model():
+    """Load the tokenizer, model and generation settings into module globals.
+
+    Assigns the module-level ``tokenizer``, ``model`` and ``generation_config``
+    in that order, so a failure part-way through leaves the later ones at
+    whatever value they had before the call.
+
+    Returns:
+        bool: True when every step succeeded; False when any step raised, in
+        which case the error and its traceback are written via add_log().
+    """
+    global model, tokenizer, generation_config
+    try:
+        add_log("🔄 Initializing model...")
+        tokenizer = AutoTokenizer.from_pretrained(
+            MODEL_ID,
+            trust_remote_code=True,
+            use_fast=False,  # Sometimes fast tokenizers cause issues
+        )
+        # Reuse the EOS token for padding when the tokenizer defines none
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+            add_log("✅ Set pad_token to eos_token")
+        # Half precision with automatic placement on GPU, full precision on CPU
+        has_cuda = torch.cuda.is_available()
+        if has_cuda:
+            device, dtype, placement = "cuda", torch.float16, "auto"
+        else:
+            device, dtype, placement = "cpu", torch.float32, None
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            torch_dtype=dtype,
+            device_map=placement,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+        )
+        if not has_cuda:
+            model = model.to(device)
+        model.eval()
+        # Sampling settings shared by every generate() call
+        sampling_settings = {
+            "max_new_tokens": 512,  # Reduced for stability
+            "temperature": 0.7,
+            "top_p": 0.9,
+            "do_sample": True,
+            "pad_token_id": tokenizer.pad_token_id,
+            "eos_token_id": tokenizer.eos_token_id,
+            "repetition_penalty": 1.1,
+            "length_penalty": 1.0,
+        }
+        generation_config = GenerationConfig(**sampling_settings)
+        add_log(f"✅ Model loaded successfully on device: {model.device}")
+        return True
+    except Exception as exc:
+        add_log(f"❌ Model initialization failed: {str(exc)}")
+        add_log(f"Traceback: {traceback.format_exc()}")
+        return False
+# Initialize model at startup
+# Runs at import time; model_loaded holds the True/False result returned by
+# initialize_model() and is checked again before script generation.
+model_loaded = initialize_model()
 class PodcastGenerator:
     def __init__(self):
         self.tokenizer = tokenizer
         self.generation_config = generation_config
+    def extract_text_from_pdf(self, file_path: str) -> str:
+        """Extract the text of every readable page of a PDF - CRITICAL FIX #3
+
+        Pages whose extraction raises are logged and skipped; the remaining
+        pages are concatenated with a newline after each one.
+
+        Args:
+            file_path: Path of the PDF file to read.
+
+        Returns:
+            The concatenated page text with surrounding whitespace stripped.
+
+        Raises:
+            Exception: If the file cannot be opened or parsed, or if no page
+                yields any text. The original error is chained as the cause.
+        """
+        try:
+            add_log(f"📖 Extracting text from PDF: {file_path}")
+            page_texts = []
+            with open(file_path, 'rb') as file:
+                pdf_reader = PyPDF2.PdfReader(file)
+                for page_num, page in enumerate(pdf_reader.pages, start=1):
+                    try:
+                        page_texts.append(page.extract_text() + "\n")
+                    except Exception as e:
+                        # One bad page should not discard the rest of the document
+                        add_log(f"⚠️ Failed to extract page {page_num}: {e}")
+                        continue
+                    add_log(f"✅ Extracted page {page_num}")
+            # Join once instead of growing a string page by page
+            text = "".join(page_texts)
+            if not text.strip():
+                raise Exception("No text could be extracted from PDF")
+            add_log(f"✅ PDF extraction complete. Text length: {len(text)} characters")
+            return text.strip()
+        except Exception as e:
+            error_msg = f"❌ PDF extraction failed: {str(e)}"
+            add_log(error_msg)
+            # Keep the underlying error attached for debugging
+            raise Exception(error_msg) from e
+    def clean_and_validate_json(self, text: str) -> Dict:
+        """Find and parse a podcast JSON object in model output - CRITICAL FIX #4
+
+        Three regex strategies are tried in order, from strictest to loosest.
+        Each candidate is parsed as-is first; only if that does not give a
+        valid script is a copy with trailing commas removed tried, so valid
+        JSON is never altered by the repair step.
+
+        Args:
+            text: Raw text produced by the model.
+
+        Returns:
+            The first parsed object accepted by validate_podcast_structure(),
+            or the result of create_fallback_podcast(text) when none is found.
+        """
+        add_log("🔍 Attempting to extract JSON from generated text")
+        # Multiple strategies for JSON extraction
+        strategies = [
+            # Strategy 1: Look for complete JSON objects
+            r'\{[^{}]*"topic"[^{}]*"podcast"[^{}]*\[[^\]]*\][^{}]*\}',
+            # Strategy 2: More flexible pattern
             r'\{.*?"topic".*?"podcast".*?\[.*?\].*?\}',
+            # Strategy 3: Extract content between first { and last }
+            r'\{.*\}'
         ]
+        for i, pattern in enumerate(strategies):
+            add_log(f"🎯 Trying extraction strategy {i+1}")
             matches = re.findall(pattern, text, re.DOTALL | re.IGNORECASE)
             for match in matches:
+                raw = match.strip()
+                # Fix common JSON issues on a copy, never on the raw candidate
+                repaired = re.sub(r',\s*}', '}', raw)  # Remove trailing commas
+                repaired = re.sub(r',\s*]', ']', repaired)  # Remove trailing commas in arrays
+                candidates = [raw] if repaired == raw else [raw, repaired]
+                for candidate in candidates:
+                    try:
+                        parsed = json.loads(candidate)
+                    except json.JSONDecodeError as e:
+                        add_log(f"⚠️ JSON parse error in strategy {i+1}: {e}")
+                        continue
+                    # Validate structure
+                    if self.validate_podcast_structure(parsed):
+                        add_log("✅ Valid JSON structure found")
+                        return parsed
+                    add_log(f"⚠️ Strategy {i+1} parsed JSON without a valid podcast structure")
+        add_log("⚠️ No valid JSON found, creating fallback")
         return self.create_fallback_podcast(text)
+    def validate_podcast_structure(self, data: Dict) -> bool:
+        """Return True when *data* is a usable podcast script.
+
+        A usable script is a dict with a 'topic' key and a non-empty 'podcast'
+        list whose items are dicts holding an integer 'speaker' of 1 or 2 and
+        a non-blank string 'line'.
+
+        Args:
+            data: Parsed JSON value to check; any type is accepted.
+
+        Returns:
+            True if every check passes, otherwise False. Never raises.
+        """
+        if not isinstance(data, dict):
+            return False
+        if 'topic' not in data or 'podcast' not in data:
+            return False
+        entries = data['podcast']
+        if not isinstance(entries, list) or not entries:
+            return False
+        for item in entries:
+            if not isinstance(item, dict):
+                return False
+            speaker = item.get('speaker')
+            # bool is a subclass of int, so JSON true would otherwise pass as speaker 1
+            if isinstance(speaker, bool) or not isinstance(speaker, int):
+                return False
+            if speaker not in (1, 2):
+                return False
+            line = item.get('line')
+            if not isinstance(line, str) or not line.strip():
+                return False
+        return True
     def create_fallback_podcast(self, text: str) -> Dict:
+        """Create fallback podcast structure - IMPROVED"""
+        add_log("🔧 Creating fallback podcast structure")
+        # Extract meaningful content from the original text
+        sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 20]
         if not sentences:
+            sentences = [
+                "Welcome to our podcast discussion",
+                "Today we're exploring an interesting topic",
+                "Let's dive into the key points",
+                "That's a fascinating perspective",
+                "What are your thoughts on this matter",
+                "I think there are multiple angles to consider",
+                "This is definitely worth exploring further",
+                "Thank you for this engaging conversation"
+            ]
+        # Create balanced conversation
         podcast_lines = []
+        for i, sentence in enumerate(sentences[:12]):  # Limit to 12 exchanges
             speaker = (i % 2) + 1
+            line = sentence + "." if not sentence.endswith('.') else sentence
             podcast_lines.append({
                 "speaker": speaker,
+                "line": line
             })
+        result = {
             "topic": "Generated Discussion",
             "podcast": podcast_lines
         }
+        add_log(f"✅ Fallback podcast created with {len(podcast_lines)} lines")
+        return result
     async def generate_script(self, prompt: str, language: str, file_obj=None, progress=None) -> Dict:
+        """Improved script generation with better error handling"""
+        if not model_loaded or not self.model or not self.tokenizer:
+            raise Exception("❌ Model not properly initialized. Please restart the application.")
+        add_log("🎬 Starting script generation")
+        # Process file if provided - CRITICAL FIX #5
+        if file_obj is not None:
+            try:
+                add_log(f"📁 Processing uploaded file: {file_obj}")
+                if file_obj.endswith('.pdf'):
+                    extracted_text = self.extract_text_from_pdf(file_obj)
+                    # Truncate if too long
+                    if len(extracted_text) > 2000:
+                        extracted_text = extracted_text[:2000] + "..."
+                        add_log("✂️ Text truncated to 2000 characters")
+                    prompt = f"Create a podcast discussion about this content: {extracted_text}"
+                elif file_obj.endswith('.txt'):
+                    with open(file_obj, 'r', encoding='utf-8') as f:
+                        file_content = f.read()
+                    if len(file_content) > 2000:
+                        file_content = file_content[:2000] + "..."
+                    prompt = f"Create a podcast discussion about this content: {file_content}"
+            except Exception as e:
+                add_log(f"⚠️ File processing error: {e}")
+                # Continue with original prompt
+        # Create focused prompt - CRITICAL FIX #6
         example_json = {
+            "topic": "AI Technology",
             "podcast": [
+                {"speaker": 1, "line": "Welcome to our discussion about AI technology."},
+                {"speaker": 2, "line": "Thanks for having me. This is such an exciting field."},
+                {"speaker": 1, "line": "What aspects of AI do you find most interesting?"},
+                {"speaker": 2, "line": "I'm particularly fascinated by machine learning applications."}
             ]
         }
+        language_instruction = f"Generate in {language}" if language != "Auto Detect" else "Use appropriate language"
+        # Simplified and more reliable prompt
+        system_prompt = f"""Create a podcast script in valid JSON format.
 Requirements:
 - Exactly 2 speakers (speaker 1 and 2)
+- Natural conversation style
+- 6-8 exchanges total
+- {language_instruction}
+Example JSON structure:
+{json.dumps(example_json)}
+Topic: {prompt}
+JSON:"""
         try:
             if progress:
+                progress(0.3, "🤖 Generating script...")
+            add_log("🔤 Tokenizing input...")
+            # Tokenize with proper handling
             inputs = self.tokenizer(
+                system_prompt,
+                return_tensors="pt",
+                padding=True,
                 truncation=True,
+                max_length=1024  # Reduced for stability
             )
+            # Move to correct device
             inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+            add_log(f"✅ Inputs moved to device: {self.model.device}")
+            add_log("��� Generating with model...")
+            # Generate with timeout and better parameters
             with torch.no_grad():
+                torch.cuda.empty_cache() if torch.cuda.is_available() else None
+                outputs = self.model.generate(
                     **inputs,
                     generation_config=self.generation_config,
                     pad_token_id=self.tokenizer.pad_token_id,
+                    attention_mask=inputs.get('attention_mask'),
+                    use_cache=True
                 )
+            add_log("✅ Model generation complete")
+            # Decode only new tokens
             generated_text = self.tokenizer.decode(
+                outputs[0][inputs['input_ids'].shape[1]:],
+                skip_special_tokens=True,
+                clean_up_tokenization_spaces=True
             )
+            add_log(f"📝 Generated text length: {len(generated_text)} characters")
+            add_log(f"🔍 Generated text preview: {generated_text[:200]}...")
             if progress:
+                progress(0.4, "🔍 Processing generated script...")
+            # Extract and validate JSON
+            result = self.clean_and_validate_json(generated_text)
             if progress:
+                progress(0.5, "✅ Script generated successfully!")
+            add_log(f"✅ Final script has {len(result.get('podcast', []))} lines")
             return result
         except Exception as e:
+            error_msg = f"❌ Script generation error: {str(e)}"
+            add_log(error_msg)
+            add_log(f"🔍 Traceback: {traceback.format_exc()}")
+            # Return robust fallback
+            return self.create_fallback_podcast(prompt or "General Discussion")
     async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
         """Synthesize one dialogue line to a temporary audio file with edge-tts.

         The text is stripped and reduced to word characters, whitespace and
         basic punctuation before synthesis. Synthesis is attempted up to
         three times; each attempt is bounded by a 30 second timeout and
         failed attempts are followed by a 2 second pause.

         Args:
             text: The dialogue line to speak.
             speaker: Speaker number; ``1`` selects ``speaker1``, any other
                 value selects ``speaker2``.
             speaker1: Voice identifier used for speaker 1.
             speaker2: Voice identifier used for the other speaker.

         Returns:
             Path of the generated temporary audio file.

         Raises:
             ValueError: If nothing is left to speak once the text has been
                 cleaned (empty, whitespace-only or symbol-only input).
             RuntimeError: If the last attempt timed out or failed.
         """
         voice = speaker1 if speaker == 1 else speaker2

         # Sanitize BEFORE validating: input made only of characters the regex
         # strips (emoji, symbols) would otherwise pass the emptiness check and
         # be sent to the TTS service as an empty string, wasting every retry.
         text = re.sub(r'[^\w\s.,!?;:\-\'"()]', '', (text or "").strip()).strip()
         if not text:
             raise ValueError("Empty text for TTS")

         add_log(f"🎙️ Generating TTS for speaker {speaker} with voice {voice}")
         temp_filename = f"temp_audio_{uuid.uuid4().hex[:8]}.wav"
         max_retries = 3
         for attempt in range(max_retries):
             is_last_attempt = attempt == max_retries - 1
             try:
                 add_log(f"🎵 TTS attempt {attempt + 1} for: {text[:50]}...")
                 communicate = edge_tts.Communicate(text, voice)
                 # Bound each attempt so a stalled request cannot hang the pipeline.
                 await asyncio.wait_for(communicate.save(temp_filename), timeout=30.0)
                 # Only accept a file that exists and holds more than 1000 bytes.
                 if os.path.exists(temp_filename) and os.path.getsize(temp_filename) > 1000:
                     add_log(f"✅ TTS successful: {os.path.getsize(temp_filename)} bytes")
                     return temp_filename
                 # Raised inside the try on purpose: the generic handler below
                 # removes the bad file and retries.
                 raise Exception("Generated audio file is too small or empty")
             except asyncio.TimeoutError as e:
                 add_log(f"⏰ TTS timeout on attempt {attempt + 1}")
                 if os.path.exists(temp_filename):
                     os.remove(temp_filename)
                 if is_last_attempt:
                     raise RuntimeError("TTS generation timed out after multiple attempts") from e
                 await asyncio.sleep(2)
             except Exception as e:
                 add_log(f"❌ TTS error on attempt {attempt + 1}: {str(e)}")
                 if os.path.exists(temp_filename):
                     os.remove(temp_filename)
                 if is_last_attempt:
                     raise RuntimeError(f"TTS generation failed after {max_retries} attempts: {str(e)}") from e
                 await asyncio.sleep(2)
     async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
         """Concatenate per-line audio files into a single WAV podcast file.

         Files that are missing, smaller than 1000 bytes, shorter than 100 ms
         or unreadable are skipped. 800 ms of silence is inserted between
         consecutive segments that were actually added. Every input file is
         deleted once it has been handled, whether or not it was used.

         Args:
             audio_files: Paths of the temporary audio files, in playback order.
             progress: Optional callable invoked as ``progress(fraction, message)``.

         Returns:
             Path of the exported ``podcast_output_<id>.wav`` file.

         Raises:
             Exception: If no usable audio was found, the combined audio is
                 shorter than 5 seconds, or export fails. The message is
                 prefixed with "Audio combination failed" and the original
                 error is chained as the cause.
         """

         def remove_temp_file(path: str, announce: bool) -> None:
             # Best-effort cleanup: a file that cannot be deleted must not
             # abort the podcast, but the failure is logged instead of hidden.
             try:
                 if os.path.exists(path):
                     os.remove(path)
                     if announce:
                         add_log(f"🗑️ Cleaned up temp file: {path}")
             except OSError as cleanup_error:
                 add_log(f"⚠️ Could not remove temp file {path}: {cleanup_error}")

         if progress:
             progress(0.9, "🎵 Combining audio files...")
         add_log(f"🔗 Combining {len(audio_files)} audio files")
         try:
             combined_audio = AudioSegment.empty()
             silence_padding = AudioSegment.silent(duration=800)  # 800ms silence
             for i, audio_file in enumerate(audio_files):
                 try:
                     add_log(f"📁 Processing audio file {i+1}: {audio_file}")
                     if not os.path.exists(audio_file):
                         add_log(f"⚠️ Audio file not found: {audio_file}")
                         continue
                     file_size = os.path.getsize(audio_file)
                     add_log(f"📊 File size: {file_size} bytes")
                     if file_size < 1000:
                         add_log(f"⚠️ Audio file too small, skipping: {audio_file}")
                         continue
                     audio_segment = AudioSegment.from_file(audio_file)
                     if len(audio_segment) < 100:  # Less than 100ms
                         add_log("⚠️ Audio segment too short, skipping")
                         continue
                     # Pad BEFORE every segment except the first one added.
                     # Keying the padding on the list index left trailing
                     # silence whenever the final file(s) were skipped.
                     if len(combined_audio) > 0:
                         combined_audio += silence_padding
                     combined_audio += audio_segment
                     add_log(f"✅ Added audio segment {i+1}, total duration: {len(combined_audio)}ms")
                 except Exception as e:
                     add_log(f"⚠️ Could not process audio file {audio_file}: {e}")
                     continue
                 finally:
                     # Runs for used and skipped files alike.
                     remove_temp_file(audio_file, announce=True)
             if len(combined_audio) == 0:
                 raise Exception("No valid audio content was generated")
             if len(combined_audio) < 5000:  # Less than 5 seconds
                 raise Exception("Combined audio is too short")
             output_filename = f"podcast_output_{uuid.uuid4().hex[:8]}.wav"
             combined_audio.export(output_filename, format="wav")
             file_size = os.path.getsize(output_filename)
             duration = len(combined_audio) / 1000  # Duration in seconds
             add_log(f"✅ Final podcast: {output_filename} ({file_size} bytes, {duration:.1f}s)")
             if progress:
                 progress(1.0, "🎉 Podcast generated successfully!")
             return output_filename
         except Exception as e:
             error_msg = f"❌ Audio combination failed: {str(e)}"
             add_log(error_msg)
             # Remove whatever inputs are still on disk (e.g. when the loop
             # never reached them).
             for audio_file in audio_files:
                 remove_temp_file(audio_file, announce=False)
             raise Exception(error_msg) from e
     async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, file_obj=None, progress=None) -> str:
         """Run the whole pipeline: script generation, per-line TTS, audio merge.

         Args:
             input_text: Text passed on to ``generate_script``.
             language: Language passed on to ``generate_script``.
             speaker1: Voice identifier for speaker 1.
             speaker2: Voice identifier for the other speaker.
             file_obj: Optional file passed on to ``generate_script``.
             progress: Optional callable invoked as ``progress(fraction, message)``.

         Returns:
             Whatever ``combine_audio_files`` returns (the combined audio path).

         Raises:
             Exception: On any failure; the message includes the elapsed time
                 and the text of the underlying error.
         """
         started_at = time.time()
         add_log("🎬 Starting podcast generation pipeline")
         try:
             if progress:
                 progress(0.1, "🚀 Starting podcast generation...")

             # Step 1: obtain the dialogue script.
             add_log("📝 Generating podcast script...")
             podcast_json = await self.generate_script(input_text, language, file_obj, progress)
             dialogue = podcast_json.get('podcast')
             if not dialogue:
                 raise Exception("No podcast content was generated")
             total_lines = len(dialogue)
             add_log(f"✅ Script generated with {total_lines} dialogue lines")

             if progress:
                 progress(0.5, "🎙️ Converting text to speech...")

             # Step 2: synthesize each line; a failing line is logged and
             # skipped so one bad line does not sink the whole podcast.
             audio_files = []
             successful_lines = 0
             for line_number, item in enumerate(dialogue, start=1):
                 try:
                     add_log(f"🎵 Processing line {line_number}/{total_lines}: Speaker {item['speaker']}")
                     audio_files.append(
                         await self.tts_generate(item['line'], item['speaker'], speaker1, speaker2)
                     )
                     successful_lines += 1
                     if progress:
                         # TTS occupies the 0.5 - 0.9 span of the progress bar.
                         current_progress = 0.5 + (0.4 * line_number / total_lines)
                         progress(current_progress, f"🎙️ Generated speech {successful_lines}/{total_lines}")
                 except Exception as e:
                     add_log(f"❌ TTS failed for line {line_number}: {e}")
                     continue

             if not audio_files:
                 raise Exception("No audio files were generated successfully")
             if successful_lines < total_lines / 2:
                 add_log(f"⚠️ Warning: Only {successful_lines}/{total_lines} lines processed successfully")
             add_log(f"✅ TTS generation complete: {len(audio_files)} audio files")

             # Step 3: merge the clips into the final file.
             combined_audio = await self.combine_audio_files(audio_files, progress)
             add_log(f"🎉 Podcast generation completed in {time.time() - started_at:.1f} seconds")
             return combined_audio
         except Exception as e:
             elapsed_time = time.time() - started_at
             error_msg = f"❌ Podcast generation failed after {elapsed_time:.1f}s: {str(e)}"
             add_log(error_msg)
             add_log(f"🔍 Full traceback: {traceback.format_exc()}")
             raise Exception(error_msg)
 # Voice mapping
 VOICE_MAPPING = {
 }
 async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, progress=None) -> str:
     """Validate a generation request and run it through ``PodcastGenerator``.

     Speaker display names are translated to voice identifiers through
     ``VOICE_MAPPING`` (falling back to default English voices), the request
     is checked for usable input and a loaded model, and the podcast is then
     generated.

     Args:
         input_text: Source text; may be empty when ``input_file`` is given.
         input_file: Uploaded file, or ``None``.
         language: Language passed on to the generator.
         speaker1: Display name of the first speaker's voice.
         speaker2: Display name of the second speaker's voice.
         progress: Optional callable invoked as ``progress(fraction, message)``.

     Returns:
         The value returned by ``PodcastGenerator.generate_podcast``.

     Raises:
         Exception: On any failure, with a "CRITICAL ERROR" prefixed message.
     """
     banner = "=" * 50
     add_log(banner)
     add_log("🎬 NEW PODCAST GENERATION REQUEST")
     add_log(banner)
     try:
         if progress:
             progress(0.05, "🔍 Processing input...")

         # Resolve display names to voice IDs.
         speaker1_voice = VOICE_MAPPING.get(speaker1, "en-US-AndrewMultilingualNeural")
         speaker2_voice = VOICE_MAPPING.get(speaker2, "en-US-AvaMultilingualNeural")
         add_log(f"🎭 Speaker 1: {speaker1} -> {speaker1_voice}")
         add_log(f"🎭 Speaker 2: {speaker2} -> {speaker2_voice}")

         # At least one of text / file must be present.
         has_text = bool(input_text) and input_text.strip() != ""
         if has_text:
             add_log(f"📝 Text input provided: {len(input_text)} characters")
         elif input_file is None:
             raise Exception("❌ Please provide either text input or upload a file")
         else:
             add_log("📁 No text input provided, will process uploaded file")
         if input_file:
             add_log(f"📎 File uploaded: {input_file}")

         # Nothing can be generated without the language model.
         if not model_loaded:
             raise Exception("❌ Model not loaded. Please restart the application.")

         generator = PodcastGenerator()
         result = await generator.generate_podcast(
             input_text, language, speaker1_voice, speaker2_voice, input_file, progress
         )
         add_log("🎉 PODCAST GENERATION COMPLETED SUCCESSFULLY")
         return result
     except Exception as e:
         error_msg = f"❌ CRITICAL ERROR: {str(e)}"
         add_log(error_msg)
         add_log(f"🔍 Traceback: {traceback.format_exc()}")
         raise Exception(error_msg)
 def generate_podcast_gradio(input_text, input_file, language, speaker1, speaker2):
     """Synchronous Gradio click handler that drives the async pipeline.

     Resets the module-level ``logs`` list, runs ``process_input`` to
     completion and returns the result together with the collected log text.
     Errors are logged rather than raised so the log is always returned.

     Args:
         input_text: Text from the input box (may be empty or ``None``).
         input_file: Uploaded file, or ``None``.
         language: Selected language.
         speaker1: Display name of the first speaker's voice.
         speaker2: Display name of the second speaker's voice.

     Returns:
         A ``(result, log_text)`` tuple; ``result`` is ``None`` when no input
         was supplied or generation failed.
     """
     global logs
     logs = []  # Reset logs for each generation
     try:
         add_log("🎬 Gradio function called")
         add_log(f"📋 Parameters: text={bool(input_text)}, file={bool(input_file)}, lang={language}")
         # Validate inputs
         if not input_text and input_file is None:
             add_log("❌ No input provided")
             return None, "\n".join(logs)
         if input_text and len(input_text.strip()) == 0:
             input_text = None

         def progress_callback(value, text):
             # Progress is surfaced through the log rather than a progress bar.
             add_log(f"📊 Progress: {value:.1%} - {text}")

         def run_pipeline():
             # asyncio.run creates and closes a fresh event loop per request.
             return asyncio.run(
                 process_input(input_text, input_file, language, speaker1, speaker2, progress_callback)
             )

         # get_running_loop() is the supported way to ask "is a loop running in
         # this thread?"; get_event_loop() is deprecated for that purpose. The
         # probe is kept apart from the pipeline call so that a RuntimeError
         # raised by the pipeline is never mistaken for "no loop" and retried.
         try:
             asyncio.get_running_loop()
             loop_is_running = True
         except RuntimeError:
             loop_is_running = False

         if loop_is_running:
             # asyncio.run cannot be nested inside a running loop, so the
             # pipeline gets its own thread (and therefore its own loop).
             import concurrent.futures
             executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
             try:
                 result = executor.submit(run_pipeline).result(timeout=300)  # 5 minute timeout
             finally:
                 # wait=False: a `with` block would wait for the worker on
                 # exit, which made the timeout above unable to return early.
                 # A timed-out worker keeps running in the background.
                 executor.shutdown(wait=False)
         else:
             result = run_pipeline()

         add_log("✅ Gradio function completed successfully")
         return result, "\n".join(logs)
     except Exception as e:
         error_msg = f"❌ Gradio function error: {str(e)}"
         add_log(error_msg)
         add_log(f"🔍 Traceback: {traceback.format_exc()}")
         return None, "\n".join(logs)
 def create_interface():
+    """Create the Gradio interface"""
     language_options = [
         "Auto Detect", "English", "German", "French", "Spanish", "Italian",
         "Portuguese", "Dutch", "Russian", "Chinese", "Japanese", "Korean"
         gr.Markdown("# 🎙️ PodcastGen 2")
         gr.Markdown("Generate professional 2-speaker podcasts from text input!")
+        # Model status indicator
+        if model_loaded:
+            gr.Markdown("✅ **Model Status: Ready**")
+        else:
+            gr.Markdown("❌ **Model Status: Failed to Load**")
         with gr.Row():
             with gr.Column(scale=2):
                 input_text = gr.Textbox(
                     label="Upload File (Optional)",
                     file_types=[".pdf", ".txt"],
                     type="filepath",
+                    info=f"Max size: {MAX_FILE_SIZE_MB}MB"
                 )
         with gr.Row():
         generate_btn = gr.Button(
             "🎙️ Generate Podcast",
             variant="primary",
+            size="lg",
+            interactive=model_loaded
         )
         log_output = gr.Textbox(
             label="🪵 Debug & Transcript Log",
             lines=15,
+            interactive=False,
+            info="Real-time generation logs and debugging information"
         )
         output_audio = gr.Audio(
         generate_btn.click(
             fn=generate_podcast_gradio,
             inputs=[input_text, input_file, language, speaker1, speaker2],
+            outputs=[output_audio, log_output],
             show_progress=True
         )
         # Add usage instructions
+        with gr.Accordion("Usage Instructions & Troubleshooting", open=False):
             gr.Markdown("""
             ### How to use:
+            1. **Input**: Enter your topic or text in the text box, or upload a PDF/TXT file