import gradio as gr
import os
import tempfile
import requests
import soundfile as sf
import json
import shutil
from pathlib import Path
import numpy as np

# ===== NEUTTS IMPORTS =====
from neuttsair.neutts import NeuTTSAir

# ===== CONFIGURATION =====
# Where state is persisted; both paths are relative to the working directory.
CONFIG_FILE = "voice_profiles.json"
SAMPLE_DIR = "samples"
# Make sure the sample folder is present before anything tries to write into it.
Path(SAMPLE_DIR).mkdir(parents=True, exist_ok=True)

# ===== VOICE PROFILE MANAGEMENT =====
class VoiceProfileManager:
    """Store named voice profiles in a JSON file.

    Each profile maps a voice name to a dict with the keys ``audio_path``
    (reference recording), ``text`` (transcript of that recording) and
    ``created_at`` (timestamp string). The whole mapping is kept in memory
    in ``self.profiles`` and rewritten to ``config_file`` on every change.
    """

    def __init__(self, config_file=CONFIG_FILE):
        """Remember *config_file* and load whatever profiles it holds."""
        self.config_file = config_file
        self.profiles = self.load_profiles()

    def load_profiles(self):
        """Return the profiles stored on disk.

        Returns:
            The decoded mapping, or an empty dict when the file is missing,
            is not valid JSON, or does not contain a JSON object.
        """
        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            return {}
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            # An empty or half-written file must not stop the app from
            # starting; begin with no profiles instead. The unreadable file
            # is replaced on the next save.
            print(f"⚠️ Could not read {self.config_file}: {e}")
            return {}
        # Anything other than an object cannot be used as a name -> profile map.
        return data if isinstance(data, dict) else {}

    def save_profiles(self):
        """Write ``self.profiles`` to ``config_file``.

        The data goes to a sibling ``.tmp`` file first and is then moved over
        the real file, so an interrupted write cannot leave a truncated
        profile file behind.
        """
        tmp_path = f"{self.config_file}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(self.profiles, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, self.config_file)

    def add_profile(self, name, audio_path, text):
        """Create or overwrite the profile *name* and persist the change.

        Args:
            name: Profile key; an existing profile with this name is replaced.
            audio_path: Path of the reference recording.
            text: Transcript of the reference recording.

        Returns:
            A human-readable confirmation message.
        """
        self.profiles[name] = {
            "audio_path": audio_path,
            "text": text,
            "created_at": str(np.datetime64('now'))
        }
        self.save_profiles()
        return f"✅ Voice profile '{name}' saved!"

    def get_profile(self, name):
        """Return the profile dict for *name*, or ``None`` if it is unknown."""
        return self.profiles.get(name)

    def list_profiles(self):
        """Return the profile names as a list, in insertion order."""
        return list(self.profiles)

# ===== SAMPLE MANAGEMENT =====
def download_default_samples():
    """Download the default sample voices into SAMPLE_DIR (best effort).

    For every sample, the reference audio (``<name>.wav``) and its transcript
    (``<name>.txt``) are fetched unless the file already exists. Each file is
    checked on its own, so a transcript that failed earlier is retried even
    when the audio is already present. Failures are printed and never raised,
    so the application still starts without network access.
    """
    # NOTE(review): the owner segment was spelled "neophonic" here while the
    # model repos used elsewhere in this file are under "neuphonic"; the URLs
    # now use the latter — confirm the sample paths against the upstream repo.
    samples = {
        "dave": {
            "audio": "https://github.com/neuphonic/neutts-air/raw/main/samples/dave.wav",
            "text": "https://raw.githubusercontent.com/neuphonic/neutts-air/main/samples/dave.txt"
        },
        "andrea": {
            "audio": "https://github.com/neuphonic/neutts-air/raw/main/samples/andrea.wav",
            "text": "https://raw.githubusercontent.com/neuphonic/neutts-air/main/samples/andrea.txt"
        }
    }

    for name, urls in samples.items():
        targets = (
            (urls["audio"], f"{SAMPLE_DIR}/{name}.wav"),
            (urls["text"], f"{SAMPLE_DIR}/{name}.txt"),
        )
        try:
            fetched_any = False
            for url, path in targets:
                if os.path.exists(path):
                    continue
                # A timeout keeps startup from hanging on a dead connection.
                response = requests.get(url, timeout=30)
                # Without this check an HTTP error page would be stored as
                # if it were the sample and then skipped on every later run.
                response.raise_for_status()
                # Raw bytes keep both the audio and the transcript exactly
                # as served, independent of the local default encoding.
                with open(path, 'wb') as f:
                    f.write(response.content)
                fetched_any = True
            if fetched_any:
                print(f"✅ Downloaded {name} sample")
        except (requests.RequestException, OSError) as e:
            print(f"❌ Failed to download {name}: {e}")

# ===== TTS ENGINE =====
class TTSEngine:
    """Lazy wrapper around NeuTTSAir plus the voice profiles it speaks with.

    Creating an engine loads the saved voice profiles and triggers the
    default sample download; the TTS model itself is only built on the first
    call to ``initialize_tts``.
    """

    def __init__(self):
        """Set up the profile manager and fetch the default samples."""
        self.tts = None  # created on demand by initialize_tts()
        self.voice_manager = VoiceProfileManager()
        download_default_samples()

    def initialize_tts(self):
        """Return the NeuTTSAir instance, constructing it on first use."""
        if self.tts is None:
            print("🚀 Initializing NeuTTS Q4 GGUF...")
            self.tts = NeuTTSAir(
                backbone_repo="neuphonic/neutts-air-q4-gguf",
                backbone_device="cpu",
                codec_repo="neuphonic/neucodec",
                codec_device="cpu"
            )
        return self.tts

    def generate_speech(self, text, voice_name):
        """Synthesize *text* in the voice of the profile *voice_name*.

        Args:
            text: The sentence(s) to speak.
            voice_name: Name of a profile known to ``self.voice_manager``.

        Returns:
            ``(wav, None)`` on success, where ``wav`` is whatever
            ``NeuTTSAir.infer`` returns, or ``(None, message)`` when the
            profile is unknown or any step raises.
        """
        try:
            # Look the profile up first: an unknown voice should be reported
            # immediately rather than after loading the model.
            profile = self.voice_manager.get_profile(voice_name)
            if not profile:
                return None, f"❌ Voice profile '{voice_name}' not found"

            tts = self.initialize_tts()
            ref_codes = tts.encode_reference(profile["audio_path"])
            ref_text = profile["text"]

            wav = tts.infer(text, ref_codes, ref_text)
            return wav, None

        except Exception as e:
            # Callers expect a message instead of an exception.
            return None, f"❌ Generation error: {str(e)}"

# ===== SCRIPT PARSING =====
def parse_conversation_script(script_text):
    """Split a script into ``{"speaker", "text"}`` entries, one per spoken line.

    A line of the form ``Label: dialogue`` is split at the first colon, so
    colons inside the dialogue are kept. A line without a colon is attributed
    to ``"Speaker A"``. Blank lines and lines whose dialogue part is empty
    (for example a bare ``Host:``) are skipped, because there is nothing to
    speak for them.

    Args:
        script_text: The raw script; ``None`` or an empty string is allowed.

    Returns:
        A list of dicts in script order; empty when nothing could be parsed.
    """
    if not script_text:
        return []

    entries = []
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        label, colon, dialogue = line.partition(':')
        if colon:
            speaker, text = label.strip(), dialogue.strip()
        else:
            # Default to Speaker A if no label
            speaker, text = "Speaker A", line

        if not text:
            continue
        entries.append({"speaker": speaker, "text": text})
    return entries

def generate_script_from_prompt(prompt, style="conversational"):
    """Return a fixed six-line script with *prompt* inserted in its opening line.

    ``style`` selects one of the built-in outlines ("conversational",
    "interview" or "debate"); any other value uses the conversational one.
    The lines are joined with newlines, each in ``Label: dialogue`` form.
    """
    # Canned outlines; only the first line of each mentions the prompt.
    conversational = (
        "Host: Welcome to our podcast! Today we're discussing {prompt}",
        "Co-host: That's right! It's a fascinating topic that affects many people.",
        "Host: Let's start with the basics. What should our audience know about this?",
        "Co-host: Well, first of all, it's important to understand the key concepts.",
        "Host: And what about the practical applications? How can people use this in their daily lives?",
        "Co-host: Great question! There are several ways to apply this knowledge effectively.",
    )
    interview = (
        "Interviewer: Thanks for joining us today to talk about {prompt}",
        "Guest: Happy to be here! It's a topic I'm very passionate about.",
        "Interviewer: Could you share some background on how you got involved in this field?",
        "Guest: Absolutely. It all started several years ago when I first discovered this area.",
        "Interviewer: What are the most exciting developments you're seeing right now?",
        "Guest: There are some incredible advancements happening that will change everything.",
    )
    debate = (
        "Moderator: Welcome to our debate on {prompt}",
        "Proponent: I believe this is one of the most important issues of our time.",
        "Opponent: While I respect that view, I have some serious concerns about the approach.",
        "Proponent: Let me address those concerns with some concrete evidence.",
        "Opponent: The evidence is compelling, but we must consider the broader implications.",
        "Moderator: Let's hear from both sides about potential solutions.",
    )
    outlines = {
        "conversational": conversational,
        "interview": interview,
        "debate": debate,
    }

    if style in outlines:
        chosen = outlines[style]
    else:
        chosen = conversational

    rendered = []
    for entry in chosen:
        rendered.append(entry.format(prompt=prompt))
    return "\n".join(rendered)

# ===== MAIN GENERATION FUNCTIONS =====
# Single engine shared by the handlers below. Constructing it runs
# TTSEngine.__init__, which loads the saved voice profiles and attempts the
# default sample download at import time; the TTS model itself is only
# created on the first initialize_tts() call.
tts_engine = TTSEngine()

def clone_voice(voice_name, upload_audio, reference_text):
    """Register an uploaded recording as a new voice profile.

    The recording is copied into SAMPLE_DIR and a profile pointing at the
    copy is stored through the shared engine's voice manager.

    Args:
        voice_name: Display name of the voice; surrounding whitespace is
            ignored and the result is used as the profile key.
        upload_audio: Path of the uploaded reference recording.
        reference_text: Transcript of the reference recording.

    Returns:
        A status message describing success or the reason for failure.
    """
    name = voice_name.strip() if isinstance(voice_name, str) else ""
    if not name or not upload_audio:
        return "❌ Please provide a voice name and audio file"

    try:
        # The name comes straight from the UI, so it must not be able to steer
        # the copy outside SAMPLE_DIR: path separators, dots and any other
        # special character become "_" in the file name. Distinct names that
        # differ only in such characters therefore share one audio file.
        safe_stem = "".join(
            ch if ch.isalnum() or ch in "-_ " else "_" for ch in name
        )

        # Save uploaded audio
        audio_ext = Path(upload_audio).suffix
        audio_path = f"{SAMPLE_DIR}/{safe_stem}{audio_ext}"
        shutil.copy2(upload_audio, audio_path)

        # Save voice profile
        return tts_engine.voice_manager.add_profile(name, audio_path, reference_text)
    except Exception as e:
        return f"❌ Error cloning voice: {str(e)}"

# Whole words in a speaker label that pin the line to voice A or voice B.
_VOICE_A_WORDS = frozenset({"a", "host", "interviewer"})
_VOICE_B_WORDS = frozenset({"b", "co-host", "cohost", "guest"})


def _pick_voice_slot(speaker, slots):
    """Return "a" or "b" for *speaker*, remembering the choice in *slots*.

    Labels are compared as whole words, so "Co-host" or "Speaker B" are not
    mistaken for voice A merely because they contain "host" or the letter
    "a". A label with no known word gets whichever slot has fewer speakers
    so far (A on a tie) and keeps it for the rest of the script.
    """
    label = " ".join(speaker.lower().split())
    if label in slots:
        return slots[label]

    words = set(label.replace("(", " ").replace(")", " ").split())
    if words & _VOICE_B_WORDS or {"co", "host"} <= words:
        slot = "b"
    elif words & _VOICE_A_WORDS:
        slot = "a"
    else:
        taken = list(slots.values())
        slot = "b" if taken.count("a") > taken.count("b") else "a"

    slots[label] = slot
    return slot


def generate_podcast(script_input, speaker_a, speaker_b, prompt_input, script_style):
    """Generate a complete podcast with two speakers.

    Args:
        script_input: Script text in ``Label: dialogue`` lines; may be empty
            when *prompt_input* is given.
        speaker_a: Voice profile name used for host-like speakers.
        speaker_b: Voice profile name used for co-host/guest-like speakers.
        prompt_input: Topic used to generate a script when none is supplied.
        script_style: Template style passed to the script generator.

    Returns:
        Always a 3-tuple ``(audio_path, script_path, status_message)``; both
        paths are ``None`` whenever generation fails.
    """
    try:
        # Generate script if prompt is provided
        if prompt_input and (not script_input or not script_input.strip()):
            script_input = generate_script_from_prompt(prompt_input, script_style)

        if not script_input or not script_input.strip():
            return None, None, "❌ Please provide either a script or a prompt"

        # Parse conversation
        conversation = parse_conversation_script(script_input)
        if not conversation:
            return None, None, "❌ Could not parse script"

        current_sample_rate = 24000
        segments = []  # speech clips with pauses in between, joined once at the end
        slots = {}     # speaker label -> "a" / "b", stable for the whole script

        for line in conversation:
            speaker = line["speaker"]
            text = line["text"]
            if not text:
                # Nothing to speak for a label without dialogue.
                continue

            slot = _pick_voice_slot(speaker, slots)
            voice = speaker_a if slot == "a" else speaker_b

            print(f"🎙️ {speaker} ({voice}): {text}")

            # Generate speech
            wav, error = tts_engine.generate_speech(text, voice)
            if error:
                return None, None, error

            wav = np.asarray(wav)
            if segments:
                # Add a small pause between speakers (0.5 second), in the
                # same dtype as the speech so the result is not upcast.
                segments.append(np.zeros(int(0.5 * current_sample_rate), dtype=wav.dtype))
            segments.append(wav)

        if not segments:
            return None, None, "❌ Could not parse script"

        combined_audio = np.concatenate(segments)

        # Reserve a temp file name, then write after the handle is closed so
        # the file is not opened twice at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            audio_file = f.name
        sf.write(audio_file, combined_audio, current_sample_rate)

        # Save script next to the audio; only the trailing ".wav" is swapped.
        script_file = audio_file[:-len(".wav")] + "_script.txt"
        with open(script_file, 'w', encoding='utf-8') as f:
            f.write(script_input)

        return audio_file, script_file, "✅ Podcast generated successfully!"

    except Exception as e:
        return None, None, f"❌ Error: {str(e)}"

# ===== GRADIO UI =====
# Custom stylesheet handed to gr.Blocks below. Of these classes only
# `.header` is referenced by markup visible in this file (the gr.HTML
# banner); the remaining selectors apply only if components are given
# matching class names.
css = """
.container { max-width: 1400px; margin: 0 auto; }
.header { background: linear-gradient(135deg, #32CD32 0%, #1E90FF 100%); color: white; padding: 30px; border-radius: 12px; margin-bottom: 25px; text-align: center; border: 3px solid #1E90FF; }
.section { border: 2px solid #32CD32; border-radius: 10px; padding: 20px; margin-bottom: 20px; background: white; }
.output-section { background: linear-gradient(135deg, #F0FFF0 0%, #F0F8FF 100%); border: 2px dashed #1E90FF; border-radius: 10px; padding: 20px; margin-top: 20px; }
.btn-primary { background: linear-gradient(135deg, #32CD32 0%, #1E90FF 100%) !important; border: 2px solid #1E90FF !important; color: white !important; font-weight: bold !important; }
.btn-secondary { background: linear-gradient(135deg, #FFA500 0%, #FF6347 100%) !important; border: 2px solid #FF6347 !important; color: white !important; }
.tab { background: #f0f8ff; padding: 15px; border-radius: 8px; margin: 10px 0; }
"""

# Three-tab UI: clone voices, write or generate a script and pick the two
# voices, then listen to / download the result.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.HTML("""
    <div class="header">
        <h1>🎙️ 2nd-Host AI - Complete Podcast Studio</h1>
        <h3>Voice Cloning • 2-Speaker Podcasts • Script Generation • Export</h3>
    </div>
    """)

    def _default_pair(voices):
        """Return the default (speaker A, speaker B) selection for *voices*.

        A gets the first voice, B the second; with a single voice both get
        it, and with none both are None.
        """
        first = voices[0] if voices else None
        second = voices[1] if len(voices) > 1 else first
        return first, second

    # Initialize voice manager
    voice_manager = VoiceProfileManager()
    available_voices = voice_manager.list_profiles()
    default_a, default_b = _default_pair(available_voices)

    with gr.Tab("🎭 Voice Cloning"):
        gr.Markdown("### Clone New Voices")
        with gr.Row():
            with gr.Column():
                voice_name = gr.Textbox(label="Voice Name", placeholder="e.g., 'David', 'Sarah', 'Expert'")
                upload_audio = gr.Audio(label="Reference Audio", type="filepath")
                reference_text = gr.Textbox(
                    label="Reference Text",
                    value="Hey there, this is my voice for cloning.",
                    placeholder="Text spoken in the reference audio"
                )
                clone_btn = gr.Button("🎯 Clone Voice", variant="primary")

            with gr.Column():
                clone_status = gr.Textbox(label="Cloning Status", interactive=False)
                available_voices_display = gr.Dropdown(
                    label="Available Voices",
                    choices=available_voices,
                    value=default_a
                )
                refresh_btn = gr.Button("🔄 Refresh Voices")

    with gr.Tab("🎬 Podcast Studio"):
        gr.Markdown("### Create 2-Speaker Podcast")

        with gr.Row():
            with gr.Column():
                # Script input
                script_input = gr.Textbox(
                    label="Podcast Script",
                    lines=6,
                    placeholder="""Format: Speaker: Dialogue
Example:
Host: Welcome to our show!
Co-host: Thanks for having me!
Host: Let's discuss AI voice technology...
Co-host: It's revolutionizing content creation!""",
                    value=""
                )

                # Script generation
                prompt_input = gr.Textbox(
                    label="Or Generate from Prompt",
                    placeholder="e.g., 'The future of AI in education'"
                )
                script_style = gr.Radio(
                    choices=["conversational", "interview", "debate"],
                    label="Script Style",
                    value="conversational"
                )
                generate_script_btn = gr.Button("📝 Generate Script", variant="secondary")

            with gr.Column():
                # Speaker selection
                speaker_a = gr.Dropdown(
                    choices=available_voices,
                    label="🎤 Speaker A (Host)",
                    value=default_a
                )
                speaker_b = gr.Dropdown(
                    choices=available_voices,
                    label="🎤 Speaker B (Co-host/Guest)",
                    value=default_b
                )

                generate_btn = gr.Button("🚀 Generate Podcast", variant="primary", size="lg")

    with gr.Tab("📤 Output"):
        gr.Markdown("### Generated Podcast")
        with gr.Row():
            with gr.Column():
                audio_output = gr.Audio(label="🎧 Podcast Audio", type="filepath")
                script_output = gr.File(label="📄 Script File", file_types=[".txt"])

            with gr.Column():
                generation_status = gr.Textbox(label="Generation Status", lines=3)
                # NOTE(review): no click handler is attached to this button in
                # the code visible here; the audio and file components above
                # are what actually expose the results.
                download_btn = gr.Button("💾 Download All", variant="primary")

    # ===== EVENT HANDLERS =====
    def refresh_voices():
        """Reload the profiles from disk and rebuild all three voice dropdowns.

        Returns updates for, in order, the "Available Voices" list, speaker A
        and speaker B, so the list next to the refresh button stays in step
        with the speaker selectors.
        """
        voices = VoiceProfileManager().list_profiles()
        first, second = _default_pair(voices)
        return (
            gr.Dropdown(choices=voices, value=first),
            gr.Dropdown(choices=voices, value=first),
            gr.Dropdown(choices=voices, value=second),
        )

    def handle_clone_voice(voice_name, audio_path, text):
        """Clone the voice, then refresh every dropdown that lists voices."""
        result = clone_voice(voice_name, audio_path, text)
        return result, *refresh_voices()

    def handle_generate_script(prompt, style):
        """Fill the script box from the prompt.

        An empty prompt is reported as an error instead of being returned,
        because the return value is written into the script textbox and
        would otherwise be synthesized as dialogue.
        """
        if not prompt or not prompt.strip():
            raise gr.Error("❌ Please enter a prompt")
        return generate_script_from_prompt(prompt, style)

    def handle_generate_podcast(script, speaker_a, speaker_b, prompt, style):
        """Forward the form values to generate_podcast unchanged."""
        return generate_podcast(script, speaker_a, speaker_b, prompt, style)

    # Connect events
    clone_btn.click(
        handle_clone_voice,
        inputs=[voice_name, upload_audio, reference_text],
        outputs=[clone_status, available_voices_display, speaker_a, speaker_b]
    )

    refresh_btn.click(
        refresh_voices,
        outputs=[available_voices_display, speaker_a, speaker_b]
    )

    generate_script_btn.click(
        handle_generate_script,
        inputs=[prompt_input, script_style],
        outputs=[script_input]
    )

    generate_btn.click(
        handle_generate_podcast,
        inputs=[script_input, speaker_a, speaker_b, prompt_input, script_style],
        outputs=[audio_output, script_output, generation_status]
    )

if __name__ == "__main__":
    # Start the web server only when run as a script. Per the Gradio docs,
    # share=True additionally requests a public share link, so the upload and
    # generation handlers are reachable by anyone who has that URL.
    demo.launch(share=True)