#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
# Set UTF-8 encoding for Windows
if sys.platform == 'win32':
import codecs
sys.stdout = codecs.getwriter('utf-8')(sys.stdout.detach())
sys.stderr = codecs.getwriter('utf-8')(sys.stderr.detach())
# Load environment variables from .env file (optimized for HF Spaces)
try:
# Only load .env in local development, skip in production
if not os.environ.get("SPACE_ID") and not os.environ.get("HF_SPACE_ID"):
from dotenv import load_dotenv
load_dotenv()
print("✅ Environment variables loaded from .env file")
else:
print("🏭 Production environment - using system environment variables")
except ImportError:
print("⚠️ python-dotenv not installed. Using system environment variables only.")
except Exception as e:
print(f"⚠️ Error loading .env file: {e}")
# Essential imports for HF Spaces
import numpy as np
import gradio as gr
# Try to import google-generativeai with fallback
try:
import google.generativeai as genai
GENAI_AVAILABLE = True
except ImportError as e:
print(f"⚠️ google-generativeai not available: {e}")
GENAI_AVAILABLE = False
genai = None
try:
from gtts import gTTS, lang
GTTS_AVAILABLE = True
except ImportError as e:
print(f"⚠️ gtts not available: {e}")
GTTS_AVAILABLE = False
import tempfile
# import soundfile as sf # Import locally to avoid startup overhead
# Kokoro not used - removed for performance
import time
import base64
# Try to import optional dependencies
try:
import edge_tts
EDGE_TTS_AVAILABLE = True
except ImportError as e:
print(f"⚠️ edge-tts not available: {e}")
EDGE_TTS_AVAILABLE = False
import asyncio
import io
try:
import PyPDF2
PDF_AVAILABLE = True
except ImportError:
PDF_AVAILABLE = False
try:
import docx
DOCX_AVAILABLE = True
except ImportError:
DOCX_AVAILABLE = False
import shutil
import atexit
import glob
import datetime
# Librosa not used - removed for performance
# === RECORD DATA MANAGEMENT ===
RECORD_DATA_DIR = "record_data"
def create_record_data_directory():
"""Create record_data directory if it doesn't exist"""
if not os.path.exists(RECORD_DATA_DIR):
os.makedirs(RECORD_DATA_DIR)
print(f"✅ Created directory: {RECORD_DATA_DIR}")
return RECORD_DATA_DIR
def cleanup_record_data():
"""Clean up record_data directory when app closes (disabled for production)"""
try:
# Disable cleanup for HF Spaces and production environments
if os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID"):
print(f"🏭 Production environment detected - keeping {RECORD_DATA_DIR} directory")
return
# Only cleanup in local development
if os.path.exists(RECORD_DATA_DIR):
shutil.rmtree(RECORD_DATA_DIR)
print(f"🧹 Cleaned up {RECORD_DATA_DIR} directory")
except Exception as e:
print(f"⚠️ Error cleaning up {RECORD_DATA_DIR}: {e}")
def save_recorded_audio(audio_data, original_filename=None):
"""Save audio data to record_data directory"""
try:
# Create directory if needed
create_record_data_directory()
# Generate filename with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
if original_filename:
name_part = os.path.splitext(os.path.basename(original_filename))[0]
filename = f"recorded_{name_part}_{timestamp}.wav"
else:
filename = f"recorded_{timestamp}.wav"
filepath = os.path.join(RECORD_DATA_DIR, filename)
# Handle different audio data types
if isinstance(audio_data, str) and os.path.exists(audio_data):
# File path - copy the file
shutil.copy2(audio_data, filepath)
elif isinstance(audio_data, tuple) and len(audio_data) == 2:
# Numpy array format (sample_rate, audio_array)
sample_rate, audio_array = audio_data
import soundfile as sf
sf.write(filepath, audio_array, sample_rate)
print(f"📊 Saved numpy audio: sr={sample_rate}, shape={audio_array.shape}")
else:
# Raw data
with open(filepath, 'wb') as f:
f.write(audio_data)
print(f"✅ Saved recorded audio: {filepath}")
return filepath
except Exception as e:
print(f"❌ Error saving recorded audio: {e}")
import traceback
traceback.print_exc()
return None
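# Illustrative sketch (not executed): with the Gradio Audio component configured as
# type="numpy" (see audio_input below), this function receives a
# (sample_rate, numpy_array) tuple and writes it out via soundfile, e.g.:
#
#   import numpy as np
#   sr = 16000
#   tone = 0.1 * np.sin(2 * np.pi * 440 * np.arange(sr) / sr)  # 1 s of 440 Hz
#   save_recorded_audio((sr, tone))  # -> record_data/recorded_<timestamp>.wav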
def get_recorded_files():
"""Get list of recorded audio files"""
try:
if not os.path.exists(RECORD_DATA_DIR):
print(f"📁 Record directory does not exist: {RECORD_DATA_DIR}")
return []
# Get all audio files in record_data
pattern = os.path.join(RECORD_DATA_DIR, "*.wav")
files = glob.glob(pattern)
print(f"🔍 Found {len(files)} files in {RECORD_DATA_DIR}")
# Sort by modification time (newest first)
files.sort(key=os.path.getmtime, reverse=True)
# Return just filenames for display
filenames = [os.path.basename(f) for f in files]
print(f"📂 Returning filenames: {filenames}")
return filenames
except Exception as e:
print(f"❌ Error getting recorded files: {e}")
return []
def get_recorded_file_path(filename):
"""Get full path of recorded file"""
return os.path.join(RECORD_DATA_DIR, filename)
def delete_recorded_file(filename):
"""Delete recorded file from record_data directory"""
try:
if not filename or not filename.strip():
return "❌ Không có file nào được chọn để xóa"
file_path = get_recorded_file_path(filename)
print(f"🗑️ Attempting to delete: {file_path}")
if os.path.exists(file_path):
os.remove(file_path)
print(f"✅ Successfully deleted: {filename}")
return f"✅ Đã xóa file: {filename}"
else:
print(f"❌ File not found: {file_path}")
return f"❌ Không tìm thấy file: {filename}"
except Exception as e:
print(f"❌ Error deleting file: {e}")
return f"❌ Lỗi khi xóa file: {str(e)}"
# Register cleanup function to run when app exits (disabled for stability)
# atexit.register(cleanup_record_data) # Disabled to prevent data loss on deployment
# DOCX support already checked above
# Configure Gemini API - Delayed configuration for faster startup
GEMINI_API_KEY = None
def configure_gemini_api():
"""Configure Gemini API on first use to speed up startup"""
global GEMINI_API_KEY
if not GENAI_AVAILABLE:
print("❌ google-generativeai not available")
return None
if GEMINI_API_KEY is None:
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
if GEMINI_API_KEY:
genai.configure(api_key=GEMINI_API_KEY)
print("✅ Gemini API configured successfully")
else:
print("⚠️ GEMINI_API_KEY or GOOGLE_API_KEY not found in environment variables")
return GEMINI_API_KEY
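# Illustrative .env sketch for local development (these are the variable names the
# lookup above expects; the values below are placeholders):
#
#   GEMINI_API_KEY=your-api-key-here
#   # or, equivalently:
#   GOOGLE_API_KEY=your-api-key-here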
# Language configurations for Audio Translation (simplified)
if GTTS_AVAILABLE:
GTTS_LANGUAGES = lang.tts_langs()
GTTS_LANGUAGES['ja'] = 'Japanese'
else:
GTTS_LANGUAGES = {'en': 'English', 'vi': 'Vietnamese'}
SUPPORTED_LANGUAGES = sorted(list(GTTS_LANGUAGES.values()))
# Voice mapping for Edge TTS - defined once for performance
VOICE_MAP = {
"🇻🇳 HoaiMy - Nữ Việt Chuẩn": "vi-VN-HoaiMyNeural",
"🇻🇳 NamMinh - Nam Việt Chuẩn": "vi-VN-NamMinhNeural",
"🇺🇸 Aria - Nữ Mỹ": "en-US-AriaNeural",
"🇺🇸 Guy - Nam Mỹ": "en-US-GuyNeural",
"🇬🇧 Sonia - Nữ Anh": "en-GB-SoniaNeural",
"🇬🇧 Ryan - Nam Anh": "en-GB-RyanNeural",
"🇩🇪 Katja - Deutsche Frau": "de-DE-KatjaNeural",
"🇩🇪 Conrad - Deutscher Mann": "de-DE-ConradNeural",
"🇫🇷 Denise - Française": "fr-FR-DeniseNeural",
"🇫🇷 Henri - Français": "fr-FR-HenriNeural",
"🇪🇸 Elvira - Española": "es-ES-ElviraNeural",
"🇪🇸 Alvaro - Español": "es-ES-AlvaroNeural",
"🇮🇹 Elsa - Italiana": "it-IT-ElsaNeural",
"🇮🇹 Diego - Italiano": "it-IT-DiegoNeural",
"🇯🇵 Nanami - 日本女性": "ja-JP-NanamiNeural",
"🇯🇵 Keita - 日本男性": "ja-JP-KeitaNeural",
"🇰🇷 SunHi - 한국 여성": "ko-KR-SunHiNeural",
"🇰🇷 BongJin - 한국 남성": "ko-KR-BongJinNeural",
"🇨🇳 Xiaoxiao - 中文女声": "zh-CN-XiaoxiaoNeural",
"🇨🇳 Yunxi - 中文男声": "zh-CN-YunxiNeural",
"🇷🇺 Svetlana - Русская": "ru-RU-SvetlanaNeural",
"🇷🇺 Dmitry - Русский": "ru-RU-DmitryNeural",
"🇵🇹 Francisca - Portuguesa": "pt-BR-FranciscaNeural",
"🇵🇹 Antonio - Português": "pt-BR-AntonioNeural",
"🇸🇦 Zariyah - عربية": "ar-SA-ZariyahNeural",
"🇸🇦 Hamed - عربي": "ar-SA-HamedNeural"
}
# Voice RAG Functions (integrated from hf_Voice_Audio_Translation)
def read_pdf(file_path):
"""Extract text from PDF file"""
try:
if not PDF_AVAILABLE:
return "❌ PyPDF2 not available"
with open(file_path, 'rb') as file:
reader = PyPDF2.PdfReader(file)
text = ""
for page in reader.pages:
text += page.extract_text()
return text
except Exception as e:
return f"Error reading PDF: {str(e)}"
def read_docx(file_path):
"""Extract text from Word document"""
try:
if not DOCX_AVAILABLE:
return "❌ python-docx not available"
doc = docx.Document(file_path)
text = ""
for paragraph in doc.paragraphs:
text += paragraph.text + "\n"
return text
except Exception as e:
return f"Error reading DOCX: {str(e)}"
def read_txt(file_path):
"""Extract text from TXT file"""
try:
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()
except Exception as e:
return f"Error reading TXT: {str(e)}"
def extract_text_from_file(file_path):
"""Extract text from various file formats"""
if file_path is None:
return "No file uploaded"
file_extension = os.path.splitext(file_path)[1].lower()
if file_extension == '.pdf':
return read_pdf(file_path)
elif file_extension == '.docx':
return read_docx(file_path)
elif file_extension == '.txt':
return read_txt(file_path)
else:
return f"Unsupported file format: {file_extension}"
def detect_language_from_text(text):
"""Detect language from text content"""
# Vietnamese detection
vietnamese_chars = 'àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ'
if any(char in text.lower() for char in vietnamese_chars):
return "Vietnamese"
# Chinese detection
chinese_chars = '中文汉字學習语言'
if any(char in text for char in chinese_chars):
return "Chinese"
# Japanese detection
japanese_chars = 'ひらがなカタカナ日本語'
if any(char in text for char in japanese_chars):
return "Japanese"
# Korean detection
korean_chars = '한국어문자'
if any(char in text for char in korean_chars):
return "Korean"
# French detection
french_words = ['le', 'la', 'les', 'de', 'et', 'à', 'un', 'une', 'ce', 'qui', 'que']
french_chars = 'àâäéèêëïîôöùûüÿç'
if any(word in text.lower() for word in french_words) or any(char in text.lower() for char in french_chars):
return "French"
# German detection
german_words = ['der', 'die', 'das', 'und', 'ist', 'ich', 'bin', 'haben', 'sein', 'werden']
german_chars = 'äöüß'
if any(word in text.lower() for word in german_words) or any(char in text.lower() for char in german_chars):
return "German"
# Spanish detection
spanish_words = ['el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo']
spanish_chars = 'ñáéíóúü'
if any(word in text.lower() for word in spanish_words) or any(char in text.lower() for char in spanish_chars):
return "Spanish"
# English detection (default)
english_words = ['the', 'and', 'is', 'are', 'have', 'has', 'will', 'would', 'can', 'could']
if any(word in text.lower() for word in english_words):
return "English"
return "English" # Default fallback
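# Illustrative examples (not executed) of the character/keyword heuristic above.
# Keyword checks use plain substring matching, so short keywords such as the
# Spanish 'a'/'y' can match inside longer English words:
#
#   detect_language_from_text("Xin chào các bạn")  # -> "Vietnamese" (diacritics)
#   detect_language_from_text("das ist ein Haus")  # -> "German" ('das' keyword)
#   detect_language_from_text("ready to start")    # -> "Spanish" ('a'/'y' substring match)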
def process_with_gemini(text, question, answer_language="Vietnamese"):
"""Process text and question using Gemini with multi-language support"""
try:
api_key = configure_gemini_api()
if not api_key:
return "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables"
model = genai.GenerativeModel("gemini-2.0-flash")
# Detect document language
detected_doc_language = detect_language_from_text(text)
prompt = f"""
Based on the following document content, please answer the question in {answer_language}:
Document Content (detected language: {detected_doc_language}):
{text}
Question: {question}
Please provide a comprehensive and accurate answer in {answer_language}.
If the document is in a different language than the question, please still answer in {answer_language}.
Maintain the factual accuracy while adapting cultural context appropriately.
"""
response = model.generate_content(prompt)
return response.text
except Exception as e:
return f"Error processing with Gemini: {str(e)}"
def text_to_speech_rag(text, voice_selection):
"""Convert text to speech using Edge TTS for RAG results"""
try:
if not text or text.startswith("Error"):
return None
# Use global VOICE_MAP for performance
voice_name = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
text_limited = text[:2000] if len(text) > 2000 else text
# Generate speech using Edge TTS
audio_data = asyncio.run(generate_speech(text_limited, voice_name, 0.0))
# Save to temporary file
fd, temp_output_path = tempfile.mkstemp(suffix=".wav", prefix="voice_rag_audio_")
os.close(fd)
# Write raw audio data to temporary file
with open(temp_output_path, 'wb') as f:
f.write(audio_data)
return temp_output_path
except Exception as e:
print(f"TTS Error: {str(e)}")
return None
def voice_rag_pipeline(uploaded_file, question, answer_language="Vietnamese", voice_selection="🇻🇳 HoaiMy - Nữ Việt Chuẩn", text_format="txt"):
"""Complete Voice RAG pipeline with multi-language support and downloadable text"""
if uploaded_file is None:
return "Please upload a document first.", "N/A", None, None
if not question.strip():
return "Please enter a question.", "N/A", None, None
# Extract text from uploaded file
extracted_text = extract_text_from_file(uploaded_file)
if extracted_text.startswith("Error"):
return extracted_text, "Error", None, None
# Detect document language
detected_doc_language = detect_language_from_text(extracted_text)
# Process with Gemini using selected answer language
answer = process_with_gemini(extracted_text, question, answer_language)
# Generate speech using selected voice
audio_file = text_to_speech_rag(answer, voice_selection)
# Create formatted content for download
if text_format.lower() == "md":
# Create beautiful Markdown format
formatted_content = format_voice_rag_response(
question, answer, detected_doc_language, voice_selection
)
text_file_path = create_text_file(formatted_content, "md", "voice_rag_response")
else:
# Create standard text file
text_file_path = create_text_file(answer, text_format, "voice_rag_answer")
return answer, detected_doc_language, audio_file, text_file_path
def detect_language(text):
"""Detect language of input text with improved accuracy"""
if not text.strip():
return "unknown"
text_lower = text.lower()
# Vietnamese detection (more comprehensive)
vietnamese_chars = 'àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ'
vietnamese_words = ['và', 'của', 'là', 'có', 'này', 'được', 'cho', 'từ', 'một', 'những', 'tôi', 'bạn']
vietnamese_score = sum(1 for char in text if char in vietnamese_chars) + sum(2 for word in vietnamese_words if word in text_lower)
# English detection (more comprehensive)
english_words = ['the', 'and', 'is', 'are', 'have', 'has', 'will', 'would', 'can', 'could', 'that', 'this', 'with', 'for', 'you', 'he', 'she', 'it', 'they', 'we']
english_score = sum(1 for word in english_words if word in text_lower)
# German detection
german_words = ['der', 'die', 'das', 'und', 'ist', 'ich', 'bin', 'haben', 'sein', 'werden', 'mit', 'auf', 'für', 'von']
german_chars = 'äöüß'
german_score = sum(1 for word in german_words if word in text_lower) + sum(1 for char in text if char in german_chars)
# French detection
french_words = ['le', 'la', 'les', 'de', 'et', 'à', 'un', 'une', 'ce', 'qui', 'que', 'avec', 'pour', 'dans']
french_chars = 'àâäéèêëïîôöùûüÿç'
french_score = sum(1 for word in french_words if word in text_lower) + sum(0.5 for char in text if char in french_chars)
# Spanish detection
spanish_words = ['el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo', 'con', 'para']
spanish_chars = 'ñáéíóúü'
spanish_score = sum(1 for word in spanish_words if word in text_lower) + sum(0.5 for char in text if char in spanish_chars)
# Score-based detection
scores = {
'Vietnamese': vietnamese_score,
'English': english_score,
'German': german_score,
'French': french_score,
'Spanish': spanish_score
}
# Find the language with highest score
max_score = max(scores.values())
if max_score > 0:
detected = max(scores, key=scores.get)
print(f"🔍 Language detection scores: {scores}")
print(f"🎯 Detected language: {detected} (score: {max_score})")
return detected
# Default fallback
print(f"⚠️ Could not detect language, defaulting to English")
return "English"
async def generate_speech(text, voice_name, rate):
"""Generate speech using Edge TTS"""
communicate = edge_tts.Communicate(text, voice_name, rate=f"{rate:+.0%}")
# Create in-memory buffer
audio_buffer = io.BytesIO()
async for chunk in communicate.stream():
if chunk["type"] == "audio":
audio_buffer.write(chunk["data"])
audio_buffer.seek(0)
return audio_buffer.getvalue()
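# Illustrative sketch (not executed): generate_speech is a coroutine, so the
# synchronous callbacks below drive it with asyncio.run, e.g.:
#
#   audio_bytes = asyncio.run(generate_speech("Xin chào", "vi-VN-HoaiMyNeural", 0.0))
#   with open("hello_vi.mp3", "wb") as f:  # edge-tts streams MP3-encoded audio by default
#       f.write(audio_bytes)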
def create_text_file(content, file_format="txt", filename_prefix="translated_text"):
"""
Create a downloadable text file from content in TXT, DOCX, or MD format
"""
if not content or content.startswith("Lỗi:") or content.startswith("❌"):
return None
try:
if file_format.lower() == "docx" and DOCX_AVAILABLE:
# Create Word document (DOCX_AVAILABLE already guarantees python-docx is importable)
from docx import Document
fd, temp_file_path = tempfile.mkstemp(suffix=".docx", prefix=f"{filename_prefix}_")
os.close(fd)
doc = Document()
doc.add_heading('Nội dung đã dịch', 0)
doc.add_paragraph(content)
doc.save(temp_file_path)
return temp_file_path
elif file_format.lower() == "md":
# Create Markdown file
fd, temp_file_path = tempfile.mkstemp(suffix=".md", prefix=f"{filename_prefix}_")
os.close(fd)
with open(temp_file_path, 'w', encoding='utf-8') as f:
f.write(content)
return temp_file_path
else:
# Create TXT file (default)
fd, temp_file_path = tempfile.mkstemp(suffix=".txt", prefix=f"{filename_prefix}_")
os.close(fd)
with open(temp_file_path, 'w', encoding='utf-8') as f:
f.write(content)
return temp_file_path
except Exception as e:
print(f"❌ Error creating text file: {e}")
return None
def format_voice_rag_response(question, answer, detected_language, voice_selection, timestamp=None):
"""
Format Voice RAG response as beautiful Markdown
"""
if timestamp is None:
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# Clean and format the answer
formatted_answer = answer.strip()
# Create beautiful Markdown document
markdown_content = f"""# 📚 Voice RAG - Intelligent Document Q&A
---
## 📄 **Session Information**
| **Field** | **Details** |
|-----------|-------------|
| 🕒 **Timestamp** | {timestamp} |
| 🌍 **Document Language** | {detected_language} |
| 🎭 **Voice Selection** | {voice_selection} |
| 🤖 **AI Model** | Google Gemini 2.0 Flash |
---
## ❓ **Question**
> {question}
---
## 💬 **AI Response**
{formatted_answer}
---
---
## 📱 **Generated by**
**🎙️ Voice AI Platform** - Digitized Brains
*Powered by Claude Code & Google Gemini 2.0 Flash*
> 🌐 **Voice RAG Technology** - Combining document intelligence with premium voice synthesis
---
*Generated on {timestamp} | Voice: {voice_selection} | Language: {detected_language}*
"""
return markdown_content
def format_voice_studio_response(text, voice_selection, speed, detected_language="Auto-detected", timestamp=None):
"""
Format Voice Studio response as simple Markdown
"""
if timestamp is None:
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# Clean and format the text
formatted_text = text.strip()
# Create simple Markdown document
markdown_content = f"""# Voice Studio Result
## Input Text ({detected_language})
{formatted_text}
---
*Generated on {timestamp} | Voice: {voice_selection} | Speed: {speed:.1f}x*
"""
return markdown_content
def format_audio_translation_response(original_text, translated_text, source_language, target_language, voice_selection, timestamp=None):
"""
Format Audio Translation response as simple Markdown
"""
if timestamp is None:
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# Clean and format the texts
formatted_original = original_text.strip()
formatted_translated = translated_text.strip()
# Create simple Markdown document
markdown_content = f"""# Audio Translation Result
## Original Text ({source_language})
{formatted_original}
## Translated Text ({target_language})
{formatted_translated}
---
*Generated on {timestamp} | {source_language} → {target_language} | Voice: {voice_selection}*
"""
return markdown_content
def create_audio_voice_studio(text, voice_selection, speed, text_format="txt"):
"""Voice Studio functionality with text file generation"""
if not text.strip():
return "❌ Vui lòng nhập văn bản / Please enter text / Bitte Text eingeben", None
try:
# Use global VOICE_MAP for performance (avoiding recreation on each call)
voice_name = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
text_limited = text[:1000] if len(text) > 1000 else text
# Convert speed (0.5-2.0) to rate percentage (-50% to +100%)
rate_percent = (speed - 1.0)
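# e.g. speed 1.5 -> rate_percent 0.5 -> "+50%" inside generate_speech();
#      speed 0.5 -> rate_percent -0.5 -> "-50%"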
# Generate speech using Edge TTS
audio_data = asyncio.run(generate_speech(text_limited, voice_name, rate_percent))
# Convert to base64
audio_base64 = base64.b64encode(audio_data).decode('utf-8')
timestamp = int(time.time())
filename = f"voice_{voice_name}_{speed}x_{timestamp}.mp3"
# Detect language
detected_lang = detect_language(text_limited)
# Mobile-friendly inline HTML5 audio player with a download link
# (minimal markup sketch that uses the base64-encoded audio prepared above)
html_player = f'''
<div style="text-align:center; margin:10px 0;">
<audio controls style="width:100%; max-width:500px;" src="data:audio/mpeg;base64,{audio_base64}"></audio>
<div style="margin-top:8px;"><a href="data:audio/mpeg;base64,{audio_base64}" download="{filename}">📥 {filename}</a></div>
</div>
'''
# Create text file based on format
text_file_path = None
if text_format == "md":
# Use Markdown formatting function
detected_language = detect_language(text_limited)
markdown_content = format_voice_studio_response(text_limited, voice_selection, speed, detected_language)
text_file_path = create_text_file(markdown_content, "md", "voice_studio")
elif text_format == "docx":
# Create Word document with Voice Studio formatting
detected_language = detect_language(text_limited)
markdown_content = format_voice_studio_response(text_limited, voice_selection, speed, detected_language)
text_file_path = create_text_file(markdown_content, "docx", "voice_studio")
elif text_format == "txt":
# Create simple text file
text_file_path = create_text_file(text_limited, "txt", "voice_studio")
return html_player, text_file_path
except Exception as e:
return f"❌ Error: {str(e)}", None
# Language mapping for voices - defined once for performance
VOICE_TO_LANGUAGE = {
# Vietnamese
"🇻🇳 HoaiMy - Nữ Việt Chuẩn": "Vietnamese",
"🇻🇳 NamMinh - Nam Việt Chuẩn": "Vietnamese",
# English
"🇺🇸 Aria - Nữ Mỹ": "English",
"🇺🇸 Guy - Nam Mỹ": "English",
"🇬🇧 Sonia - Nữ Anh": "English",
"🇬🇧 Ryan - Nam Anh": "English",
# German
"🇩🇪 Katja - Deutsche Frau": "German",
"🇩🇪 Conrad - Deutscher Mann": "German",
# French
"🇫🇷 Denise - Française": "French",
"🇫🇷 Henri - Français": "French",
# Spanish
"🇪🇸 Elvira - Española": "Spanish",
"🇪🇸 Alvaro - Español": "Spanish",
# Italian
"🇮🇹 Elsa - Italiana": "Italian",
"🇮🇹 Diego - Italiano": "Italian",
# Japanese
"🇯🇵 Nanami - 日本女性": "Japanese",
"🇯🇵 Keita - 日本男性": "Japanese",
# Korean
"🇰🇷 SunHi - 한국 여성": "Korean",
"🇰🇷 BongJin - 한국 남성": "Korean",
# Chinese
"🇨🇳 Xiaoxiao - 中文女声": "Chinese",
"🇨🇳 Yunxi - 中文男声": "Chinese",
# Russian
"🇷🇺 Svetlana - Русская": "Russian",
"🇷🇺 Dmitry - Русский": "Russian",
# Portuguese
"🇵🇹 Francisca - Portuguesa": "Portuguese",
"🇵🇹 Antonio - Português": "Portuguese",
# Arabic
"🇸🇦 Zariyah - عربية": "Arabic",
"🇸🇦 Hamed - عربي": "Arabic"
}
def get_target_language_from_voice(voice_selection):
"""Map voice selection to target language for translation"""
return VOICE_TO_LANGUAGE.get(voice_selection, "Vietnamese")
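# e.g. get_target_language_from_voice("🇺🇸 Aria - Nữ Mỹ") -> "English";
# unrecognized labels fall back to "Vietnamese"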
def translate_text_with_gemini(text, target_language):
"""Translate text using Gemini API"""
try:
api_key = configure_gemini_api()
if not api_key:
return f"❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables"
if not text.strip():
return ""
model = genai.GenerativeModel("gemini-2.0-flash")
prompt = f"""Translate the following text to {target_language}. Return ONLY the translated text, nothing else:
{text}"""
response = model.generate_content(prompt)
translated_text = response.text.strip()
# Clean up any unwanted text that might be included
if translated_text.lower().startswith("translation:"):
translated_text = translated_text[12:].strip()
if translated_text.lower().startswith("here is"):
lines = translated_text.split('\n')
if len(lines) > 1:
translated_text = '\n'.join(lines[1:]).strip()
return translated_text
except Exception as e:
return f"Lỗi dịch thuật: {str(e)}"
def translate_audio(audio_file, target_country, voice_selection, text_format="txt"):
"""
Transcribe, translate and synthesize audio to target language with Voice Studio integration
"""
try:
api_key = configure_gemini_api()
if not api_key:
return "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables", "Không xác định", "", target_country, None, None, "", "", None
if audio_file is None:
return "Lỗi: Vui lòng tải lên file audio", "Không xác định", "", target_country, None, None, "", "", None
# Save recorded audio to record_data directory
print(f"🔍 Processing audio file type: {type(audio_file)}")
saved_audio_path = save_recorded_audio(audio_file)
if saved_audio_path:
print(f"🎤 Audio saved to record_data: {os.path.basename(saved_audio_path)}")
# Debug: check if file really exists
if os.path.exists(saved_audio_path):
file_size = os.path.getsize(saved_audio_path)
print(f"✅ File confirmed: {saved_audio_path} ({file_size} bytes)")
else:
print(f"❌ File not found after save: {saved_audio_path}")
return "❌ Lỗi: Không thể lưu file audio", "Không xác định", "", target_country, None, None, "", "", None
else:
print("❌ Failed to save audio file")
return "❌ Lỗi: Không thể lưu file audio", "Không xác định", "", target_country, None, None, "", "", None
# Get target language from voice selection
target_language = get_target_language_from_voice(voice_selection)
# Transcribe audio using Gemini
model = genai.GenerativeModel("gemini-2.0-flash")
# Read audio file using saved path
with open(saved_audio_path, 'rb') as f:
audio_data = f.read()
# Create audio blob
audio_blob = {
'mime_type': 'audio/wav',
'data': audio_data
}
# Step 1: Transcribe audio only first
transcribe_prompt = """Transcribe this audio accurately in its original language. Return only the transcribed text, nothing else."""
response = model.generate_content([transcribe_prompt, audio_blob])
transcription = response.text.strip()
# Step 2: Detect language of transcription
detected_lang = detect_language(transcription)
# Step 3: Translate if needed (only if source is different from target)
if detected_lang.lower() != target_language.lower():
print(f"🔄 Translating from {detected_lang} to {target_language}")
translated_text = translate_text_with_gemini(transcription, target_language)
# Check if translation was successful
if translated_text.startswith("❌") or translated_text.startswith("Lỗi"):
print(f"❌ Translation failed: {translated_text}")
# Use original transcription if translation fails
translated_text = transcription
else:
print(f"✅ Translation successful")
else:
print(f"ℹ️ No translation needed - same language ({detected_lang})")
translated_text = transcription
# Generate audio using Edge TTS (use global VOICE_MAP for performance)
edge_voice = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
print(f"🎙️ Generating audio with voice: {edge_voice}")
audio_data = asyncio.run(generate_speech(translated_text, edge_voice, 0.0))
print(f"🎵 Generated audio data: {len(audio_data)} bytes")
# Save audio file
fd, temp_output_path = tempfile.mkstemp(suffix=".wav", prefix="translated_audio_")
os.close(fd)
print(f"📁 Created temp audio file: {temp_output_path}")
# Write raw audio data to temporary file
with open(temp_output_path, 'wb') as f:
f.write(audio_data)
# Verify file was created
if os.path.exists(temp_output_path):
file_size = os.path.getsize(temp_output_path)
print(f"✅ Audio file created successfully: {file_size} bytes")
else:
print(f"❌ Failed to create audio file: {temp_output_path}")
# Create text file for download with proper formatting
text_file_path = None
if text_format == "md":
# Use Markdown formatting function for Audio Translation
markdown_content = format_audio_translation_response(
transcription, translated_text, detected_lang, target_language, voice_selection
)
text_file_path = create_text_file(markdown_content, "md", "audio_translation")
elif text_format == "docx":
# Create Word document with Audio Translation formatting
markdown_content = format_audio_translation_response(
transcription, translated_text, detected_lang, target_language, voice_selection
)
text_file_path = create_text_file(markdown_content, "docx", "audio_translation")
else:
# Create simple text file
text_file_path = create_text_file(translated_text, "txt", "audio_translation")
return transcription, detected_lang, translated_text, target_language, temp_output_path, temp_output_path, transcription, translated_text, text_file_path
except Exception as e:
# Get target language for error response
target_language = get_target_language_from_voice(voice_selection) if 'voice_selection' in locals() else "Vietnamese"
return f"Lỗi: {str(e)}", "Lỗi", "", target_language, None, None, "", "", None
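# Note: the 9-tuple returned above (and by each error path) maps positionally onto the
# Gradio outputs wired up in the UI section below, roughly:
#   (transcription, detected_language, translated_text, target_language,
#    audio_path_for_player, audio_path_for_download,
#    original_compare_text, translated_compare_text, downloadable_text_file)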
# Voice choices organized by country - ONLY OFFICIAL VOICES
voice_choices_by_country = {
"🇻🇳 Việt Nam": [
"🇻🇳 HoaiMy - Nữ Việt Chuẩn",
"🇻🇳 NamMinh - Nam Việt Chuẩn"
],
"🇺🇸 Hoa Kỳ": [
"🇺🇸 Aria - Nữ Mỹ",
"🇺🇸 Guy - Nam Mỹ"
],
"🇬🇧 Anh": [
"🇬🇧 Sonia - Nữ Anh",
"🇬🇧 Ryan - Nam Anh"
],
"🇩🇪 Đức": [
"🇩🇪 Katja - Deutsche Frau",
"🇩🇪 Conrad - Deutscher Mann"
],
"🇫🇷 Pháp": [
"🇫🇷 Denise - Française",
"🇫🇷 Henri - Français"
],
"🇪🇸 Tây Ban Nha": [
"🇪🇸 Elvira - Española",
"🇪🇸 Alvaro - Español"
],
"🇮🇹 Ý": [
"🇮🇹 Elsa - Italiana",
"🇮🇹 Diego - Italiano"
],
"🇯🇵 Nhật Bản": [
"🇯🇵 Nanami - 日本女性",
"🇯🇵 Keita - 日本男性"
],
"🇰🇷 Hàn Quốc": [
"🇰🇷 SunHi - 한국 여성",
"🇰🇷 BongJin - 한국 남성"
],
"🇨🇳 Trung Quốc": [
"🇨🇳 Xiaoxiao - 中文女声",
"🇨🇳 Yunxi - 中文男声"
],
"🇷🇺 Nga": [
"🇷🇺 Svetlana - Русская",
"🇷🇺 Dmitry - Русский"
],
"🇵🇹 Bồ Đào Nha": [
"🇵🇹 Francisca - Portuguesa",
"🇵🇹 Antonio - Português"
],
"🇸🇦 Ả Rập": [
"🇸🇦 Zariyah - عربية",
"🇸🇦 Hamed - عربي"
]
}
def update_voices(country):
"""Update voice choices based on selected country"""
if country in voice_choices_by_country:
voices = voice_choices_by_country[country]
return gr.Dropdown(choices=voices, value=voices[0])
else:
# Default to Vietnamese voices
default_voices = voice_choices_by_country["🇻🇳 Việt Nam"]
return gr.Dropdown(choices=default_voices, value=default_voices[0])
# Lightweight CSS - optimized for performance
css = """
* {
font-family: system-ui, -apple-system, 'Segoe UI', Arial, sans-serif;
}
.gradio-container {
max-width: 1200px;
margin: 0 auto;
position: relative;
}
/* Critical fix for dropdown interaction */
.gradio-container * {
pointer-events: auto;
}
/* Hide Gradio footer */
.footer {
display: none !important;
}
/* Pulsing animation for processing status */
@keyframes pulse-processing {
0% {
opacity: 1;
transform: scale(1);
box-shadow: 0 4px 15px rgba(255, 193, 7, 0.3);
}
50% {
opacity: 0.8;
transform: scale(1.02);
box-shadow: 0 6px 25px rgba(255, 193, 7, 0.6);
}
100% {
opacity: 1;
transform: scale(1);
box-shadow: 0 4px 15px rgba(255, 193, 7, 0.3);
}
}
.status-processing {
animation: pulse-processing 1.5s ease-in-out infinite;
background: linear-gradient(135deg, #FFC107 0%, #FF9800 100%) !important;
}
/* Success status animation */
@keyframes pulse-success {
0% {
opacity: 1;
transform: scale(1);
}
50% {
opacity: 0.9;
transform: scale(1.01);
}
100% {
opacity: 1;
transform: scale(1);
}
}
.status-success {
animation: pulse-success 2s ease-in-out 3;
background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;
}
/* Custom footer to cover Gradio attribution */
.custom-footer {
position: fixed;
bottom: 0;
left: 0;
right: 0;
background: linear-gradient(135deg, #4A90E2 0%, #2E86AB 70%, #FF8A65 85%, #FF6B9D 100%);
color: white;
padding: 15px;
text-align: center;
font-weight: bold;
z-index: 1000;
box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
}
/* Add padding to body to account for fixed footer */
body {
padding-bottom: 60px;
}
/* Mobile-first responsive design */
.input-card {
background: rgba(255,255,255,0.95);
border-radius: 16px;
padding: 16px;
margin: 10px 0;
box-shadow: 0 4px 20px rgba(0,0,0,0.1);
backdrop-filter: blur(10px);
}
.output-area {
background: rgba(255,255,255,0.95);
border-radius: 16px;
padding: 16px;
margin: 15px 0;
min-height: 200px;
box-shadow: 0 4px 20px rgba(0,0,0,0.1);
}
.examples-section {
background: rgba(255,255,255,0.9);
border-radius: 16px;
padding: 16px;
margin: 20px 0;
}
.main-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
text-align: center;
}
.feature-box {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
margin: 10px 0;
border-left: 4px solid #667eea;
}
.status-indicator {
display: inline-block;
padding: 5px 10px;
border-radius: 15px;
font-size: 12px;
font-weight: bold;
margin: 5px;
}
.status-success {
background-color: #d4edda;
color: #155724;
}
.status-processing {
background-color: #fff3cd;
color: #856404;
}
.comparison-section {
border: 1px solid #e0e0e0;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
background: #fafafa;
}
.language-label {
font-weight: bold;
color: #667eea;
padding: 5px 10px;
background: #f0f2ff;
border-radius: 15px;
display: inline-block;
margin-bottom: 10px;
font-size: 14px;
}
.content-compare {
background: white;
border: 1px solid #ddd;
border-radius: 6px;
padding: 12px;
min-height: 120px;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
line-height: 1.5;
}
/* Reset any problematic dropdown styles */
.gradio-container * {
pointer-events: auto;
}
/* Remove any potential blocking overlays */
.gradio-container::before,
.gradio-container::after {
display: none;
}
/* Ensure all interactive elements work */
button, select, input, textarea, .gr-dropdown {
pointer-events: auto !important;
position: relative !important;
}
/* Simple dropdown fix without complex selectors */
[class*="dropdown"] {
position: relative !important;
z-index: 999 !important;
}
[class*="dropdown"] * {
pointer-events: auto !important;
}
/* Make sure no overlay blocks clicks */
.gradio-container .gr-form {
position: relative;
z-index: 1;
}
.gradio-container .gr-block {
position: relative;
z-index: 1;
}
.mobile-button {
width: 100% !important;
padding: 15px !important;
font-size: 1.1em !important;
margin: 20px 0 !important;
border-radius: 12px !important;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border: none !important;
color: white !important;
font-weight: bold !important;
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3) !important;
transition: all 0.3s ease !important;
cursor: pointer !important;
position: relative !important;
overflow: hidden !important;
}
.mobile-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4) !important;
background: linear-gradient(135deg, #5a6fd8 0%, #6b4190 100%) !important;
}
.mobile-button:active {
transform: translateY(0px) !important;
box-shadow: 0 2px 10px rgba(102, 126, 234, 0.3) !important;
}
/* Ripple effect for button */
.mobile-button::before {
content: '';
position: absolute;
top: 50%;
left: 50%;
width: 0;
height: 0;
border-radius: 50%;
background: rgba(255, 255, 255, 0.3);
transform: translate(-50%, -50%);
transition: width 0.6s, height 0.6s;
}
.mobile-button:active::before {
width: 300px;
height: 300px;
}
/* Loading spinner animation */
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.loading-spinner {
display: inline-block;
width: 20px;
height: 20px;
border: 3px solid rgba(255,255,255,0.3);
border-radius: 50%;
border-top-color: white;
animation: spin 1s ease-in-out infinite;
margin-right: 10px;
}
/* Button pulse effect when processing */
@keyframes pulse {
0% {
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
}
50% {
box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6);
}
100% {
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
}
}
.button-processing {
animation: pulse 2s ease-in-out infinite;
background: linear-gradient(135deg, #FF8E53 0%, #FF6B6B 100%) !important;
}
.mobile-textbox textarea {
border-radius: 10px !important;
border: 2px solid #e0e0e0 !important;
padding: 12px !important;
font-size: 1em !important;
line-height: 1.5 !important;
}
.mobile-compare textarea {
border-radius: 8px !important;
border: 1px solid #ddd !important;
padding: 10px !important;
background: #fafafa !important;
font-size: 0.95em !important;
}
.mobile-audio {
margin: 10px 0 !important;
border-radius: 10px !important;
}
.mobile-file {
margin: 10px 0 !important;
border-radius: 10px !important;
}
/* Beautiful Markdown styling for Voice RAG responses */
.markdown-response {
background: linear-gradient(135deg, #ffffff 0%, #f8fffe 100%);
border-radius: 12px;
padding: 20px;
margin: 15px 0;
box-shadow: 0 4px 20px rgba(0,0,0,0.1);
border-left: 4px solid #4CAF50;
}
.markdown-response h1 {
color: #2e7d32;
border-bottom: 2px solid #4CAF50;
padding-bottom: 10px;
margin-bottom: 20px;
font-size: 1.8em;
}
.markdown-response h2 {
color: #388E3C;
margin-top: 25px;
margin-bottom: 15px;
font-size: 1.4em;
border-left: 3px solid #4CAF50;
padding-left: 15px;
}
.markdown-response h3 {
color: #43A047;
margin-top: 20px;
margin-bottom: 12px;
font-size: 1.2em;
}
.markdown-response p {
line-height: 1.6;
margin-bottom: 12px;
color: #333;
}
.markdown-response blockquote {
background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
border-left: 4px solid #4CAF50;
padding: 15px 20px;
margin: 15px 0;
border-radius: 8px;
font-style: italic;
color: #2e7d32;
}
.markdown-response table {
width: 100%;
border-collapse: collapse;
margin: 15px 0;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
border-radius: 8px;
overflow: hidden;
}
.markdown-response table th {
background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
color: white;
padding: 12px 15px;
text-align: left;
font-weight: bold;
}
.markdown-response table td {
padding: 12px 15px;
border-bottom: 1px solid #e0e0e0;
background: white;
}
.markdown-response table tr:nth-child(even) td {
background: #f8fffe;
}
.markdown-response table tr:hover td {
background: #e8f5e8;
transition: background 0.3s ease;
}
.markdown-response ul, .markdown-response ol {
margin: 15px 0;
padding-left: 25px;
}
.markdown-response li {
margin-bottom: 8px;
line-height: 1.5;
}
.markdown-response code {
background: #f5f5f5;
border: 1px solid #e0e0e0;
border-radius: 4px;
padding: 2px 6px;
font-family: 'Courier New', monospace;
color: #d32f2f;
}
.markdown-response pre {
background: #f5f5f5;
border: 1px solid #e0e0e0;
border-radius: 8px;
padding: 15px;
overflow-x: auto;
margin: 15px 0;
}
.markdown-response pre code {
background: none;
border: none;
padding: 0;
color: #333;
}
.markdown-response hr {
border: none;
height: 2px;
background: linear-gradient(90deg, transparent, #4CAF50, transparent);
margin: 25px 0;
}
.markdown-response strong {
color: #2e7d32;
font-weight: bold;
}
.markdown-response em {
color: #388E3C;
font-style: italic;
}
/* Responsive design for markdown */
@media (max-width: 768px) {
.markdown-response {
padding: 15px;
margin: 10px 0;
}
.markdown-response table {
font-size: 0.9em;
}
.markdown-response h1 {
font-size: 1.6em;
}
.markdown-response h2 {
font-size: 1.3em;
}
}
/* Mobile responsive breakpoints */
@media (max-width: 768px) {
.gradio-container {
padding: 10px !important;
}
.input-card {
padding: 12px !important;
margin: 8px 0 !important;
}
.output-area {
padding: 12px !important;
margin: 10px 0 !important;
}
.examples-section {
padding: 12px !important;
}
.main-header h2 {
font-size: 1.5em !important;
}
.main-header p {
font-size: 1em !important;
}
/* Mobile layout adjustments - less aggressive */
.gr-row {
flex-direction: column;
}
.gr-column {
width: 100%;
margin-bottom: 15px;
}
}
@media (max-width: 480px) {
.gradio-container {
padding: 5px !important;
}
.input-card {
padding: 10px !important;
margin: 5px 0 !important;
}
.main-header {
padding: 15px !important;
}
.main-header h2 {
font-size: 1.3em !important;
}
.mobile-button {
padding: 12px !important;
font-size: 1em !important;
}
}
/* JavaScript for button interactions */
"""
# Add JavaScript for button effects
js_code = """
"""
# Create interface with tabs
with gr.Blocks(css=css, title="🎙️ Voice AI Platform - Voice RAG & Audio Translation") as demo:
# Simplified header for faster loading on HF Spaces
if not (os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")):
# Only load complex microphone permissions in local development
gr.HTML("""
🎙️ Voice AI Platform
Voice RAG, Audio Translation và Voice Studio - Nền tảng AI giọng nói toàn diện
✨ Tính năng mới: Voice RAG với 24 giọng nói đa ngôn ngữ
🧠 Digitized Brains
""")
else:
# Production mode - minimal header
gr.HTML('''
🎙️ Voice AI Platform
''')
with gr.Tabs():
# Tab 1: Voice RAG
with gr.TabItem("📚 Voice RAG"):
# Header section with hf_voice style
gr.HTML("""
📚 Voice RAG
Hỏi đáp tài liệu thông minh
🌍 Multi-Language
13 ngôn ngữ trả lời
🎤 Voice Output
24 giọng nói đa dạng
🔄 AI Gemini
Gemini 2.0 Flash
""")
gr.Markdown("### 📝 Upload tài liệu và đặt câu hỏi")
# Input section - Mobile optimized
with gr.Column():
# Document upload
with gr.Row():
file_upload_rag = gr.File(
label="📎 Tải lên tài liệu (PDF, DOCX, TXT)",
file_types=[".pdf", ".docx", ".txt"]
)
# Question input
with gr.Row():
question_input_rag = gr.Textbox(
label="❓ Câu hỏi của bạn",
placeholder="Hãy đặt câu hỏi về nội dung tài liệu...",
lines=3
)
# Language selection for answer
with gr.Row():
answer_language_dropdown_rag = gr.Dropdown(
choices=SUPPORTED_LANGUAGES,
value="Vietnamese",
label="🌍 Ngôn ngữ trả lời"
)
# Voice selection from Voice Studio
with gr.Row():
with gr.Column(scale=1):
rag_country_dropdown = gr.Dropdown(
choices=list(voice_choices_by_country.keys()),
value="🇻🇳 Việt Nam",
label="🌍 Chọn quốc gia giọng nói"
)
with gr.Column(scale=1):
rag_voice_dropdown = gr.Dropdown(
choices=voice_choices_by_country["🇻🇳 Việt Nam"],
value="🇻🇳 HoaiMy - Nữ Việt Chuẩn",
label="🎭 Chọn giọng nói"
)
# Format selection for download
with gr.Row():
rag_text_format_dropdown = gr.Dropdown(
choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"],
value="Markdown (.md)",
label="📄 Định dạng file trả lời"
)
# Process button
with gr.Row():
submit_btn_rag = gr.Button(
"🚀 Xử lý tài liệu và trả lời",
variant="primary",
size="lg"
)
# Results section - Mobile optimized
with gr.Column():
# Document info section
with gr.Accordion("📄 Thông tin tài liệu", open=True):
detected_doc_language_rag = gr.Textbox(
label="🌐 Ngôn ngữ tài liệu được phát hiện",
lines=1,
interactive=False,
placeholder="Tự động nhận diện ngôn ngữ tài liệu..."
)
# Text answer section
with gr.Accordion("💬 Câu trả lời", open=True):
gr.HTML("""
💬 AI Response with Markdown Formatting
Formatted response with tables, headers, and beautiful layout
""")
answer_output_rag = gr.Markdown(
value="**Câu trả lời sẽ xuất hiện ở đây sau khi xử lý...**\n\n*Hỗ trợ format Markdown với tables, headers, lists và nhiều style khác*",
label="",
show_label=False,
elem_classes=["markdown-response"]
)
# Downloads section - Mobile optimized
with gr.Accordion("💾 Tải xuống kết quả", open=True):
gr.HTML("""
Tải xuống câu trả lời dưới dạng file và audio
""")
# Stack vertically on mobile
with gr.Column():
# Audio download section
with gr.Row():
audio_output_rag = gr.Audio(
label="🔊 Audio câu trả lời",
type="filepath"
)
# Text download section
with gr.Row():
text_output_rag = gr.File(
label="📄 Văn bản câu trả lời",
file_count="single",
file_types=[".md", ".txt", ".docx"]
)
# Status indicator for RAG
rag_status_text = gr.HTML("""
✅ Sẵn sàng xử lý tài liệu
""")
# Helper function for RAG format
def get_rag_format_from_dropdown(format_choice):
if "Word" in format_choice or "docx" in format_choice:
return "docx"
elif "Markdown" in format_choice or "md" in format_choice:
return "md"
return "txt"
# RAG processing function
def update_rag_status_processing():
return """
""")
gr.Markdown("### 📝 Nhập nội dung và chọn giọng nói")
with gr.Row():
text_input = gr.Textbox(
placeholder="Nhập văn bản cần chuyển thành giọng nói...",
lines=4,
label="Văn bản",
scale=2
)
with gr.Row():
with gr.Column(scale=1):
country_dropdown = gr.Dropdown(
choices=list(voice_choices_by_country.keys()),
value="🇻🇳 Việt Nam",
label="🌍 Chọn quốc gia"
)
with gr.Column(scale=1):
voice_dropdown = gr.Dropdown(
choices=voice_choices_by_country["🇻🇳 Việt Nam"],
value="🇻🇳 HoaiMy - Nữ Việt Chuẩn",
label="🎭 Chọn giọng nói"
)
with gr.Row():
with gr.Column(scale=2):
speed_slider = gr.Slider(
minimum=0.5,
maximum=2.0,
value=1.0,
step=0.1,
label="⚡ Tốc độ phát"
)
with gr.Column(scale=1):
voice_studio_format_dropdown = gr.Dropdown(
choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"],
value="Markdown (.md)",
label="📄 Định dạng file tải xuống"
)
# Translation feature
with gr.Row():
with gr.Column(scale=1):
translate_checkbox = gr.Checkbox(
label="🌍 Dịch văn bản trước khi tạo giọng nói",
value=False
)
with gr.Column(scale=2):
translate_btn = gr.Button("🔄 DỊCH VĂN BẢN", variant="secondary", size="lg", visible=False)
# Show translated text when translation is enabled
translated_text_output = gr.Textbox(
label="📝 Văn bản đã dịch",
lines=3,
interactive=True,
visible=False,
placeholder="Văn bản sau khi dịch sẽ hiển thị ở đây..."
)
generate_btn = gr.Button("🎵 TẠO GIỌNG NÓI", variant="primary", size="lg")
# Status indicator for Voice Studio
studio_status_text = gr.HTML(
"""✅ Sẵn sàng tạo giọng nói"""
)
# HTML output for the generated inline audio player (wired to generate_btn.click below)
audio_output_vs = gr.HTML()
# Download section for Voice Studio
with gr.Accordion("💾 Tải xuống kết quả", open=False):
gr.HTML("""
📄 Tải xuống văn bản với Markdown formatting
File chứa thông tin session, cấu hình giọng nói và technical details
""")
voice_studio_text_output = gr.File(
label="📄 Văn bản với thông tin chi tiết",
file_count="single",
file_types=[".md", ".txt", ".docx"]
)
# Examples section
gr.Markdown("### 📚 Ví dụ nhanh")
with gr.Row():
example_vn = gr.Button("🇻🇳 Tiếng Việt", size="sm")
example_en = gr.Button("🇺🇸 English", size="sm")
example_de = gr.Button("🇩🇪 Deutsch", size="sm")
example_translate = gr.Button("🌍 Dịch thuật", size="sm")
# Example button functions
def load_vn_example():
return "Xin chào! Chào mừng bạn đến với studio giọng nói.", "🇻🇳 Việt Nam"
def load_en_example():
return "Hello! Welcome to our voice studio.", "🇺🇸 Hoa Kỳ"
def load_de_example():
return "Hallo! Willkommen in unserem Sprachstudio.", "🇩🇪 Đức"
def load_translate_example():
return "Hello! This is an example text for translation.", "🇺🇸 Hoa Kỳ", True
# Translation functions
def toggle_translation_ui(translate_enabled):
"""Show/hide translation UI elements"""
return (
gr.update(visible=translate_enabled), # translate_btn
gr.update(visible=translate_enabled) # translated_text_output
)
def translate_text_interface(text, voice_selection):
"""Translate text for Voice Studio"""
if not text.strip():
return "Vui lòng nhập văn bản trước khi dịch"
target_language = get_target_language_from_voice(voice_selection)
translated = translate_text_with_gemini(text, target_language)
return translated
def create_voice_with_translation(original_text, translated_text, translate_enabled, voice_selection, speed, text_format="txt"):
"""Create voice using original or translated text"""
if translate_enabled and translated_text.strip() and not translated_text.startswith("Lỗi"):
# Use translated text
return create_audio_voice_studio(translated_text, voice_selection, speed, text_format)
else:
# Use original text
return create_audio_voice_studio(original_text, voice_selection, speed, text_format)
# Event handlers for Voice Studio
country_dropdown.change(
fn=update_voices,
inputs=[country_dropdown],
outputs=[voice_dropdown]
)
example_vn.click(
fn=load_vn_example,
outputs=[text_input, country_dropdown]
)
example_en.click(
fn=load_en_example,
outputs=[text_input, country_dropdown]
)
example_de.click(
fn=load_de_example,
outputs=[text_input, country_dropdown]
)
example_translate.click(
fn=load_translate_example,
outputs=[text_input, country_dropdown, translate_checkbox]
)
# Translation UI toggle
translate_checkbox.change(
fn=toggle_translation_ui,
inputs=[translate_checkbox],
outputs=[translate_btn, translated_text_output]
)
# Translation button
translate_btn.click(
fn=translate_text_interface,
inputs=[text_input, voice_dropdown],
outputs=[translated_text_output]
)
# Helper function to extract format and process Voice Studio
def process_voice_studio(original_text, translated_text, translate_enabled, voice_selection, speed, format_choice):
"""Process Voice Studio with format support"""
# Extract format from dropdown
if "Markdown" in format_choice:
text_format = "md"
elif "Word" in format_choice:
text_format = "docx"
else:
text_format = "txt"
return create_voice_with_translation(original_text, translated_text, translate_enabled, voice_selection, speed, text_format)
# Generate voice with translation support
generate_btn.click(
fn=process_voice_studio,
inputs=[text_input, translated_text_output, translate_checkbox, voice_dropdown, speed_slider, voice_studio_format_dropdown],
outputs=[audio_output_vs, voice_studio_text_output]
)
# Audio Translation Tab
with gr.TabItem("🎙️ Audio Translation"):
# Colorful feature cards like Voice Studio
gr.HTML("""
🎤 Ghi âm
Microphone
Real-time
📁 Upload
Audio Files
WAV • MP3
🔄 AI Dịch
13 ngôn ngữ
Gemini 2.0
🎵 Tổng hợp
Neural TTS
26 giọng
""")
# Input section with colorful design
gr.HTML("""
🎤 Tải lên file audio hoặc ghi âm trực tiếp
Hỗ trợ file WAV, MP3 hoặc ghi âm real-time qua microphone
""")
# Enhanced microphone permission notice and controls
if not (os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")):
gr.HTML("""
🎤 Microphone Ready - Bạn có thể ghi âm trực tiếp
🎤 Microphone Access Required
Để sử dụng ghi âm, vui lòng cho phép truy cập microphone. 🔗 Mở cửa sổ mới
⚠️ Iframe Restriction
Microphone có thể bị hạn chế trong iframe.
Mở trong cửa sổ mới
để sử dụng đầy đủ tính năng.
""")
else:
# Production mode - simple microphone notice
gr.HTML('''
📎 Upload audio file or use microphone
''')
audio_input = gr.Audio(
label="📎 Tải lên file audio hoặc ghi âm trực tiếp",
type="numpy", # Use numpy to avoid temp file issues
sources=["upload", "microphone"],
show_label=True,
interactive=True,
elem_id="audio-input-translation"
)
# Audio Recording Control Buttons
with gr.Row():
save_recording_btn = gr.Button(
"💾 Save Recording",
variant="secondary",
size="sm"
)
new_recording_btn = gr.Button(
"🎙️ New Record",
variant="primary",
size="sm"
)
# Button descriptions
gr.HTML("""
💾 Lưu file audio hiện tại vào record_data
🎙️ Xóa audio hiện tại để ghi âm mới
""")
# Status for recording actions
recording_status = gr.HTML(
value="""
Sẵn sàng ghi âm hoặc tải lên file
"""
)
# === RECORDED FILES FUNCTIONS ===
def refresh_recorded_files():
"""Refresh the list of recorded files"""
files = get_recorded_files()
print(f"🔄 Refreshing dropdown - found files: {files}")
return gr.Dropdown(choices=files, value=None)
def load_recorded_file(filename):
"""Load selected recorded file for playback"""
print(f"🎵 Loading recorded file: {filename}")
if filename and filename.strip():
file_path = get_recorded_file_path(filename)
print(f"📁 Full path: {file_path}")
if os.path.exists(file_path):
file_size = os.path.getsize(file_path)
print(f"✅ File exists, size: {file_size} bytes")
try:
# Load audio as numpy array for Gradio compatibility
import soundfile as sf
audio_data, sample_rate = sf.read(file_path)
print(f"🎵 Loaded audio: shape={audio_data.shape}, sr={sample_rate}")
# Return tuple (sample_rate, audio_data) for Gradio numpy type
return (sample_rate, audio_data)
except Exception as e:
print(f"❌ Error loading audio: {e}")
return None
else:
print(f"❌ File not found: {file_path}")
print(f"📁 Directory contents: {os.listdir(os.path.dirname(file_path)) if os.path.exists(os.path.dirname(file_path)) else 'Directory not found'}")
else:
print("❌ No filename provided")
return None
def use_recorded_for_translation(filename, country, voice, fmt):
"""Use selected recorded file for translation"""
print(f"🔄 Using recorded file for translation: {filename}")
if filename and filename.strip():
file_path = get_recorded_file_path(filename)
print(f"📁 Translation file path: {file_path}")
if os.path.exists(file_path):
print(f"✅ Starting translation for: {filename}")
# Use the same translation function
return translate_audio(file_path, country, voice, get_format_from_dropdown(fmt))
else:
print(f"❌ File not found for translation: {file_path}")
# Return empty results if no file selected
print("❌ No file selected for translation")
return "", "", "", "", None, "", "", None
def prepare_recorded_file_download(filename):
"""Prepare recorded file for download"""
print(f"📥 Preparing download for: {filename}")
if filename and filename.strip():
file_path = get_recorded_file_path(filename)
print(f"📁 Download file path: {file_path}")
if os.path.exists(file_path):
print(f"✅ File ready for download: {filename}")
return file_path
else:
print(f"❌ Download file not found: {file_path}")
print("❌ No file selected for download")
return None
def save_current_recording(audio_file):
"""Save current audio recording to record_data"""
if audio_file is None:
current_files = get_recorded_files()
return (
"
❌ Không có file audio để lưu
",
gr.Dropdown(choices=current_files, value=None)
)
try:
saved_path = save_recorded_audio(audio_file)
if saved_path:
saved_filename = os.path.basename(saved_path)
# Get updated file list after saving
updated_files = get_recorded_files()
print(f"🔄 After save - updated files: {updated_files}")
return (
f"""✅ Đã lưu file: {saved_filename}""",
gr.Dropdown(choices=updated_files, value=None)
)
# Save failed
return (
"""❌ Không thể lưu file audio""",
gr.Dropdown(choices=get_recorded_files(), value=None)
)
except Exception as e:
print(f"❌ Error saving recording: {e}")
return (
f"""❌ Lỗi khi lưu file: {str(e)}""",
gr.Dropdown(choices=get_recorded_files(), value=None)
)
def clear_audio_for_new_recording():
"""Clear audio input for new recording"""
return (
None, # Clear audio input
"
🎙️ Sẵn sàng ghi âm mới
"
)
def delete_selected_file(filename):
"""Delete selected file and refresh dropdown"""
if not filename or not filename.strip():
current_files = get_recorded_files()
return (
"
❌ Vui lòng chọn file để xóa
",
gr.Dropdown(choices=current_files, value=None),
None # Clear audio player
)
# Delete the file
delete_result = delete_recorded_file(filename)
# Refresh file list
updated_files = get_recorded_files()
# Build status message (delete_result already carries a ✅/❌ marker)
status_html = f"""{delete_result}"""
return (
status_html,
gr.Dropdown(choices=updated_files, value=None),
None # Clear audio player
)
# Recorded Files Management Section
with gr.Accordion("🎤 File đã ghi âm", open=False):
gr.HTML("""
📁 Quản lý file đã ghi
Chọn file từ danh sách để phát lại hoặc dịch thuật
""")
# Refresh button for recorded files
refresh_files_btn = gr.Button(
"🔄 Làm mới danh sách",
variant="secondary",
size="sm"
)
# Status display for file operations
file_operation_status = gr.HTML(
value="
Chọn file để thực hiện thao tác
"
)
# Dropdown for recorded files
initial_files = get_recorded_files()
print(f"🔍 Initial recorded files: {initial_files}")
recorded_files_dropdown = gr.Dropdown(
choices=initial_files,
label="📂 Chọn file đã ghi",
info="Các file audio đã được ghi âm trước đó"
)
# Preview and controls for selected file
with gr.Row():
with gr.Column():
# Audio player for selected file
recorded_audio_player = gr.Audio(
label="🎵 Phát lại file đã chọn",
interactive=False,
show_label=True,
type="numpy" # Use numpy for better compatibility
)
with gr.Column():
# Action buttons
use_for_translation_btn = gr.Button(
"🔄 Sử dụng để dịch thuật",
variant="primary",
size="sm"
)
with gr.Row():
download_recorded_btn = gr.Button(
"📥 Tải xuống",
variant="secondary",
size="sm"
)
delete_recorded_btn = gr.Button(
"🗑️ Xóa file",
variant="stop",
size="sm"
)
# Download link for recorded file
download_recorded_file = gr.File(
label="📥 File tải xuống",
visible=True,
file_count="single"
)
# Settings section with gradient header
gr.HTML("""
🌍 Cài đặt dịch thuật
Chọn ngôn ngữ đích và giọng nói cho kết quả dịch thuật
""")
# Separate dropdowns without complex wrappers to avoid CSS conflicts
target_country_dropdown = gr.Dropdown(
choices=list(voice_choices_by_country.keys()),
value="🇻🇳 Việt Nam",
label="🌍 Chọn quốc gia đích"
)
target_voice_dropdown = gr.Dropdown(
choices=voice_choices_by_country["🇻🇳 Việt Nam"],
value="🇻🇳 HoaiMy - Nữ Việt Chuẩn",
label="🎭 Chọn giọng nói đích"
)
text_format_dropdown = gr.Dropdown(
choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"],
value="Markdown (.md)",
label="📄 Định dạng file văn bản"
)
# Colorful action button
gr.HTML("""
""")
# Auto-translate on audio upload - no manual button needed
# Results section with colorful headers
gr.HTML("""
📊 Kết quả xử lý
Phiên âm, dịch thuật và tổng hợp giọng nói
""")
# Dynamic status indicator
status_text = gr.HTML("")
# Card-based layout for mobile
with gr.Column(elem_classes=["output-area"]):
# Original content card
gr.HTML("""
📝 Nội dung gốc từ audio
""")
transcription_output = gr.Textbox(
label="🎯 Phiên âm từ audio",
lines=4,
interactive=False,
placeholder="Nội dung phiên âm từ file audio sẽ hiển thị ở đây...",
elem_classes=["mobile-textbox"]
)
detected_language = gr.Textbox(
label="🌐 Ngôn ngữ được phát hiện",
lines=1,
interactive=False,
placeholder="Tự động nhận diện...",
elem_classes=["mobile-textbox"]
)
# Translation result card
gr.HTML("""
✨ Kết quả dịch thuật
""")
translation_output = gr.Textbox(
label="🔄 Nội dung đã dịch",
lines=4,
interactive=False,
placeholder="Bản dịch sẽ hiển thị ở đây...",
elem_classes=["mobile-textbox"]
)
target_language_display = gr.Textbox(
label="🎯 Ngôn ngữ đích",
lines=1,
interactive=False,
placeholder="Chưa chọn...",
elem_classes=["mobile-textbox"]
)
# Mobile-friendly comparison section
with gr.Accordion("🔍 So sánh nội dung", open=False):
gr.HTML("""
Xem nội dung gốc và bản dịch để so sánh
""")
# Stack vertically on mobile for better readability
with gr.Column():
gr.HTML("""
📝 Ngôn ngữ gốc
""")
original_compare = gr.Textbox(
label="",
lines=4,
interactive=False,
show_label=False,
placeholder="Nội dung phiên âm từ audio sẽ hiển thị ở đây...",
elem_classes=["mobile-compare"]
)
gr.HTML("""
✨ Sau khi dịch
""")
translated_compare = gr.Textbox(
label="",
lines=4,
interactive=False,
show_label=False,
placeholder="Nội dung sau khi dịch sẽ hiển thị ở đây...",
elem_classes=["mobile-compare"]
)
# Mobile-optimized download section
with gr.Accordion("💾 Tải xuống kết quả", open=True):
gr.HTML("""
💾 Tải xuống kết quả
File audio và văn bản đã dịch
""")
# Stack downloads vertically for mobile
with gr.Column():
gr.HTML("""
🔊 Audio đã dịch
""")
audio_output_at = gr.Audio(
label="🎵 Audio đã dịch",
type="filepath",
show_label=True,
elem_classes=["mobile-audio"],
format="wav" # Specify format explicitly
)
# Explicit download component for translated audio
audio_download_at = gr.File(
label="📥 Tải xuống audio đã dịch",
file_count="single",
file_types=[".wav"],
visible=True
)
gr.HTML("""
📄 Văn bản đã dịch
""")
text_output = gr.File(
label="",
file_count="single",
file_types=[".txt", ".docx"],
show_label=False,
elem_classes=["mobile-file"]
)
# Event handlers for Audio Translation with colorful status
def update_status_processing():
return """
⚡ Đang tự động dịch thuật...
"""
def update_status_complete():
return """
✅ Dịch thuật hoàn thành!
"""
target_country_dropdown.change(
fn=update_voices,
inputs=[target_country_dropdown],
outputs=[target_voice_dropdown]
)
# Update target language display when dropdown changes
target_voice_dropdown.change(
fn=lambda voice: voice,
inputs=[target_voice_dropdown],
outputs=[target_language_display]
)
# Helper function to extract format
def get_format_from_dropdown(format_choice):
if "Markdown" in format_choice:
return "md"
elif "Word" in format_choice:
return "docx"
return "txt"
# Auto-translate when audio is uploaded or changed
audio_input.change(
fn=lambda: update_status_processing(),
outputs=[status_text]
).then(
fn=lambda audio, country, voice, fmt: translate_audio(audio, country, voice, get_format_from_dropdown(fmt)) if audio is not None else ("", "", "📎 Vui lòng tải lên file audio hoặc ghi âm", country, None, None, "", "", None),
inputs=[audio_input, target_country_dropdown, target_voice_dropdown, text_format_dropdown],
outputs=[
transcription_output,
detected_language,
translation_output,
target_language_display,
audio_output_at,
audio_download_at,
original_compare,
translated_compare,
text_output
]
).then(
fn=lambda: update_status_complete(),
outputs=[status_text]
).then(
fn=refresh_recorded_files,
outputs=[recorded_files_dropdown]
)
# === RECORDED FILES EVENT HANDLERS ===
# Save current recording
save_recording_btn.click(
fn=save_current_recording,
inputs=[audio_input],
outputs=[recording_status, recorded_files_dropdown]
)
# New recording (clear audio)
new_recording_btn.click(
fn=clear_audio_for_new_recording,
outputs=[audio_input, recording_status]
)
refresh_files_btn.click(
fn=refresh_recorded_files,
outputs=[recorded_files_dropdown]
)
recorded_files_dropdown.change(
fn=load_recorded_file,
inputs=[recorded_files_dropdown],
outputs=[recorded_audio_player]
)
use_for_translation_btn.click(
fn=lambda: update_status_processing(),
outputs=[status_text]
).then(
fn=use_recorded_for_translation,
inputs=[recorded_files_dropdown, target_country_dropdown, target_voice_dropdown, text_format_dropdown],
outputs=[
transcription_output,
detected_language,
translation_output,
target_language_display,
audio_output_at,
audio_download_at,
original_compare,
translated_compare,
text_output
]
).then(
fn=lambda: update_status_complete(),
outputs=[status_text]
).then(
fn=refresh_recorded_files,
outputs=[recorded_files_dropdown]
)
download_recorded_btn.click(
fn=prepare_recorded_file_download,
inputs=[recorded_files_dropdown],
outputs=[download_recorded_file]
)
delete_recorded_btn.click(
fn=delete_selected_file,
inputs=[recorded_files_dropdown],
outputs=[file_operation_status, recorded_files_dropdown, recorded_audio_player]
)
# Features section for Voice RAG
gr.Markdown("### 📚 Tính năng chính")
with gr.Row():
with gr.Column():
gr.HTML("""
📚 Voice RAG
Upload tài liệu và đặt câu hỏi. Nhận trả lời bằng giọng nói đa ngôn ngữ.
✓ Hỗ trợ PDF, DOCX, TXT
✓ AI Gemini 2.0 Flash
✓ 24 giọng nói đa quốc gia
""")
with gr.Column():
gr.HTML("""
🌍 Audio Translation
Dịch thuật âm thanh sang nhiều ngôn ngữ với giọng nói tự nhiên.
✓ Ghi âm real-time
✓ 13 ngôn ngữ chính
✓ Edge TTS Neural
""")
with gr.Row():
with gr.Column():
gr.HTML("""
🎤 Voice Studio
Chuyển văn bản thành giọng nói với nhiều lựa chọn quốc gia và giọng nói.
✓ 13 quốc gia
✓ Tích hợp dịch thuật
✓ Điều chỉnh tốc độ
""")
# Footer
gr.HTML("""
""")
# Add JavaScript for button effects
gr.HTML(js_code)
if __name__ == "__main__":
import sys
import locale
import os
# Ensure UTF-8 encoding
if sys.platform == 'win32':
os.environ['PYTHONIOENCODING'] = 'utf-8'
# Optimize startup for HF Spaces
print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
# Only create record_data directory when actually needed to speed up startup
if not os.environ.get("SPACE_ID") and not os.environ.get("HF_SPACE_ID"):
create_record_data_directory()
print(f"📁 Record data directory ready: {RECORD_DATA_DIR}")
else:
print(f"🏭 Production mode - record_data will be created on first use")
# Set environment variables for iframe support
os.environ['GRADIO_ALLOW_FLAGGING'] = 'never'
# Disable Gradio temp directory to prevent file serving issues
# os.environ['GRADIO_TEMP_DIR'] = '/tmp'
# Hugging Face Spaces configuration - Use standard port 7860 for HF
if os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID"):
# HF Spaces standard configuration
port = 7860
print("🏭 Using HF Spaces standard port 7860")
else:
# Local development
port = int(os.environ.get("GRADIO_SERVER_PORT", 7880))
print(f"🖥️ Using local development port {port}")
demo.launch(
server_name="0.0.0.0",
server_port=port,
share=False,
show_error=True,
ssr_mode=False, # Disable SSR to prevent timeout issues on HF Spaces
enable_monitoring=False # Disable monitoring for faster startup
)