# Source: Hugging Face Space "ducnguyen1978/Voice_Agent" (status: Running)
# File size: 138,304 Bytes - commit 9a2046b

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import sys

# Set UTF-8 encoding for Windows
if sys.platform == 'win32':
    import codecs
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout.detach())
    sys.stderr = codecs.getwriter('utf-8')(sys.stderr.detach())

# Load environment variables from .env file (optimized for HF Spaces)
try:
    # Only load .env in local development, skip in production
    if not os.environ.get("SPACE_ID") and not os.environ.get("HF_SPACE_ID"):
        from dotenv import load_dotenv
        load_dotenv()
        print("✅ Environment variables loaded from .env file")
    else:
        print("🏭 Production environment - using system environment variables")
except ImportError:
    print("⚠️  python-dotenv not installed. Using system environment variables only.")
except Exception as e:
    print(f"⚠️  Error loading .env file: {e}")

# Essential imports for HF Spaces
import numpy as np
import gradio as gr

# Try to import google-generativeai with fallback
try:
    import google.generativeai as genai
    GENAI_AVAILABLE = True
except ImportError as e:
    print(f"⚠️ google-generativeai not available: {e}")
    GENAI_AVAILABLE = False
    genai = None

try:
    from gtts import gTTS, lang
    GTTS_AVAILABLE = True
except ImportError as e:
    print(f"⚠️ gtts not available: {e}")
    GTTS_AVAILABLE = False

import tempfile
# import soundfile as sf  # Import locally to avoid startup overhead
# Kokoro not used - removed for performance
import time
import base64

# Try to import optional dependencies
try:
    import edge_tts
    EDGE_TTS_AVAILABLE = True
except ImportError as e:
    print(f"⚠️ edge-tts not available: {e}")
    EDGE_TTS_AVAILABLE = False

import asyncio
import io

try:
    import PyPDF2
    PDF_AVAILABLE = True
except ImportError:
    PDF_AVAILABLE = False

try:
    import docx
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False

import shutil
import atexit
import glob
import datetime

# Librosa not used - removed for performance

# === RECORD DATA MANAGEMENT ===
RECORD_DATA_DIR = "record_data"

def create_record_data_directory():
    """Ensure the recordings directory exists and return its path.

    Returns:
        str: ``RECORD_DATA_DIR`` (a path relative to the working directory).
    """
    if not os.path.exists(RECORD_DATA_DIR):
        # exist_ok=True closes the check-then-create race: if another request
        # creates the directory between the check above and this call, the
        # call succeeds instead of raising FileExistsError.
        os.makedirs(RECORD_DATA_DIR, exist_ok=True)
        print(f"✅ Created directory: {RECORD_DATA_DIR}")
    return RECORD_DATA_DIR

def cleanup_record_data():
    """Delete the recordings directory, except on hosted deployments.

    When either ``SPACE_ID`` or ``HF_SPACE_ID`` is set in the environment the
    directory is kept. Otherwise it is removed recursively if it exists.
    Any failure is reported on stdout and not propagated.
    """
    try:
        hosted = os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")
        if hosted:
            # Hosted run: keep the recordings
            print(f"🏭 Production environment detected - keeping {RECORD_DATA_DIR} directory")
        elif os.path.exists(RECORD_DATA_DIR):
            # Local development: wipe the whole directory tree
            shutil.rmtree(RECORD_DATA_DIR)
            print(f"🧹 Cleaned up {RECORD_DATA_DIR} directory")
    except Exception as e:
        print(f"⚠️ Error cleaning up {RECORD_DATA_DIR}: {e}")

def save_recorded_audio(audio_data, original_filename=None):
    """Store an audio payload as a ``.wav`` file in the recordings directory.

    Args:
        audio_data: One of
            * ``str`` - path of an existing audio file, which is copied;
            * 2-tuple ``(sample_rate, audio_array)`` - written with
              ``soundfile.write``;
            * ``bytes`` / ``bytearray`` / ``memoryview`` - written verbatim.
        original_filename: Optional name whose stem (directory and extension
            removed) is embedded in the stored file name.

    Returns:
        str | None: Path of the stored file, or ``None`` when saving failed.
        The error and its traceback are printed.
    """
    try:
        # Create directory if needed
        create_record_data_directory()

        # Build the file name from a second-resolution timestamp
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        if original_filename:
            name_part = os.path.splitext(os.path.basename(original_filename))[0]
            stem = f"recorded_{name_part}_{timestamp}"
        else:
            stem = f"recorded_{timestamp}"

        # Two recordings saved within the same second would otherwise share a
        # name and the second would overwrite the first; add a counter then.
        filepath = os.path.join(RECORD_DATA_DIR, f"{stem}.wav")
        counter = 1
        while os.path.exists(filepath):
            filepath = os.path.join(RECORD_DATA_DIR, f"{stem}_{counter}.wav")
            counter += 1

        # Validate the payload BEFORE opening the target so that a rejected
        # payload never leaves an empty .wav file behind.
        if isinstance(audio_data, str):
            if not os.path.exists(audio_data):
                raise FileNotFoundError(f"audio file not found: {audio_data}")
            # File path - copy the file
            shutil.copy2(audio_data, filepath)
        elif isinstance(audio_data, tuple) and len(audio_data) == 2:
            # Numpy array format (sample_rate, audio_array)
            sample_rate, audio_array = audio_data
            import soundfile as sf  # imported lazily, as elsewhere in this file
            sf.write(filepath, audio_array, sample_rate)
            print(f"📊 Saved numpy audio: sr={sample_rate}, shape={audio_array.shape}")
        elif isinstance(audio_data, (bytes, bytearray, memoryview)):
            # Raw data
            with open(filepath, 'wb') as f:
                f.write(audio_data)
        else:
            raise TypeError(f"unsupported audio data type: {type(audio_data).__name__}")

        print(f"✅ Saved recorded audio: {filepath}")
        return filepath

    except Exception as e:
        print(f"❌ Error saving recorded audio: {e}")
        import traceback
        traceback.print_exc()
        return None

def get_recorded_files():
    """List the ``.wav`` files stored in the recordings directory.

    Returns:
        list[str]: Base names of the files, most recently modified first.
        An empty list when the directory is missing or an error occurs.
    """
    try:
        if not os.path.exists(RECORD_DATA_DIR):
            print(f"📁 Record directory does not exist: {RECORD_DATA_DIR}")
            return []

        # Collect every .wav file directly inside the directory
        wav_paths = glob.glob(os.path.join(RECORD_DATA_DIR, "*.wav"))
        print(f"🔍 Found {len(wav_paths)} files in {RECORD_DATA_DIR}")

        # Newest modification time first
        newest_first = sorted(wav_paths, key=os.path.getmtime, reverse=True)

        # Only the bare names are handed back for display
        filenames = list(map(os.path.basename, newest_first))
        print(f"📂 Returning filenames: {filenames}")
        return filenames

    except Exception as e:
        print(f"❌ Error getting recorded files: {e}")
        return []

def get_recorded_file_path(filename):
    """Return the path of ``filename`` inside the recordings directory.

    Only the final path component of ``filename`` is used, so a value such as
    ``"../other.wav"`` cannot resolve to a location outside
    ``RECORD_DATA_DIR``.

    Args:
        filename: Name of a recorded file (as returned by
            ``get_recorded_files``).

    Returns:
        str: ``RECORD_DATA_DIR`` joined with the base name of ``filename``.
    """
    return os.path.join(RECORD_DATA_DIR, os.path.basename(filename))


def delete_recorded_file(filename):
    """Delete one recorded file from the recordings directory.

    Args:
        filename: Bare file name (no directory components) of the recording.

    Returns:
        str: A user-facing status message (Vietnamese) describing the outcome:
        nothing selected, invalid name, deleted, not found, or error.
    """
    try:
        if not filename or not filename.strip():
            return "❌ Không có file nào được chọn để xóa"

        # The name comes from the UI; refuse anything that carries directory
        # components so a crafted value cannot delete files elsewhere.
        if os.path.basename(filename) != filename or filename in (".", ".."):
            print(f"❌ Rejected invalid file name: {filename}")
            return f"❌ Tên file không hợp lệ: {filename}"

        file_path = get_recorded_file_path(filename)
        print(f"🗑️ Attempting to delete: {file_path}")

        if os.path.exists(file_path):
            os.remove(file_path)
            print(f"✅ Successfully deleted: {filename}")
            return f"✅ Đã xóa file: {filename}"
        else:
            print(f"❌ File not found: {file_path}")
            return f"❌ Không tìm thấy file: {filename}"

    except Exception as e:
        print(f"❌ Error deleting file: {e}")
        return f"❌ Lỗi khi xóa file: {str(e)}"

# Register cleanup function to run when app exits (disabled for stability)
# atexit.register(cleanup_record_data)  # Disabled to prevent data loss on deployment

# DOCX support already checked above

# Gemini API key cache - resolved lazily so startup does not pay for it
GEMINI_API_KEY = None

def configure_gemini_api():
    """Resolve the Gemini API key and configure the client on first use.

    The key is read from ``GEMINI_API_KEY``, falling back to
    ``GOOGLE_API_KEY``, and cached in the module-level ``GEMINI_API_KEY``.

    Returns:
        The cached key, or ``None`` when ``google-generativeai`` is not
        importable or no key is set in the environment.
    """
    global GEMINI_API_KEY

    if not GENAI_AVAILABLE:
        print("❌ google-generativeai not available")
        return None

    # A previous call already resolved the key - nothing more to do
    if GEMINI_API_KEY is not None:
        return GEMINI_API_KEY

    GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
    if not GEMINI_API_KEY:
        print("⚠️  GEMINI_API_KEY or GOOGLE_API_KEY not found in environment variables")
    else:
        genai.configure(api_key=GEMINI_API_KEY)
        print("✅ Gemini API configured successfully")
    return GEMINI_API_KEY

# Language table for Audio Translation: code -> display name.
# Taken from gTTS when it is importable, otherwise a two-entry fallback.
if not GTTS_AVAILABLE:
    GTTS_LANGUAGES = {'en': 'English', 'vi': 'Vietnamese'}
else:
    GTTS_LANGUAGES = lang.tts_langs()
    GTTS_LANGUAGES.update(ja='Japanese')

# Display names in alphabetical order
SUPPORTED_LANGUAGES = sorted(GTTS_LANGUAGES.values())

# Voice mapping for Edge TTS - defined once at module level so it is not
# rebuilt on every request.
# Key: the voice label as selected in the UI; value: the Edge TTS voice name
# handed to generate_speech(). Lookups fall back to "vi-VN-HoaiMyNeural" when
# the label is unknown.
# NOTE(review): the two Portuguese entries carry the 🇵🇹 flag but map to
# pt-BR (Brazilian) voices - confirm this is intended.
VOICE_MAP = {
    "🇻🇳 HoaiMy - Nữ Việt Chuẩn": "vi-VN-HoaiMyNeural",
    "🇻🇳 NamMinh - Nam Việt Chuẩn": "vi-VN-NamMinhNeural",
    "🇺🇸 Aria - Nữ Mỹ": "en-US-AriaNeural",
    "🇺🇸 Guy - Nam Mỹ": "en-US-GuyNeural",
    "🇬🇧 Sonia - Nữ Anh": "en-GB-SoniaNeural",
    "🇬🇧 Ryan - Nam Anh": "en-GB-RyanNeural",
    "🇩🇪 Katja - Deutsche Frau": "de-DE-KatjaNeural",
    "🇩🇪 Conrad - Deutscher Mann": "de-DE-ConradNeural",
    "🇫🇷 Denise - Française": "fr-FR-DeniseNeural",
    "🇫🇷 Henri - Français": "fr-FR-HenriNeural",
    "🇪🇸 Elvira - Española": "es-ES-ElviraNeural",
    "🇪🇸 Alvaro - Español": "es-ES-AlvaroNeural",
    "🇮🇹 Elsa - Italiana": "it-IT-ElsaNeural",
    "🇮🇹 Diego - Italiano": "it-IT-DiegoNeural",
    "🇯🇵 Nanami - 日本女性": "ja-JP-NanamiNeural",
    "🇯🇵 Keita - 日本男性": "ja-JP-KeitaNeural",
    "🇰🇷 SunHi - 한국 여성": "ko-KR-SunHiNeural",
    "🇰🇷 BongJin - 한국 남성": "ko-KR-BongJinNeural",
    "🇨🇳 Xiaoxiao - 中文女声": "zh-CN-XiaoxiaoNeural",
    "🇨🇳 Yunxi - 中文男声": "zh-CN-YunxiNeural",
    "🇷🇺 Svetlana - Русская": "ru-RU-SvetlanaNeural",
    "🇷🇺 Dmitry - Русский": "ru-RU-DmitryNeural",
    "🇵🇹 Francisca - Portuguesa": "pt-BR-FranciscaNeural",
    "🇵🇹 Antonio - Português": "pt-BR-AntonioNeural",
    "🇸🇦 Zariyah - عربية": "ar-SA-ZariyahNeural",
    "🇸🇦 Hamed - عربي": "ar-SA-HamedNeural"
}

# Voice RAG functions (integrated from hf_Voice_Audio_Translation)
def read_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file.

    Returns:
        str: Concatenated page text, or a message starting with
        ``"Error reading PDF:"`` when PyPDF2 is unavailable or reading fails.
    """
    try:
        # Report a missing dependency explicitly instead of letting the
        # undefined module name surface as a NameError message.
        if not PDF_AVAILABLE:
            return "Error reading PDF: PyPDF2 is not installed"
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # `or ""` guards against a page for which no text is returned
            return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        return f"Error reading PDF: {str(e)}"

def read_docx(file_path):
    """Extract the paragraph text of a Word (.docx) document.

    Args:
        file_path: Path of the document.

    Returns:
        str: Every paragraph followed by a newline; a ``"❌"`` message when
        python-docx is unavailable; or a message starting with
        ``"Error reading DOCX:"`` when reading fails.
    """
    try:
        if DOCX_AVAILABLE:
            document = docx.Document(file_path)
            # One newline-terminated line per paragraph
            return "".join(para.text + "\n" for para in document.paragraphs)
        return "❌ python-docx not available"
    except Exception as e:
        return f"Error reading DOCX: {str(e)}"

def read_txt(file_path):
    """Read a UTF-8 text file.

    Args:
        file_path: Path of the text file.

    Returns:
        str: The file content, or a message starting with
        ``"Error reading TXT:"`` when the file cannot be read or decoded.
    """
    try:
        # utf-8-sig decodes plain UTF-8 identically but also drops a leading
        # byte-order mark, which would otherwise end up in the returned text.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            return file.read()
    except Exception as e:
        return f"Error reading TXT: {str(e)}"

def extract_text_from_file(file_path):
    """Extract text from a document, choosing the reader by file extension.

    Args:
        file_path: Path of the uploaded file, or ``None``.

    Returns:
        str: The reader's result for ``.pdf``, ``.docx`` and ``.txt`` files
        (extension compared case-insensitively); ``"No file uploaded"`` for
        ``None``; an ``"Unsupported file format: ..."`` message otherwise.
    """
    if file_path is None:
        return "No file uploaded"

    _, extension = os.path.splitext(file_path)
    extension = extension.lower()

    # Guard clauses, one per supported format
    if extension == '.pdf':
        return read_pdf(file_path)
    if extension == '.docx':
        return read_docx(file_path)
    if extension == '.txt':
        return read_txt(file_path)
    return f"Unsupported file format: {extension}"

def detect_language_from_text(text):
    """Guess the language of ``text`` with lightweight heuristics.

    Checks, in order:
      1. Vietnamese - letters used by Vietnamese but by none of the other
         languages handled here, or a few common Vietnamese words.
      2. Japanese / Korean / Chinese - by Unicode block (kana, Hangul
         syllables, CJK unified ideographs). Kana is tested before
         ideographs because Japanese text mixes both.
      3. French / German / Spanish / English - by the number of distinct
         function words found as WHOLE words; ties go to the earlier
         language in that order.
      4. French / German / Spanish - by accented letters, when no word hit.

    Words are matched as whole tokens, not substrings: substring matching
    made entries such as 'a', 'le' or 'de' match almost any text.

    Args:
        text: Text to classify. ``None`` or empty yields the default.

    Returns:
        str: One of "Vietnamese", "Chinese", "Japanese", "Korean", "French",
        "German", "Spanish" or "English" (also the fallback).
    """
    import re
    import unicodedata

    if not text:
        return "English"

    # NFC so that letter+combining-mark sequences compare as single letters
    normalized = unicodedata.normalize("NFC", text).lower()
    tokens = set(re.findall(r"\w+", normalized))

    # Vietnamese detection. Accents shared with French/Spanish (à á â è é ê
    # ì í ò ó ô ù ú ý) are deliberately left out of this set.
    vietnamese_only = set('ạảãầấậẩẫăằắặẳẵẹẻẽềếệểễịỉĩọỏõồốộổỗơờớợởỡụủũưừứựửữỳỵỷỹđ')
    vietnamese_words = {'và', 'có', 'này', 'tôi', 'không'}
    if not vietnamese_only.isdisjoint(normalized) or tokens & vietnamese_words:
        return "Vietnamese"

    # Script-based detection by Unicode block
    if re.search('[\u3040-\u30ff]', normalized):    # hiragana + katakana
        return "Japanese"
    if re.search('[\uac00-\ud7a3]', normalized):    # Hangul syllables
        return "Korean"
    if re.search('[\u4e00-\u9fff]', normalized):    # CJK unified ideographs
        return "Chinese"

    # Latin-script languages: count distinct whole-word hits.
    # 'a' and 'no' are omitted from the Spanish list because they are just
    # as common in English text.
    word_lists = (
        ("French", {'le', 'la', 'les', 'de', 'et', 'à', 'un', 'une', 'ce', 'qui', 'que'}),
        ("German", {'der', 'die', 'das', 'und', 'ist', 'ich', 'bin', 'haben', 'sein', 'werden'}),
        ("Spanish", {'el', 'la', 'de', 'que', 'y', 'en', 'un', 'es', 'se', 'te', 'lo'}),
        ("English", {'the', 'and', 'is', 'are', 'have', 'has', 'will', 'would', 'can', 'could',
                     'that', 'this', 'with', 'for', 'you', 'he', 'she', 'it', 'they', 'we'}),
    )
    best_language, best_hits = "English", 0
    for language, words in word_lists:
        hits = len(tokens & words)
        if hits > best_hits:  # strict '>' keeps the earlier language on ties
            best_language, best_hits = language, hits
    if best_hits:
        return best_language

    # No function word found: fall back to accented letters
    accent_sets = (
        ("French", 'àâçèéêëîïôùûÿœ'),
        ("German", 'äöüß'),
        ("Spanish", 'ñáíóú¿¡'),
    )
    for language, accents in accent_sets:
        if any(char in normalized for char in accents):
            return language

    return "English"  # Default fallback

def process_with_gemini(text, question, answer_language="Vietnamese"):
    """Ask Gemini a question about a document and return its answer.

    Args:
        text: Document content placed into the prompt.
        question: The user's question.
        answer_language: Language the model is asked to answer in.

    Returns:
        str: The model's answer; a ``"❌"`` message when no API key is
        configured; or a message starting with
        ``"Error processing with Gemini:"`` when the call fails.
    """
    try:
        if not configure_gemini_api():
            return "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables"

        gemini = genai.GenerativeModel("gemini-2.0-flash")

        # The guessed document language is passed to the model as a hint
        doc_language = detect_language_from_text(text)

        prompt = f"""

        Based on the following document content, please answer the question in {answer_language}:

        

        Document Content (detected language: {doc_language}):

        {text}

        

        Question: {question}

        

        Please provide a comprehensive and accurate answer in {answer_language}. 

        If the document is in a different language than the question, please still answer in {answer_language}.

        Maintain the factual accuracy while adapting cultural context appropriately.

        """

        return gemini.generate_content(prompt).text

    except Exception as e:
        return f"Error processing with Gemini: {str(e)}"

def text_to_speech_rag(text, voice_selection):
    """Synthesize a RAG answer with Edge TTS and store it in a temp file.

    Args:
        text: Answer text. Only the first 2000 characters are spoken.
        voice_selection: Voice label; unknown labels fall back to
            ``"vi-VN-HoaiMyNeural"``.

    Returns:
        str | None: Path of the generated audio file, or ``None`` when the
        text is empty, is an error message (``"Error"`` / ``"❌"`` prefix),
        no audio was produced, or synthesis failed.
    """
    temp_output_path = None
    try:
        # Error messages from earlier pipeline stages must not be read aloud;
        # they start with either "Error" or "❌".
        if not text or text.startswith(("Error", "❌")):
            return None

        # Use global VOICE_MAP for performance
        voice_name = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
        text_limited = text[:2000]

        # Generate speech using Edge TTS
        audio_data = asyncio.run(generate_speech(text_limited, voice_name, 0.0))
        if not audio_data:
            return None

        # The bytes from generate_speech are served as audio/mpeg elsewhere in
        # this file, so the file gets an .mp3 suffix rather than .wav.
        fd, temp_output_path = tempfile.mkstemp(suffix=".mp3", prefix="voice_rag_audio_")
        with os.fdopen(fd, 'wb') as f:
            f.write(audio_data)

        return temp_output_path

    except Exception as e:
        print(f"TTS Error: {str(e)}")
        # Do not leave a partially written temp file behind
        if temp_output_path and os.path.exists(temp_output_path):
            try:
                os.remove(temp_output_path)
            except OSError:
                pass
        return None

def voice_rag_pipeline(uploaded_file, question, answer_language="Vietnamese", voice_selection="🇻🇳 HoaiMy - Nữ Việt Chuẩn", text_format="txt"):
    """Run the Voice RAG flow: extract text, ask Gemini, synthesize, export.

    Args:
        uploaded_file: Path of the uploaded document, or ``None``.
        question: The user's question.
        answer_language: Language the answer should be written in.
        voice_selection: Voice label used for speech synthesis.
        text_format: Download format; ``"md"`` produces the formatted
            Markdown report, anything else is passed to ``create_text_file``.

    Returns:
        tuple: ``(answer_or_message, detected_language, audio_path,
        text_file_path)``. The two paths are ``None`` when a step failed.
    """
    if uploaded_file is None:
        return "Please upload a document first.", "N/A", None, None

    if not question or not question.strip():
        return "Please enter a question.", "N/A", None, None

    # Extract text from uploaded file
    extracted_text = extract_text_from_file(uploaded_file)

    # The readers signal failure through message prefixes, and not all of
    # them start with "Error"; none of these may reach the model as if they
    # were document content.
    failure_prefixes = ("Error", "❌", "Unsupported file format", "No file uploaded")
    if extracted_text.startswith(failure_prefixes):
        return extracted_text, "Error", None, None
    if not extracted_text.strip():
        return "Error: no text could be extracted from the document", "Error", None, None

    # Detect document language
    detected_doc_language = detect_language_from_text(extracted_text)

    # Process with Gemini using selected answer language
    answer = process_with_gemini(extracted_text, question, answer_language)

    # A failed model call yields a message, not an answer: show it, but do
    # not synthesize it or offer it as a download.
    if not answer or answer.startswith(("Error processing with Gemini", "❌")):
        return answer, detected_doc_language, None, None

    # Generate speech using selected voice
    audio_file = text_to_speech_rag(answer, voice_selection)

    # Create formatted content for download
    chosen_format = (text_format or "txt").lower()
    if chosen_format == "md":
        # Markdown report with session details
        formatted_content = format_voice_rag_response(
            question, answer, detected_doc_language, voice_selection
        )
        text_file_path = create_text_file(formatted_content, "md", "voice_rag_response")
    else:
        # Plain answer in the requested format
        text_file_path = create_text_file(answer, chosen_format, "voice_rag_answer")

    return answer, detected_doc_language, audio_file, text_file_path

def detect_language(text):
    """Score ``text`` against five languages and return the best match.

    Each language gets points for function words found as WHOLE words and
    for characteristic letters. Matching whole tokens rather than substrings
    keeps short entries such as 'a', 'he' or 'it' from matching inside
    unrelated words. Letters are counted on the lower-cased text so upper-
    case accented letters count as well. The Vietnamese letter set excludes
    accents shared with French and Spanish so those languages are not
    scored as Vietnamese.

    Args:
        text: Text to classify.

    Returns:
        str: ``"unknown"`` for empty/blank input; otherwise "Vietnamese",
        "English", "German", "French" or "Spanish". "English" is the
        fallback when nothing scores. The scores are printed.
    """
    import re

    if not text or not text.strip():
        return "unknown"

    text_lower = text.lower()
    tokens = set(re.findall(r"\w+", text_lower))

    def word_hits(words):
        """Number of distinct list words present as whole tokens."""
        return len(tokens.intersection(words))

    def char_hits(chars):
        """Number of characters of the text that belong to ``chars``."""
        return sum(1 for char in text_lower if char in chars)

    # Vietnamese: 1 point per Vietnamese-only letter, 2 per common word
    vietnamese_chars = 'ạảãầấậẩẫăằắặẳẵẹẻẽềếệểễịỉĩọỏõồốộổỗơờớợởỡụủũưừứựửữỳỵỷỹđ'
    vietnamese_words = ['và', 'của', 'là', 'có', 'này', 'được', 'cho', 'từ', 'một', 'những', 'tôi', 'bạn']
    vietnamese_score = char_hits(vietnamese_chars) + 2 * word_hits(vietnamese_words)

    # English: words only
    english_words = ['the', 'and', 'is', 'are', 'have', 'has', 'will', 'would', 'can', 'could', 'that', 'this', 'with', 'for', 'you', 'he', 'she', 'it', 'they', 'we']
    english_score = word_hits(english_words)

    # German: words plus umlauts/eszett at full weight
    german_words = ['der', 'die', 'das', 'und', 'ist', 'ich', 'bin', 'haben', 'sein', 'werden', 'mit', 'auf', 'für', 'von']
    german_chars = 'äöüß'
    german_score = word_hits(german_words) + char_hits(german_chars)

    # French: words plus accents at half weight
    french_words = ['le', 'la', 'les', 'de', 'et', 'à', 'un', 'une', 'ce', 'qui', 'que', 'avec', 'pour', 'dans']
    french_chars = 'àâäéèêëïîôöùûüÿç'
    french_score = word_hits(french_words) + 0.5 * char_hits(french_chars)

    # Spanish: words plus accents at half weight
    spanish_words = ['el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo', 'con', 'para']
    spanish_chars = 'ñáéíóúü'
    spanish_score = word_hits(spanish_words) + 0.5 * char_hits(spanish_chars)

    # Insertion order doubles as the tie-break order used by max() below
    scores = {
        'Vietnamese': vietnamese_score,
        'English': english_score,
        'German': german_score,
        'French': french_score,
        'Spanish': spanish_score
    }

    # Find the language with highest score
    max_score = max(scores.values())
    if max_score > 0:
        detected = max(scores, key=scores.get)
        print(f"🔍 Language detection scores: {scores}")
        print(f"🎯 Detected language: {detected} (score: {max_score})")
        return detected

    # Default fallback
    print(f"⚠️ Could not detect language, defaulting to English")
    return "English"

async def generate_speech(text, voice_name, rate):
    """Synthesize ``text`` with Edge TTS and return the audio bytes.

    Args:
        text: Text to speak.
        voice_name: Edge TTS voice name.
        rate: Relative speed change as a fraction; it is formatted as a
            signed percentage (``0.25`` becomes ``"+25%"``).

    Returns:
        bytes: All audio chunks of the stream, concatenated in order.
    """
    rate_spec = f"{rate:+.0%}"
    communicate = edge_tts.Communicate(text, voice_name, rate=rate_spec)

    # Keep only the audio chunks; other chunk types are ignored
    audio_chunks = [
        chunk["data"]
        async for chunk in communicate.stream()
        if chunk["type"] == "audio"
    ]
    return b"".join(audio_chunks)

def create_text_file(content, file_format="txt", filename_prefix="translated_text"):
    """Write ``content`` to a temporary file offered for download.

    Args:
        content: Text to store. Empty, non-string, or error content
            (prefix ``"Lỗi:"`` or ``"❌"``) produces no file.
        file_format: ``"docx"``, ``"md"`` or ``"txt"`` (case-insensitive).
            ``"docx"`` falls back to a ``.txt`` file when python-docx is
            unavailable; unknown formats also produce ``.txt``.
        filename_prefix: Prefix of the temporary file name.

    Returns:
        str | None: Path of the created file, or ``None`` when nothing was
        written. A failure is printed and the partial file removed.
    """
    # Checked up front so a non-string value cannot raise outside the
    # error handling below.
    if not isinstance(content, str) or not content or content.startswith(("Lỗi:", "❌")):
        return None

    temp_file_path = None
    try:
        fmt = file_format.lower()

        if fmt == "docx" and DOCX_AVAILABLE:
            # Create Word document
            from docx import Document
            fd, temp_file_path = tempfile.mkstemp(suffix=".docx", prefix=f"{filename_prefix}_")
            os.close(fd)  # python-docx reopens the file by path

            doc = Document()
            doc.add_heading('Nội dung đã dịch', 0)
            doc.add_paragraph(content)
            doc.save(temp_file_path)
            return temp_file_path

        # Markdown and plain text differ only in the file suffix
        suffix = ".md" if fmt == "md" else ".txt"
        fd, temp_file_path = tempfile.mkstemp(suffix=suffix, prefix=f"{filename_prefix}_")
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.write(content)
        return temp_file_path

    except Exception as e:
        print(f"⚠️ Error creating text file: {e}")
        # Remove the half-written temp file instead of leaking it
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
            except OSError:
                pass
        return None

def format_voice_rag_response(question, answer, detected_language, voice_selection, timestamp=None):
    """Render a Voice RAG question/answer pair as a Markdown report.

    Args:
        question: The user's question, shown as a block quote.
        answer: The model's answer; surrounding whitespace is stripped.
        detected_language: Detected document language, shown in the table.
        voice_selection: Voice label, shown in the table and the footer.
        timestamp: Text for the timestamp fields; the current local time
            (``YYYY-MM-DD HH:MM:SS``) when ``None``.

    Returns:
        str: The Markdown document.
    """
    if timestamp is None:
        generated_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    else:
        generated_at = timestamp

    answer_body = answer.strip()

    return f"""# 📚 Voice RAG - Intelligent Document Q&A



---



## 📄 **Session Information**



| **Field** | **Details** |

|-----------|-------------|

| 🕒 **Timestamp** | {generated_at} |

| 🌍 **Document Language** | {detected_language} |

| 🎭 **Voice Selection** | {voice_selection} |

| 🤖 **AI Model** | Google Gemini 2.0 Flash |



---



## ❓ **Question**



> {question}



---



## 💬 **AI Response**



{answer_body}



---





---



## 📱 **Generated by**



**🎙️ Voice AI Platform** - Digitized Brains  

*Powered by Claude Code & Google Gemini 2.0 Flash*



> 🌐 **Voice RAG Technology** - Combining document intelligence with premium voice synthesis



---



*Generated on {generated_at} | Voice: {voice_selection} | Language: {detected_language}*

"""

def format_voice_studio_response(text, voice_selection, speed, detected_language="Auto-detected", timestamp=None):
    """Render a Voice Studio result as a short Markdown document.

    Args:
        text: Input text; surrounding whitespace is stripped.
        voice_selection: Voice label shown in the footer.
        speed: Playback speed, shown with one decimal place.
        detected_language: Language label shown in the heading.
        timestamp: Text for the footer timestamp; the current local time
            (``YYYY-MM-DD HH:MM:SS``) when ``None``.

    Returns:
        str: The Markdown document.
    """
    if timestamp is None:
        generated_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    else:
        generated_at = timestamp

    body = text.strip()

    return f"""# Voice Studio Result



## Input Text ({detected_language})



{body}



---



*Generated on {generated_at} | Voice: {voice_selection} | Speed: {speed:.1f}x*

"""

def format_audio_translation_response(original_text, translated_text, source_language, target_language, voice_selection, timestamp=None):
    """Render an audio translation result as a short Markdown document.

    Args:
        original_text: Transcribed source text; whitespace is stripped.
        translated_text: Translated text; whitespace is stripped.
        source_language: Source language label.
        target_language: Target language label.
        voice_selection: Voice label shown in the footer.
        timestamp: Text for the footer timestamp; the current local time
            (``YYYY-MM-DD HH:MM:SS``) when ``None``.

    Returns:
        str: The Markdown document.
    """
    if timestamp is None:
        generated_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    else:
        generated_at = timestamp

    source_body = original_text.strip()
    target_body = translated_text.strip()

    return f"""# Audio Translation Result



## Original Text ({source_language})



{source_body}



## Translated Text ({target_language})



{target_body}



---



*Generated on {generated_at} | {source_language} → {target_language} | Voice: {voice_selection}*

"""

def create_audio_voice_studio(text, voice_selection, speed, text_format="txt"):
    """Synthesize text with Edge TTS and build an HTML player plus a text file.

    Args:
        text: Text to speak. Only the first 1000 characters are used.
        voice_selection: Voice label; unknown labels fall back to
            ``"vi-VN-HoaiMyNeural"``.
        speed: Speed multiplier; ``1.0`` is normal speed. It is converted to
            a relative rate (``speed - 1.0``) for Edge TTS.
        text_format: ``"md"``, ``"docx"`` or ``"txt"``; any other value
            produces no text file.

    Returns:
        tuple: ``(html_or_message, text_file_path)``. The first element is
        the HTML player with the audio embedded as a base64 data URI, or a
        ``"❌"`` message on failure; the second is the text file path or
        ``None``.
    """
    import html

    if not text or not text.strip():
        return "❌ Vui lòng nhập văn bản / Please enter text / Bitte Text eingeben", None

    try:
        # Use global VOICE_MAP for performance (avoiding recreation on each call)
        voice_name = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
        text_limited = text[:1000]

        # Convert the speed multiplier to a relative rate (1.0 -> 0, 1.5 -> +0.5)
        rate_percent = (speed - 1.0)

        # Generate speech using Edge TTS
        audio_data = asyncio.run(generate_speech(text_limited, voice_name, rate_percent))

        # Embed the audio directly in the page as a base64 data URI
        audio_base64 = base64.b64encode(audio_data).decode('utf-8')

        timestamp = int(time.time())
        filename = f"voice_{voice_name}_{speed}x_{timestamp}.mp3"

        # Detect the language once; the result feeds both the player and the
        # exported document.
        detected_lang = detect_language(text_limited)

        # The label originates from the request, so escape it before it is
        # placed into markup.
        safe_voice = html.escape(str(voice_selection))

        # Mobile-optimized HTML player
        html_player = f'''

            <div style="

                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);

                border-radius: 20px; 

                padding: 20px; 

                margin: 10px 0; 

                box-shadow: 0 8px 32px rgba(0,0,0,0.2);

                color: white;

                text-align: center;

            ">

                <div style="margin-bottom: 20px;">

                    <h3 style="color: #fff; margin: 0 0 15px 0; font-size: 1.3em; text-shadow: 1px 1px 2px rgba(0,0,0,0.3);">

                        🎵 Âm thanh hoàn thành!

                    </h3>

                    <div style="

                        background: rgba(255,255,255,0.2); 

                        border-radius: 12px; 

                        padding: 12px; 

                        font-size: 0.9em; 

                        line-height: 1.5;

                        backdrop-filter: blur(10px);

                    ">

                        <div><strong>🎭 Giọng:</strong> {safe_voice}</div>

                        <div><strong>⚡ Tốc độ:</strong> {speed:.1f}x | <strong>🌍 Ngôn ngữ:</strong> {detected_lang.title()}</div>

                        <div><strong>📝 Độ dài:</strong> {len(text_limited)} ký tự</div>

                    </div>

                </div>

                

                <audio controls style="

                    width: 100%; 

                    max-width: 100%;

                    height: 50px;

                    margin: 20px 0; 

                    border-radius: 25px;

                    background: rgba(255,255,255,0.95);

                    box-shadow: 0 4px 15px rgba(0,0,0,0.2);

                ">

                    <source src="data:audio/mpeg;base64,{audio_base64}" type="audio/mpeg">

                    Trình duyệt không hỗ trợ audio.

                </audio>

                

                <div style="

                    display: flex; 

                    justify-content: center;

                    margin-top: 20px;

                ">

                    <a href="data:audio/mpeg;base64,{audio_base64}" download="{filename}"

                       style="

                           background: linear-gradient(45deg, #28a745, #20c997);

                           color: white; 

                           padding: 15px 30px; 

                           text-decoration: none;

                           border-radius: 25px; 

                           font-weight: 700; 

                           font-size: 1.1em;

                           display: flex;

                           align-items: center;

                           justify-content: center;

                           box-shadow: 0 4px 15px rgba(40,167,69,0.3);

                           transition: all 0.3s ease;

                           min-height: 48px;

                           min-width: 200px;

                       "

                       ontouchstart=""

                       onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 6px 20px rgba(40,167,69,0.4)'"

                       onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='0 4px 15px rgba(40,167,69,0.3)'">

                        📥 TẢI XUỐNG MP3

                    </a>

                </div>

            </div>

            '''

        # Create text file based on format
        text_file_path = None
        if text_format in ("md", "docx"):
            # Both formats export the same Markdown-formatted summary
            markdown_content = format_voice_studio_response(text_limited, voice_selection, speed, detected_lang)
            text_file_path = create_text_file(markdown_content, text_format, "voice_studio")
        elif text_format == "txt":
            # Plain text export of the spoken text
            text_file_path = create_text_file(text_limited, "txt", "voice_studio")

        return html_player, text_file_path

    except Exception as e:
        return f"❌ Error: {str(e)}", None

# Language mapping for voices - defined once at module level so it is not
# rebuilt on every request.
# Key: the voice label as selected in the UI; value: the language name
# returned by get_target_language_from_voice().
VOICE_TO_LANGUAGE = {
    # Vietnamese
    "🇻🇳 HoaiMy - Nữ Việt Chuẩn": "Vietnamese",
    "🇻🇳 NamMinh - Nam Việt Chuẩn": "Vietnamese",
    # English (US and GB voices)
    "🇺🇸 Aria - Nữ Mỹ": "English",
    "🇺🇸 Guy - Nam Mỹ": "English", 
    "🇬🇧 Sonia - Nữ Anh": "English",
    "🇬🇧 Ryan - Nam Anh": "English",
    # German
    "🇩🇪 Katja - Deutsche Frau": "German",
    "🇩🇪 Conrad - Deutscher Mann": "German",
    # French
    "🇫🇷 Denise - Française": "French",
    "🇫🇷 Henri - Français": "French",
    # Spanish
    "🇪🇸 Elvira - Española": "Spanish",
    "🇪🇸 Alvaro - Español": "Spanish",
    # Italian
    "🇮🇹 Elsa - Italiana": "Italian",
    "🇮🇹 Diego - Italiano": "Italian",
    # Japanese
    "🇯🇵 Nanami - 日本女性": "Japanese",
    "🇯🇵 Keita - 日本男性": "Japanese",
    # Korean
    "🇰🇷 SunHi - 한국 여성": "Korean",
    "🇰🇷 BongJin - 한국 남성": "Korean",
    # Chinese
    "🇨🇳 Xiaoxiao - 中文女声": "Chinese",
    "🇨🇳 Yunxi - 中文男声": "Chinese",
    # Russian
    "🇷🇺 Svetlana - Русская": "Russian",
    "🇷🇺 Dmitry - Русский": "Russian",
    # Portuguese
    "🇵🇹 Francisca - Portuguesa": "Portuguese",
    "🇵🇹 Antonio - Português": "Portuguese",
    # Arabic
    "🇸🇦 Zariyah - عربية": "Arabic",
    "🇸🇦 Hamed - عربي": "Arabic"
}

def get_target_language_from_voice(voice_selection):
    """Return the translation target language for a voice label.

    Args:
        voice_selection: Voice label as selected in the UI.

    Returns:
        str: The language mapped to the label in ``VOICE_TO_LANGUAGE``, or
        ``"Vietnamese"`` when the label is not in the table.
    """
    try:
        return VOICE_TO_LANGUAGE[voice_selection]
    except KeyError:
        # Unknown label: fall back to the default target language
        return "Vietnamese"

def translate_text_with_gemini(text, target_language):
    """Translate ``text`` into ``target_language`` with Gemini.

    Args:
        text: Text to translate.
        target_language: Name of the language to translate into.

    Returns:
        str: The translated text with common lead-ins removed; ``""`` for
        blank input; a ``"❌"`` message when no API key is configured; or a
        message starting with ``"Lỗi dịch thuật:"`` when the call fails.
    """
    try:
        if not configure_gemini_api():
            return "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables"

        if not text.strip():
            return ""

        gemini = genai.GenerativeModel("gemini-2.0-flash")

        prompt = f"""Translate the following text to {target_language}. Return ONLY the translated text, nothing else:



{text}"""

        translated_text = gemini.generate_content(prompt).text.strip()

        # Drop a leading "Translation:" label if the model added one
        label = "translation:"
        if translated_text.lower().startswith(label):
            translated_text = translated_text[len(label):].strip()

        # Drop a leading "Here is ..." line, but only when more text follows
        if translated_text.lower().startswith("here is"):
            _, line_break, remainder = translated_text.partition('\n')
            if line_break:
                translated_text = remainder.strip()

        return translated_text

    except Exception as e:
        return f"Lỗi dịch thuật: {str(e)}"

def translate_audio(audio_file, target_country, voice_selection, text_format="txt"):
    """Transcribe audio, translate the transcript and synthesize it as speech.

    Pipeline: store the input with ``save_recorded_audio``, transcribe it with
    Gemini, detect the transcript language with ``detect_language``, translate
    it to the language mapped from ``voice_selection`` when the two differ,
    synthesize the result with ``generate_speech`` and export the text to a
    downloadable file.

    Args:
        audio_file: Audio input forwarded to ``save_recorded_audio``; ``None``
            produces an error result.
        target_country: Echoed back in the fourth slot of the early error
            results (missing API key, missing or unsaved audio).
        voice_selection: Voice label used to look up both the target language
            and the Edge TTS voice (unknown labels fall back to
            ``"vi-VN-HoaiMyNeural"``).
        text_format: ``"md"`` or ``"docx"`` export the formatted report; any
            other value exports the translated text as ``"txt"``.

    Returns:
        A 9-tuple ``(transcription, detected_language, translated_text,
        target_language, audio_path, audio_path, transcription,
        translated_text, text_file_path)``. On failure the first slot holds
        the error message, the text slots are empty and the path slots are
        ``None``.
    """

    def _failure(message, status, language):
        # Every failure result shares this 9-slot shape; only the message,
        # the detected-language slot and the target-language slot vary.
        return message, status, "", language, None, None, "", "", None

    try:
        api_key = configure_gemini_api()
        if not api_key:
            return _failure(
                "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables",
                "Không xác định",
                target_country,
            )

        if audio_file is None:
            return _failure("Lỗi: Vui lòng tải lên file audio", "Không xác định", target_country)

        # Store the recording first; the transcription step reads it back from disk
        print(f"🔍 Processing audio file type: {type(audio_file)}")
        saved_audio_path = save_recorded_audio(audio_file)
        if not saved_audio_path:
            print("❌ Failed to save audio file")
            return _failure("❌ Lỗi: Không thể lưu file audio", "Không xác định", target_country)

        print(f"🎤 Audio saved to record_data: {os.path.basename(saved_audio_path)}")
        # Debug: check that the file really exists
        if not os.path.exists(saved_audio_path):
            print(f"❌ File not found after save: {saved_audio_path}")
            return _failure("❌ Lỗi: Không thể lưu file audio", "Không xác định", target_country)
        file_size = os.path.getsize(saved_audio_path)
        print(f"✅ File confirmed: {saved_audio_path} ({file_size} bytes)")

        # Get target language from voice selection
        target_language = get_target_language_from_voice(voice_selection)

        # Step 1: transcribe the audio in its original language
        model = genai.GenerativeModel("gemini-2.0-flash")

        with open(saved_audio_path, 'rb') as f:
            source_audio = f.read()

        # NOTE(review): the MIME type is fixed to WAV regardless of the saved
        # file's real format — confirm save_recorded_audio always writes WAV.
        audio_blob = {
            'mime_type': 'audio/wav',
            'data': source_audio
        }

        transcribe_prompt = """Transcribe this audio accurately in its original language. Return only the transcribed text, nothing else."""

        response = model.generate_content([transcribe_prompt, audio_blob])
        transcription = response.text.strip()

        # Step 2: detect the language of the transcription
        detected_lang = detect_language(transcription)

        # Step 3: translate only when source and target languages differ
        if detected_lang.lower() != target_language.lower():
            print(f"🔄 Translating from {detected_lang} to {target_language}")
            translated_text = translate_text_with_gemini(transcription, target_language)

            # translate_text_with_gemini reports failures in-band. Match its
            # exact error prefixes so that a genuine translation which merely
            # begins with the word "Lỗi" is not mistaken for a failure.
            if translated_text.startswith(("❌ Lỗi:", "Lỗi dịch thuật:")):
                print(f"❌ Translation failed: {translated_text}")
                # Fall back to the untranslated transcription
                translated_text = transcription
            else:
                print("✅ Translation successful")
        else:
            print(f"ℹ️ No translation needed - same language ({detected_lang})")
            translated_text = transcription

        # Step 4: synthesize speech with Edge TTS (voice id from the global VOICE_MAP)
        edge_voice = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
        print(f"🎙️ Generating audio with voice: {edge_voice}")
        speech_audio = asyncio.run(generate_speech(translated_text, edge_voice, 0.0))
        print(f"🎵 Generated audio data: {len(speech_audio)} bytes")

        # mkstemp returns an open descriptor; close it and reopen by path to write
        fd, temp_output_path = tempfile.mkstemp(suffix=".wav", prefix="translated_audio_")
        os.close(fd)
        print(f"📁 Created temp audio file: {temp_output_path}")

        with open(temp_output_path, 'wb') as f:
            f.write(speech_audio)

        # Verify file was created (diagnostic only; the result is returned either way)
        if os.path.exists(temp_output_path):
            file_size = os.path.getsize(temp_output_path)
            print(f"✅ Audio file created successfully: {file_size} bytes")
        else:
            print(f"❌ Failed to create audio file: {temp_output_path}")

        # Step 5: export the text for download
        if text_format in ("md", "docx"):
            # Both rich formats are rendered from the same Markdown report
            markdown_content = format_audio_translation_response(
                transcription, translated_text, detected_lang, target_language, voice_selection
            )
            text_file_path = create_text_file(markdown_content, text_format, "audio_translation")
        else:
            # Plain text export contains only the translated text
            text_file_path = create_text_file(translated_text, "txt", "audio_translation")

        return (
            transcription,
            detected_lang,
            translated_text,
            target_language,
            temp_output_path,
            temp_output_path,
            transcription,
            translated_text,
            text_file_path,
        )

    except Exception as e:
        # voice_selection is a parameter, so it is always bound at this point
        target_language = get_target_language_from_voice(voice_selection)
        return _failure(f"Lỗi: {str(e)}", "Lỗi", target_language)

# Voice labels offered in the UI, grouped under their country label.
# Key order matters: the country dropdown is built from these keys.
voice_choices_by_country = {
    "🇻🇳 Việt Nam": ["🇻🇳 HoaiMy - Nữ Việt Chuẩn", "🇻🇳 NamMinh - Nam Việt Chuẩn"],
    "🇺🇸 Hoa Kỳ": ["🇺🇸 Aria - Nữ Mỹ", "🇺🇸 Guy - Nam Mỹ"],
    "🇬🇧 Anh": ["🇬🇧 Sonia - Nữ Anh", "🇬🇧 Ryan - Nam Anh"],
    "🇩🇪 Đức": ["🇩🇪 Katja - Deutsche Frau", "🇩🇪 Conrad - Deutscher Mann"],
    "🇫🇷 Pháp": ["🇫🇷 Denise - Française", "🇫🇷 Henri - Français"],
    "🇪🇸 Tây Ban Nha": ["🇪🇸 Elvira - Española", "🇪🇸 Alvaro - Español"],
    "🇮🇹 Ý": ["🇮🇹 Elsa - Italiana", "🇮🇹 Diego - Italiano"],
    "🇯🇵 Nhật Bản": ["🇯🇵 Nanami - 日本女性", "🇯🇵 Keita - 日本男性"],
    "🇰🇷 Hàn Quốc": ["🇰🇷 SunHi - 한국 여성", "🇰🇷 BongJin - 한국 남성"],
    "🇨🇳 Trung Quốc": ["🇨🇳 Xiaoxiao - 中文女声", "🇨🇳 Yunxi - 中文男声"],
    "🇷🇺 Nga": ["🇷🇺 Svetlana - Русская", "🇷🇺 Dmitry - Русский"],
    "🇵🇹 Bồ Đào Nha": ["🇵🇹 Francisca - Portuguesa", "🇵🇹 Antonio - Português"],
    "🇸🇦 Ả Rập": ["🇸🇦 Zariyah - عربية", "🇸🇦 Hamed - عربي"],
}

def update_voices(country):
    """Build the voice dropdown for the selected country.

    Countries that are not in ``voice_choices_by_country`` fall back to the
    Vietnamese voice list. The first voice of the list is pre-selected.
    """
    # Resolve the key once so both cases share the same Dropdown construction
    selected = country if country in voice_choices_by_country else "🇻🇳 Việt Nam"
    options = voice_choices_by_country[selected]
    return gr.Dropdown(choices=options, value=options[0])

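# Custom stylesheet passed to gr.Blocks(css=...): page layout and cards, status
# animations, dropdown click fixes, button effects, Markdown response styling
# and mobile breakpoints.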
css = """

* {

    font-family: system-ui, -apple-system, 'Segoe UI', Arial, sans-serif;

}



.gradio-container {

    max-width: 1200px;

    margin: 0 auto;

    position: relative;

}



/* Critical fix for dropdown interaction */

.gradio-container * {

    pointer-events: auto;

}



/* Hide Gradio footer */

.footer {

    display: none !important;

}



/* Pulsing animation for processing status */

@keyframes pulse-processing {

    0% {

        opacity: 1;

        transform: scale(1);

        box-shadow: 0 4px 15px rgba(255, 193, 7, 0.3);

    }

    50% {

        opacity: 0.8;

        transform: scale(1.02);

        box-shadow: 0 6px 25px rgba(255, 193, 7, 0.6);

    }

    100% {

        opacity: 1;

        transform: scale(1);

        box-shadow: 0 4px 15px rgba(255, 193, 7, 0.3);

    }

}



.status-processing {

    animation: pulse-processing 1.5s ease-in-out infinite;

    background: linear-gradient(135deg, #FFC107 0%, #FF9800 100%) !important;

}



/* Success status animation */

@keyframes pulse-success {

    0% {

        opacity: 1;

        transform: scale(1);

    }

    50% {

        opacity: 0.9;

        transform: scale(1.01);

    }

    100% {

        opacity: 1;

        transform: scale(1);

    }

}



.status-success {

    animation: pulse-success 2s ease-in-out 3;

    background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;

}



/* Custom footer to cover Gradio attribution */

.custom-footer {

    position: fixed;

    bottom: 0;

    left: 0;

    right: 0;

    background: linear-gradient(135deg, #4A90E2 0%, #2E86AB 70%, #FF8A65 85%, #FF6B9D 100%);

    color: white;

    padding: 15px;

    text-align: center;

    font-weight: bold;

    z-index: 1000;

    box-shadow: 0 -2px 10px rgba(0,0,0,0.1);

}



/* Add padding to body to account for fixed footer */

body {

    padding-bottom: 60px;

}



/* Mobile-first responsive design */

.input-card {

    background: rgba(255,255,255,0.95);

    border-radius: 16px;

    padding: 16px;

    margin: 10px 0;

    box-shadow: 0 4px 20px rgba(0,0,0,0.1);

    backdrop-filter: blur(10px);

}



.output-area {

    background: rgba(255,255,255,0.95);

    border-radius: 16px;

    padding: 16px;

    margin: 15px 0;

    min-height: 200px;

    box-shadow: 0 4px 20px rgba(0,0,0,0.1);

}



.examples-section {

    background: rgba(255,255,255,0.9);

    border-radius: 16px;

    padding: 16px;

    margin: 20px 0;

}



.main-header {

    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);

    color: white;

    padding: 20px;

    border-radius: 10px;

    margin-bottom: 20px;

    text-align: center;

}



.feature-box {

    background: #f8f9fa;

    padding: 15px;

    border-radius: 8px;

    margin: 10px 0;

    border-left: 4px solid #667eea;

}



.status-indicator {

    display: inline-block;

    padding: 5px 10px;

    border-radius: 15px;

    font-size: 12px;

    font-weight: bold;

    margin: 5px;

}



.status-success {

    background-color: #d4edda;

    color: #155724;

}



.status-processing {

    background-color: #fff3cd;

    color: #856404;

}



.comparison-section {

    border: 1px solid #e0e0e0;

    border-radius: 8px;

    padding: 15px;

    margin: 10px 0;

    background: #fafafa;

}



.language-label {

    font-weight: bold;

    color: #667eea;

    padding: 5px 10px;

    background: #f0f2ff;

    border-radius: 15px;

    display: inline-block;

    margin-bottom: 10px;

    font-size: 14px;

}



.content-compare {

    background: white;

    border: 1px solid #ddd;

    border-radius: 6px;

    padding: 12px;

    min-height: 120px;

    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;

    line-height: 1.5;

}



/* Reset any problematic dropdown styles */

.gradio-container * {

    pointer-events: auto;

}



/* Remove any potential blocking overlays */

.gradio-container::before,

.gradio-container::after {

    display: none;

}



/* Ensure all interactive elements work */

button, select, input, textarea, .gr-dropdown {

    pointer-events: auto !important;

    position: relative !important;

}



/* Simple dropdown fix without complex selectors */

[class*="dropdown"] {

    position: relative !important;

    z-index: 999 !important;

}



[class*="dropdown"] * {

    pointer-events: auto !important;

}



/* Make sure no overlay blocks clicks */

.gradio-container .gr-form {

    position: relative;

    z-index: 1;

}



.gradio-container .gr-block {

    position: relative;

    z-index: 1;

}



.mobile-button {

    width: 100% !important;

    padding: 15px !important;

    font-size: 1.1em !important;

    margin: 20px 0 !important;

    border-radius: 12px !important;

    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;

    border: none !important;

    color: white !important;

    font-weight: bold !important;

    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3) !important;

    transition: all 0.3s ease !important;

    cursor: pointer !important;

    position: relative !important;

    overflow: hidden !important;

}



.mobile-button:hover {

    transform: translateY(-2px) !important;

    box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4) !important;

    background: linear-gradient(135deg, #5a6fd8 0%, #6b4190 100%) !important;

}



.mobile-button:active {

    transform: translateY(0px) !important;

    box-shadow: 0 2px 10px rgba(102, 126, 234, 0.3) !important;

}



/* Ripple effect for button */

.mobile-button::before {

    content: '';

    position: absolute;

    top: 50%;

    left: 50%;

    width: 0;

    height: 0;

    border-radius: 50%;

    background: rgba(255, 255, 255, 0.3);

    transform: translate(-50%, -50%);

    transition: width 0.6s, height 0.6s;

}



.mobile-button:active::before {

    width: 300px;

    height: 300px;

}



/* Loading spinner animation */

@keyframes spin {

    0% { transform: rotate(0deg); }

    100% { transform: rotate(360deg); }

}



.loading-spinner {

    display: inline-block;

    width: 20px;

    height: 20px;

    border: 3px solid rgba(255,255,255,0.3);

    border-radius: 50%;

    border-top-color: white;

    animation: spin 1s ease-in-out infinite;

    margin-right: 10px;

}



/* Button pulse effect when processing */

@keyframes pulse {

    0% {

        box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);

    }

    50% {

        box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6);

    }

    100% {

        box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);

    }

}



.button-processing {

    animation: pulse 2s ease-in-out infinite;

    background: linear-gradient(135deg, #FF8E53 0%, #FF6B6B 100%) !important;

}



.mobile-textbox textarea {

    border-radius: 10px !important;

    border: 2px solid #e0e0e0 !important;

    padding: 12px !important;

    font-size: 1em !important;

    line-height: 1.5 !important;

}



.mobile-compare textarea {

    border-radius: 8px !important;

    border: 1px solid #ddd !important;

    padding: 10px !important;

    background: #fafafa !important;

    font-size: 0.95em !important;

}



.mobile-audio {

    margin: 10px 0 !important;

    border-radius: 10px !important;

}



.mobile-file {

    margin: 10px 0 !important;

    border-radius: 10px !important;

}



/* Beautiful Markdown styling for Voice RAG responses */

.markdown-response {

    background: linear-gradient(135deg, #ffffff 0%, #f8fffe 100%);

    border-radius: 12px;

    padding: 20px;

    margin: 15px 0;

    box-shadow: 0 4px 20px rgba(0,0,0,0.1);

    border-left: 4px solid #4CAF50;

}



.markdown-response h1 {

    color: #2e7d32;

    border-bottom: 2px solid #4CAF50;

    padding-bottom: 10px;

    margin-bottom: 20px;

    font-size: 1.8em;

}



.markdown-response h2 {

    color: #388E3C;

    margin-top: 25px;

    margin-bottom: 15px;

    font-size: 1.4em;

    border-left: 3px solid #4CAF50;

    padding-left: 15px;

}



.markdown-response h3 {

    color: #43A047;

    margin-top: 20px;

    margin-bottom: 12px;

    font-size: 1.2em;

}



.markdown-response p {

    line-height: 1.6;

    margin-bottom: 12px;

    color: #333;

}



.markdown-response blockquote {

    background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);

    border-left: 4px solid #4CAF50;

    padding: 15px 20px;

    margin: 15px 0;

    border-radius: 8px;

    font-style: italic;

    color: #2e7d32;

}



.markdown-response table {

    width: 100%;

    border-collapse: collapse;

    margin: 15px 0;

    box-shadow: 0 2px 10px rgba(0,0,0,0.1);

    border-radius: 8px;

    overflow: hidden;

}



.markdown-response table th {

    background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);

    color: white;

    padding: 12px 15px;

    text-align: left;

    font-weight: bold;

}



.markdown-response table td {

    padding: 12px 15px;

    border-bottom: 1px solid #e0e0e0;

    background: white;

}



.markdown-response table tr:nth-child(even) td {

    background: #f8fffe;

}



.markdown-response table tr:hover td {

    background: #e8f5e8;

    transition: background 0.3s ease;

}



.markdown-response ul, .markdown-response ol {

    margin: 15px 0;

    padding-left: 25px;

}



.markdown-response li {

    margin-bottom: 8px;

    line-height: 1.5;

}



.markdown-response code {

    background: #f5f5f5;

    border: 1px solid #e0e0e0;

    border-radius: 4px;

    padding: 2px 6px;

    font-family: 'Courier New', monospace;

    color: #d32f2f;

}



.markdown-response pre {

    background: #f5f5f5;

    border: 1px solid #e0e0e0;

    border-radius: 8px;

    padding: 15px;

    overflow-x: auto;

    margin: 15px 0;

}



.markdown-response pre code {

    background: none;

    border: none;

    padding: 0;

    color: #333;

}



.markdown-response hr {

    border: none;

    height: 2px;

    background: linear-gradient(90deg, transparent, #4CAF50, transparent);

    margin: 25px 0;

}



.markdown-response strong {

    color: #2e7d32;

    font-weight: bold;

}



.markdown-response em {

    color: #388E3C;

    font-style: italic;

}



/* Responsive design for markdown */

@media (max-width: 768px) {

    .markdown-response {

        padding: 15px;

        margin: 10px 0;

    }

    

    .markdown-response table {

        font-size: 0.9em;

    }

    

    .markdown-response h1 {

        font-size: 1.6em;

    }

    

    .markdown-response h2 {

        font-size: 1.3em;

    }

}



/* Mobile responsive breakpoints */

@media (max-width: 768px) {

    .gradio-container {

        padding: 10px !important;

    }

    

    .input-card {

        padding: 12px !important;

        margin: 8px 0 !important;

    }

    

    .output-area {

        padding: 12px !important;

        margin: 10px 0 !important;

    }

    

    .examples-section {

        padding: 12px !important;

    }

    

    .main-header h2 {

        font-size: 1.5em !important;

    }

    

    .main-header p {

        font-size: 1em !important;

    }

    

    /* Mobile layout adjustments - less aggressive */

    .gr-row {

        flex-direction: column;

    }

    

    .gr-column {

        width: 100%;

        margin-bottom: 15px;

    }

}



@media (max-width: 480px) {

    .gradio-container {

        padding: 5px !important;

    }

    

    .input-card {

        padding: 10px !important;

        margin: 5px 0 !important;

    }

    

    .main-header {

        padding: 15px !important;

    }

    

    .main-header h2 {

        font-size: 1.3em !important;

    }

    

    .mobile-button {

        padding: 12px !important;

        font-size: 1em !important;

    }

}



/* JavaScript for button interactions */

"""

# Client-side button effects for every `.mobile-button`: press/hover feedback
# and a temporary "processing" state after a click.
# Each button is wired only once (tracked through a data attribute), because
# addButtonEffects is re-run every 2 seconds to pick up re-rendered buttons;
# without that guard the anonymous hover listeners would pile up on each pass.
# NOTE(review): the completion check uses the selector [style*="Hoàn thành"],
# which matches on the inline style attribute rather than on visible text, so
# the ~15 s timeout is presumably what re-enables the button — confirm.
js_code = """

<script>

function addButtonEffects() {

    // Find button by class since Gradio might change IDs

    const buttons = document.querySelectorAll('.mobile-button');

    

    buttons.forEach(button => {

        // Wire each button only once: this function is re-run every 2 seconds

        if (button.dataset.effectsBound === 'true') {

            return;

        }

        button.dataset.effectsBound = 'true';

        

        // Add enhanced click effect

        button.addEventListener('click', handleClick);

        

        // Add hover effects for better interaction

        button.addEventListener('mouseenter', function() {

            if (!this.disabled) {

                this.style.transform = 'translateY(-2px) scale(1.02)';

            }

        });

        

        button.addEventListener('mouseleave', function() {

            if (!this.disabled) {

                this.style.transform = 'translateY(0) scale(1)';

            }

        });

    });

}



function handleClick(e) {

    // currentTarget is the button itself even when a child node was clicked

    const button = e.currentTarget;

    

    // Immediate visual feedback

    button.style.transform = 'scale(0.98)';

    button.style.transition = 'all 0.1s ease';

    

    setTimeout(() => {

        button.style.transform = 'scale(1)';

        button.style.transition = 'all 0.3s ease';

    }, 100);

    

    // Add processing state

    const originalText = button.innerHTML;

    button.innerHTML = '<span class="loading-spinner"></span>⏳ ĐANG XỬ LÝ...';

    button.classList.add('button-processing');

    button.disabled = true;

    

    // Monitor for completion and reset

    let checkCount = 0;

    const checkInterval = setInterval(() => {

        checkCount++;

        

        // Reset after 15 seconds max or if status changes

        const statusElements = document.querySelectorAll('[style*="Hoàn thành"]');

        if (statusElements.length > 0 || checkCount > 50) {

            clearInterval(checkInterval);

            button.innerHTML = originalText;

            button.classList.remove('button-processing');

            button.disabled = false;

            button.style.transform = 'scale(1)';

        }

    }, 300);

}



// Initialize when DOM is ready

if (document.readyState === 'loading') {

    document.addEventListener('DOMContentLoaded', addButtonEffects);

} else {

    addButtonEffects();

}



// Re-initialize periodically for Gradio updates

setInterval(addButtonEffects, 2000);

</script>

"""

# Create interface with tabs
with gr.Blocks(css=css, title="🎙️ Voice AI Platform - Voice RAG & Audio Translation") as demo:
    # Simplified header for faster loading on HF Spaces
    if not (os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")):
        # Only load complex microphone permissions in local development
        gr.HTML("""

        <meta charset="UTF-8">

        <meta name="viewport" content="width=device-width, initial-scale=1.0">

        <meta http-equiv="Permissions-Policy" content="microphone=*, camera=*, display-capture=*, autoplay=*">

        <meta http-equiv="Feature-Policy" content="microphone 'self' *; camera 'self' *; autoplay 'self' *">

        <meta name="theme-color" content="#4A90E2">

    

    <script>

    // Global microphone management

    window.microphoneStatus = {

        granted: false,

        requested: false,

        supported: false

    };

    

    // Enhanced microphone permission request for iframe and main window

    function initializeMicrophoneSupport() {

        console.log('🎤 Initializing microphone support...');

        

        // Check browser support

        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {

            window.microphoneStatus.supported = true;

            console.log('✅ Browser supports microphone');

            

            // Check current permission status

            if (navigator.permissions) {

                navigator.permissions.query({name: 'microphone'}).then(function(result) {

                    console.log('🔐 Current microphone permission:', result.state);

                    window.microphoneStatus.granted = (result.state === 'granted');

                    

                    // Update UI based on permission status

                    updateMicrophoneUI(result.state);

                    

                    // Listen for permission changes

                    result.onchange = function() {

                        console.log('🔄 Microphone permission changed to:', this.state);

                        window.microphoneStatus.granted = (this.state === 'granted');

                        updateMicrophoneUI(this.state);

                    };

                }).catch(function(err) {

                    console.log('⚠️ Permission query failed:', err);

                });

            }

            

            // Auto-request permissions if we're in iframe (with user gesture simulation)

            if (window.location !== window.parent.location && !window.microphoneStatus.requested) {

                console.log('🖼️ Running in iframe - preparing microphone access');

                window.microphoneStatus.requested = true;

                

                // Add a global click listener to request permissions on first interaction

                document.addEventListener('click', function requestOnFirstClick() {

                    if (!window.microphoneStatus.granted) {

                        console.log('👆 First click detected - requesting microphone access');

                        requestMicrophonePermission();

                        document.removeEventListener('click', requestOnFirstClick);

                    }

                }, { once: true });

            }

        } else {

            console.log('❌ Browser does not support microphone');

            window.microphoneStatus.supported = false;

            updateMicrophoneUI('unsupported');

        }

    }

    

    function requestMicrophonePermission() {

        console.log('🎤 Requesting microphone permission...');

        

        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {

            navigator.mediaDevices.getUserMedia({ 

                audio: {

                    echoCancellation: true,

                    noiseSuppression: true,

                    autoGainControl: true,

                    sampleRate: 44100

                } 

            })

            .then(function(stream) {

                console.log('✅ Microphone access granted');

                window.microphoneStatus.granted = true;

                

                // Stop the stream immediately (we just wanted permission)

                stream.getTracks().forEach(track => track.stop());

                

                updateMicrophoneUI('granted');

                

                // Notify other parts of the app

                window.dispatchEvent(new CustomEvent('microphoneGranted'));

                

            })

            .catch(function(err) {

                console.log('❌ Microphone access denied:', err);

                window.microphoneStatus.granted = false;

                updateMicrophoneUI('denied', err.message);

            });

        }

    }

    

    function updateMicrophoneUI(status, errorMessage = '') {

        // This will be called by the specific UI components

        console.log('🎛️ Updating microphone UI for status:', status);

        window.dispatchEvent(new CustomEvent('microphoneStatusChanged', { 

            detail: { status, errorMessage } 

        }));

    }

    

    // Initialize when DOM is ready

    if (document.readyState === 'loading') {

        document.addEventListener('DOMContentLoaded', initializeMicrophoneSupport);

    } else {

        initializeMicrophoneSupport();

    }

    

    // Also initialize on any dynamic content changes (for Gradio updates)

    if (window.MutationObserver) {

        const observer = new MutationObserver(function(mutations) {

            mutations.forEach(function(mutation) {

                if (mutation.type === 'childList' && mutation.addedNodes.length > 0) {

                    // Check if audio components were added

                    const hasAudioComponent = Array.from(mutation.addedNodes).some(node => 

                        node.nodeType === 1 && (

                            node.querySelector && (

                                node.querySelector('audio') || 

                                node.querySelector('[data-testid*="audio"]') ||

                                node.classList.contains('audio')

                            )

                        )

                    );

                    

                    if (hasAudioComponent) {

                        console.log('🔄 Audio component detected, re-initializing microphone');

                        setTimeout(initializeMicrophoneSupport, 500);

                    }

                }

            });

        });

        

        observer.observe(document.body, {

            childList: true,

            subtree: true

        });

    }

    </script>

    

    <div style="text-align: center; background: linear-gradient(135deg, #4A90E2 0%, #FF6B9D 100%); color: white; padding: 20px; border-radius: 10px; margin-bottom: 20px;">

        <h1>🎙️ Voice AI Platform</h1>

        <p>Voice RAG, Audio Translation và Voice Studio - Nền tảng AI giọng nói toàn diện</p>

        <div style="margin-top: 10px; font-size: 14px; opacity: 0.9;">

            ✨ Tính năng mới: Voice RAG với 24 giọng nói đa ngôn ngữ

        </div>

        <div style="margin-top: 8px;">🧠 <strong>Digitized Brains</strong></div>

    </div>

    """)
    else:
        # Production mode - minimal header
        gr.HTML('<div style="text-align:center;"><h1>🎙️ Voice AI Platform</h1></div>')
    
    with gr.Tabs():
        # Tab 1: Voice RAG
        with gr.TabItem("📚 Voice RAG"):
            # Header section with hf_voice style
            gr.HTML("""

            <div style="display: flex; justify-content: center; gap: 15px; margin: 20px 0; flex-wrap: wrap;">

                <div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;">

                    <h4>📚 Voice RAG</h4>

                    <p style="margin: 0; font-size: 12px;">Hỏi đáp tài liệu thông minh</p>

                </div>

                <div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;">

                    <h4>🌍 Multi-Language</h4>

                    <p style="margin: 0; font-size: 12px;">13 ngôn ngữ trả lời</p>

                </div>

                <div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;">

                    <h4>🎤 Voice Output</h4>

                    <p style="margin: 0; font-size: 12px;">24 giọng nói đa dạng</p>

                </div>

                <div style="background: linear-gradient(135deg, #A8E6CF 0%, #88D8A3 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;">

                    <h4>🔄 AI Gemini</h4>

                    <p style="margin: 0; font-size: 12px;">Gemini 2.0 Flash</p>

                </div>

            </div>

            """)
            
            # Section heading for the RAG tab's input form.
            gr.Markdown("### 📝 Upload tài liệu và đặt câu hỏi")
            
            # Input section - Mobile optimized
            # Each control lives in its own Row inside one Column, so the form
            # is laid out as a vertical stack.
            with gr.Column():
                # Document upload
                # The picker only offers the three listed extensions.
                with gr.Row():
                    file_upload_rag = gr.File(
                        label="📎 Tải lên tài liệu (PDF, DOCX, TXT)", 
                        file_types=[".pdf", ".docx", ".txt"]
                    )
                
                # Question input
                with gr.Row():
                    question_input_rag = gr.Textbox(
                        label="❓ Câu hỏi của bạn",
                        placeholder="Hãy đặt câu hỏi về nội dung tài liệu...",
                        lines=3
                    )
                
                # Language selection for answer
                # Choices come from SUPPORTED_LANGUAGES; "Vietnamese" is preselected.
                with gr.Row():
                    answer_language_dropdown_rag = gr.Dropdown(
                        choices=SUPPORTED_LANGUAGES,
                        value="Vietnamese",
                        label="🌍 Ngôn ngữ trả lời"
                    )
                
                # Voice selection from Voice Studio
                # Uses the same voice_choices_by_country mapping as the Voice Studio
                # tab. The voice list starts with the Vietnamese entries to match the
                # default country; the country dropdown's change event (wired after
                # the helper functions below) passes the selection to update_voices
                # and writes the result into rag_voice_dropdown.
                with gr.Row():
                    with gr.Column(scale=1):
                        rag_country_dropdown = gr.Dropdown(
                            choices=list(voice_choices_by_country.keys()),
                            value="🇻🇳 Việt Nam",
                            label="🌍 Chọn quốc gia giọng nói"
                        )
                    
                    with gr.Column(scale=1):
                        # NOTE(review): the default value is hard-coded and is assumed
                        # to be one of voice_choices_by_country["🇻🇳 Việt Nam"] - confirm.
                        rag_voice_dropdown = gr.Dropdown(
                            choices=voice_choices_by_country["🇻🇳 Việt Nam"],
                            value="🇻🇳 HoaiMy - Nữ Việt Chuẩn",
                            label="🎭 Chọn giọng nói"
                        )
                
                # Format selection for download
                # The Word option is only offered when DOCX_AVAILABLE is true.
                with gr.Row():
                    rag_text_format_dropdown = gr.Dropdown(
                        choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"],
                        value="Markdown (.md)",
                        label="📄 Định dạng file trả lời"
                    )
                
                # Process button
                with gr.Row():
                    submit_btn_rag = gr.Button(
                        "🚀 Xử lý tài liệu và trả lời", 
                        variant="primary",
                        size="lg"
                    )
            
            # Results section - Mobile optimized
            # The four components created here (detected language, answer, audio,
            # text file) are the outputs of the submit button's pipeline step.
            with gr.Column():
                # Document info section
                with gr.Accordion("📄 Thông tin tài liệu", open=True):
                    # Read-only: filled by the pipeline, never typed into by the user.
                    detected_doc_language_rag = gr.Textbox(
                        label="🌐 Ngôn ngữ tài liệu được phát hiện",
                        lines=1,
                        interactive=False,
                        placeholder="Tự động nhận diện ngôn ngữ tài liệu..."
                    )
                
                # Text answer section
                with gr.Accordion("💬 Câu trả lời", open=True):
                    # Static decorative header; no handle is kept because nothing
                    # updates it.
                    gr.HTML("""

                    <div style="

                        background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);

                        padding: 15px;

                        border-radius: 12px;

                        margin: 15px 0;

                        border-left: 4px solid #4CAF50;

                        text-align: center;

                    ">

                        <h4 style="margin: 0 0 10px 0; color: #2e7d32;">💬 AI Response with Markdown Formatting</h4>

                        <p style="color: #388E3C; margin: 0; font-style: italic;">

                            Formatted response with tables, headers, and beautiful layout

                        </p>

                    </div>

                    """)
                    
                    # Rendered as Markdown; the initial value is a placeholder shown
                    # until the first answer arrives. The "markdown-response" class
                    # is presumably styled by CSS defined elsewhere in the file.
                    answer_output_rag = gr.Markdown(
                        value="**Câu trả lời sẽ xuất hiện ở đây sau khi xử lý...**\n\n*Hỗ trợ format Markdown với tables, headers, lists và nhiều style khác*",
                        label="",
                        show_label=False,
                        elem_classes=["markdown-response"]
                    )
                
                # Downloads section - Mobile optimized
                with gr.Accordion("💾 Tải xuống kết quả", open=True):
                    gr.HTML("""

                    <div style="text-align: center; margin-bottom: 15px;">

                        <p style="color: #666; font-style: italic;">Tải xuống câu trả lời dưới dạng file và audio</p>

                    </div>

                    """)
                    
                    # Stack vertically on mobile
                    with gr.Column():
                        # Audio download section
                        # type="filepath": the component exchanges a path on disk
                        # rather than raw audio samples.
                        with gr.Row():
                            audio_output_rag = gr.Audio(
                                label="🔊 Audio câu trả lời",
                                type="filepath"
                            )
                        
                        # Text download section  
                        # Extensions mirror the three formats of the format dropdown.
                        with gr.Row():
                            text_output_rag = gr.File(
                                label="📄 Văn bản câu trả lời",
                                file_count="single",
                                file_types=[".md", ".txt", ".docx"]
                            )
            
            # Status indicator for RAG
            # Starts as a green "ready" banner. The submit button's event chain
            # overwrites it with the HTML returned by update_rag_status_processing()
            # and then update_rag_status_complete().
            rag_status_text = gr.HTML("""

            <div style="text-align: center; margin: 20px 0;">

                <div style="

                    background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%);

                    color: white;

                    padding: 15px;

                    border-radius: 12px;

                    box-shadow: 0 4px 15px rgba(78,205,196,0.3);

                ">

                    <span style="font-weight: bold; font-size: 1.1em;">✅ Sẵn sàng xử lý tài liệu</span>

                </div>

            </div>

            """)
            
            # Helper function for RAG format
            def get_rag_format_from_dropdown(format_choice):
                """Map a format-dropdown label to a short format key.

                Args:
                    format_choice: Label selected in the dropdown, e.g.
                        "Markdown (.md)", "TXT (.txt)" or "Word (.docx)".
                        May be None or a non-string when nothing is selected.

                Returns:
                    "docx", "md" or "txt". Anything unrecognised, including
                    None, falls back to "txt".
                """
                # A cleared dropdown (or an API caller) can deliver None; the
                # substring tests below would raise TypeError on it.
                if not isinstance(format_choice, str):
                    return "txt"
                # Word is tested first, matching the original precedence.
                if "Word" in format_choice or "docx" in format_choice:
                    return "docx"
                if "Markdown" in format_choice or "md" in format_choice:
                    return "md"
                return "txt"
            
            # RAG status banner helpers (HTML shown while processing / when done)
            def update_rag_status_processing():
                """Return the orange "processing" banner HTML for the RAG status area."""
                processing_banner = """

                <div style="text-align: center; margin: 20px 0;">

                    <div style="

                        background: linear-gradient(135deg, #FF8E53 0%, #FF6B6B 100%);

                        color: white;

                        padding: 15px;

                        border-radius: 12px;

                        box-shadow: 0 4px 15px rgba(255,142,83,0.3);

                    ">

                        <span style="font-weight: bold; font-size: 1.1em;">⏳ Đang xử lý tài liệu...</span>

                    </div>

                </div>

                """
                return processing_banner
            
            def update_rag_status_complete():
                """Return the green "completed" banner HTML for the RAG status area."""
                complete_banner = """

                <div style="text-align: center; margin: 20px 0;">

                    <div style="

                        background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%);

                        color: white;

                        padding: 15px;

                        border-radius: 12px;

                        box-shadow: 0 4px 15px rgba(78,205,196,0.3);

                    ">

                        <span style="font-weight: bold; font-size: 1.1em;">✅ Xử lý hoàn thành!</span>

                    </div>

                </div>

                """
                return complete_banner
            
            # Event handlers for Voice RAG
            # Country change: hand the selected country to update_voices and write
            # its result into the voice dropdown.
            rag_country_dropdown.change(update_voices, [rag_country_dropdown], [rag_voice_dropdown])

            # Submit runs three chained steps:
            #   1. show the "processing" banner,
            #   2. run voice_rag_pipeline with the format label converted to a key,
            #   3. show the "complete" banner.
            submit_btn_rag.click(
                lambda: update_rag_status_processing(),
                outputs=[rag_status_text],
            ).then(
                lambda doc_file, user_question, answer_language, voice_label, format_label: voice_rag_pipeline(doc_file, user_question, answer_language, voice_label, get_rag_format_from_dropdown(format_label)),
                [file_upload_rag, question_input_rag, answer_language_dropdown_rag, rag_voice_dropdown, rag_text_format_dropdown],
                [answer_output_rag, detected_doc_language_rag, audio_output_rag, text_output_rag],
            ).then(
                lambda: update_rag_status_complete(),
                outputs=[rag_status_text],
            )
        
        # Voice Studio Tab
        with gr.TabItem("🎤 Voice Studio"):
            # Static feature cards at the top of the tab; no handle is kept because
            # nothing updates them.
            # NOTE(review): the voice/language counts (2, 4, 20 voices; 10 languages)
            # are hard-coded copy - confirm they match voice_choices_by_country.
            gr.HTML("""

            <div style="display: flex; justify-content: center; gap: 15px; margin: 20px 0; flex-wrap: wrap;">

                <div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">

                    <h4>🇻🇳 Tiếng Việt</h4>

                    <p style="margin: 0; font-size: 12px;">2 giọng chuẩn</p>

                    <p style="margin: 0; font-size: 10px;">HoaiMy • NamMinh</p>

                </div>

                <div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">

                    <h4>🇺🇸🇬🇧 English</h4>

                    <p style="margin: 0; font-size: 12px;">4 giọng chuẩn</p>

                    <p style="margin: 0; font-size: 10px;">US • UK</p>

                </div>

                <div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">

                    <h4>🌍 Đa ngôn ngữ</h4>

                    <p style="margin: 0; font-size: 12px;">20 giọng chuẩn</p>

                    <p style="margin: 0; font-size: 10px;">10 ngôn ngữ</p>

                </div>

            </div>

            """)
            
            # Section heading for the Voice Studio input form.
            gr.Markdown("### 📝 Nhập nội dung và chọn giọng nói")
            
            # Text to synthesise (also the source text for the optional translation).
            with gr.Row():
                text_input = gr.Textbox(
                    placeholder="Nhập văn bản cần chuyển thành giọng nói...",
                    lines=4,
                    label="Văn bản",
                    scale=2
                )
            
            # Country and voice pickers. The voice list starts with the Vietnamese
            # entries to match the default country; country_dropdown's change event
            # (wired further down) passes the selection to update_voices and writes
            # the result into voice_dropdown.
            with gr.Row():
                with gr.Column(scale=1):
                    country_dropdown = gr.Dropdown(
                        choices=list(voice_choices_by_country.keys()),
                        value="🇻🇳 Việt Nam",
                        label="🌍 Chọn quốc gia"
                    )
                
                with gr.Column(scale=1):
                    # NOTE(review): the default value is hard-coded and is assumed to
                    # be one of voice_choices_by_country["🇻🇳 Việt Nam"] - confirm.
                    voice_dropdown = gr.Dropdown(
                        choices=voice_choices_by_country["🇻🇳 Việt Nam"],
                        value="🇻🇳 HoaiMy - Nữ Việt Chuẩn",
                        label="🎭 Chọn giọng nói"
                    )
            
            # Playback speed (0.5x-2.0x in 0.1 steps) and download format.
            with gr.Row():
                with gr.Column(scale=2):
                    speed_slider = gr.Slider(
                        minimum=0.5,
                        maximum=2.0,
                        value=1.0,
                        step=0.1,
                        label="⚡ Tốc độ phát"
                    )
                with gr.Column(scale=1):
                    # The Word option is only offered when DOCX_AVAILABLE is true.
                    voice_studio_format_dropdown = gr.Dropdown(
                        choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"],
                        value="Markdown (.md)",
                        label="📄 Định dạng file tải xuống"
                    )
            
            # Translation feature
            # The translate button starts hidden; translate_checkbox's change event
            # toggles its visibility through toggle_translation_ui.
            with gr.Row():
                with gr.Column(scale=1):
                    translate_checkbox = gr.Checkbox(
                        label="🌍 Dịch văn bản trước khi tạo giọng nói",
                        value=False
                    )
                with gr.Column(scale=2):
                    translate_btn = gr.Button("🔄 DỊCH VĂN BẢN", variant="secondary", size="lg", visible=False)
                
            # Show translated text when translation is enabled
            # Hidden until the checkbox is ticked. It is interactive, and its current
            # content is one of the inputs of the generate button, so manual edits
            # to the translation are what gets passed on.
            translated_text_output = gr.Textbox(
                label="📝 Văn bản đã dịch",
                lines=3,
                interactive=True,
                visible=False,
                placeholder="Văn bản sau khi dịch sẽ hiển thị ở đây..."
            )
                
            # Main action button; its click handler is registered after the helper
            # functions below.
            generate_btn = gr.Button("🎵 TẠO GIỌNG NÓI", variant="primary", size="lg")
            
            # Status indicator for Voice Studio
            # Static "ready" banner. NOTE(review): no event handler in the visible
            # part of this tab lists studio_status_text as an output - confirm
            # whether it is updated elsewhere.
            studio_status_text = gr.HTML("""

            <div style="text-align: center; margin: 20px 0;">

                <div style="

                    background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%);

                    color: white;

                    padding: 15px;

                    border-radius: 12px;

                    box-shadow: 0 4px 15px rgba(78,205,196,0.3);

                ">

                    <span style="font-weight: bold; font-size: 1.1em;">⚡ Sẵn sàng tạo giọng nói</span>

                </div>

            </div>

            """)
            
            gr.Markdown("### 🎧 Kết quả âm thanh")
            # The result area is an HTML component rather than gr.Audio; it is the
            # first output of the generate button, so the handler presumably returns
            # markup for a player - verify against create_audio_voice_studio.
            audio_output_vs = gr.HTML(
                value="<p style='text-align: center; color: #666; padding: 40px;'>Nhấn 'TẠO GIỌNG NÓI' để bắt đầu 🎤</p>"
            )
            
            # Download section for Voice Studio
            # Collapsed by default (open=False).
            with gr.Accordion("💾 Tải xuống kết quả", open=False):
                # Static explanatory header for the download area.
                gr.HTML("""

                <div style="

                    background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);

                    padding: 15px;

                    border-radius: 12px;

                    margin: 15px 0;

                    border-left: 4px solid #2196F3;

                    text-align: center;

                ">

                    <h4 style="margin: 0 0 10px 0; color: #1976D2;">📄 Tải xuống văn bản với Markdown formatting</h4>

                    <p style="color: #1565C0; margin: 0; font-style: italic;">

                        File chứa thông tin session, cấu hình giọng nói và technical details

                    </p>

                </div>

                """)
                
                # Second output of the generate button. Extensions mirror the three
                # formats offered by voice_studio_format_dropdown.
                voice_studio_text_output = gr.File(
                    label="📄 Văn bản với thông tin chi tiết",
                    file_count="single",
                    file_types=[".md", ".txt", ".docx"]
                )
            
            # Examples section
            # One small button per sample; each is wired further down to a load_*
            # function that fills the text box and selects a country (the translate
            # example also ticks the translation checkbox).
            gr.Markdown("### 📚 Ví dụ nhanh")
            with gr.Row():
                example_vn = gr.Button("🇻🇳 Tiếng Việt", size="sm")
                example_en = gr.Button("🇺🇸 English", size="sm") 
                example_de = gr.Button("🇩🇪 Deutsch", size="sm")
                example_translate = gr.Button("🌍 Dịch thuật", size="sm")
            
            # Example button functions
            def load_vn_example():
                """Return the Vietnamese sample sentence and the matching country label."""
                sample_text = "Xin chào! Chào mừng bạn đến với studio giọng nói."
                country_label = "🇻🇳 Việt Nam"
                return sample_text, country_label
            
            def load_en_example():
                """Return the English sample sentence and the matching country label."""
                sample_text = "Hello! Welcome to our voice studio."
                country_label = "🇺🇸 Hoa Kỳ"
                return sample_text, country_label
                
            def load_de_example():
                """Return the German sample sentence and the matching country label."""
                sample_text = "Hallo! Willkommen in unserem Sprachstudio."
                country_label = "🇩🇪 Đức"
                return sample_text, country_label
            
            def load_translate_example():
                """Return the translation sample: text, country label and checkbox state (True)."""
                sample_text = "Hello! This is an example text for translation."
                country_label = "🇺🇸 Hoa Kỳ"
                enable_translation = True
                return sample_text, country_label, enable_translation
            
            # Translation functions
            def toggle_translation_ui(translate_enabled):
                """Return visibility updates for the translate button and translated-text box.

                Both components are shown when ``translate_enabled`` is truthy and
                hidden otherwise. The tuple order matches the handler's outputs:
                (translate_btn, translated_text_output).
                """
                button_update = gr.update(visible=translate_enabled)
                textbox_update = gr.update(visible=translate_enabled)
                return (button_update, textbox_update)
            
            def translate_text_interface(text, voice_selection):
                """Translate the Voice Studio text into the language of the selected voice.

                Args:
                    text: Source text from the input box; may be None or blank.
                    voice_selection: Voice label passed to
                        get_target_language_from_voice to pick the target language.

                Returns:
                    The value returned by translate_text_with_gemini, or a
                    Vietnamese prompt asking the user to enter text when ``text``
                    is None, empty or whitespace only.
                """
                # `not text` covers None as well as "", so .strip() is never
                # called on a missing value.
                if not text or not text.strip():
                    return "Vui lòng nhập văn bản trước khi dịch"

                target_language = get_target_language_from_voice(voice_selection)
                return translate_text_with_gemini(text, target_language)
            
            def create_voice_with_translation(original_text, translated_text, translate_enabled, voice_selection, speed, text_format="txt"):
                """Create voice output from the translated text when usable, else the original.

                The translated text is used only if translation is enabled and the
                text is non-blank and is not a status message, i.e.:
                  * it does not start with "Lỗi" (presumably the prefix of error
                    messages from the translator - verify against
                    translate_text_with_gemini), and
                  * it is not the "please enter text" prompt that
                    translate_text_interface writes into the translated-text box.

                Args:
                    original_text: Text typed by the user.
                    translated_text: Content of the translated-text box; may be None.
                    translate_enabled: Whether the translation checkbox is ticked.
                    voice_selection: Voice label forwarded to the synthesiser.
                    speed: Playback speed forwarded to the synthesiser.
                    text_format: Download format key ("md", "txt" or "docx").

                Returns:
                    Whatever create_audio_voice_studio returns for the chosen text.
                """
                # Tolerate None from an unset textbox instead of crashing on .strip().
                candidate = (translated_text or "").strip()
                use_translation = bool(
                    translate_enabled
                    and candidate
                    and not candidate.startswith("Lỗi")
                    # The validation prompt is a UI message, not a translation.
                    and candidate != "Vui lòng nhập văn bản trước khi dịch"
                )
                spoken_text = translated_text if use_translation else original_text
                return create_audio_voice_studio(spoken_text, voice_selection, speed, text_format)
                
            # Event handlers for Voice Studio
            # Country change: hand the selected country to update_voices and write
            # its result into the voice dropdown.
            country_dropdown.change(update_voices, [country_dropdown], [voice_dropdown])

            # Example buttons: each loader returns the values for the listed outputs.
            example_vn.click(load_vn_example, outputs=[text_input, country_dropdown])
            example_en.click(load_en_example, outputs=[text_input, country_dropdown])
            example_de.click(load_de_example, outputs=[text_input, country_dropdown])
            example_translate.click(
                load_translate_example,
                outputs=[text_input, country_dropdown, translate_checkbox],
            )

            # Translation UI toggle: show/hide the translate button and result box.
            translate_checkbox.change(
                toggle_translation_ui,
                [translate_checkbox],
                [translate_btn, translated_text_output],
            )

            # Translation button: translate the input text for the selected voice.
            translate_btn.click(
                translate_text_interface,
                [text_input, voice_dropdown],
                [translated_text_output],
            )
            
            # Helper function to extract format and process Voice Studio
            def process_voice_studio(original_text, translated_text, translate_enabled, voice_selection, speed, format_choice):
                """Convert the format label to a key and run Voice Studio generation.

                Args:
                    original_text: Text typed by the user.
                    translated_text: Content of the translated-text box.
                    translate_enabled: Whether the translation checkbox is ticked.
                    voice_selection: Selected voice label.
                    speed: Playback speed from the slider.
                    format_choice: Label from the format dropdown, e.g.
                        "Markdown (.md)"; may be None when nothing is selected.

                Returns:
                    The result of create_voice_with_translation, wired to the
                    audio HTML and download-file outputs.
                """
                # A cleared dropdown can deliver None; treat it as "no match" so the
                # substring tests fall through to "txt" instead of raising TypeError.
                label = format_choice if isinstance(format_choice, str) else ""
                if "Markdown" in label:
                    text_format = "md"
                elif "Word" in label:
                    text_format = "docx"
                else:
                    text_format = "txt"

                return create_voice_with_translation(original_text, translated_text, translate_enabled, voice_selection, speed, text_format)
            
            # Generate voice with translation support
            # Generate: feed every Voice Studio control to process_voice_studio and
            # route its two results to the audio HTML area and the download file.
            generate_btn.click(
                process_voice_studio,
                [
                    text_input,
                    translated_text_output,
                    translate_checkbox,
                    voice_dropdown,
                    speed_slider,
                    voice_studio_format_dropdown,
                ],
                [audio_output_vs, voice_studio_text_output],
            )
        
        # Audio Translation Tab
        with gr.TabItem("🎙️ Audio Translation"):
            # Colorful feature cards like Voice Studio
            # Static markup; no handle is kept because nothing updates it.
            # NOTE(review): "13 ngôn ngữ" and "26 giọng" are hard-coded copy. The
            # Voice RAG tab's card advertises 24 voices - confirm which figure is
            # correct and keep them in sync with the actual language/voice tables.
            gr.HTML("""

            <div style="display: flex; justify-content: center; gap: 15px; margin: 20px 0; flex-wrap: wrap;">

                <div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">

                    <h4>🎤 Ghi âm</h4>

                    <p style="margin: 0; font-size: 12px;">Microphone</p>

                    <p style="margin: 0; font-size: 10px;">Real-time</p>

                </div>

                <div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">

                    <h4>📁 Upload</h4>

                    <p style="margin: 0; font-size: 12px;">Audio Files</p>

                    <p style="margin: 0; font-size: 10px;">WAV • MP3</p>

                </div>

                <div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">

                    <h4>🔄 AI Dịch</h4>

                    <p style="margin: 0; font-size: 12px;">13 ngôn ngữ</p>

                    <p style="margin: 0; font-size: 10px;">Gemini 2.0</p>

                </div>

                <div style="background: linear-gradient(135deg, #A855F7 0%, #EC4899 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">

                    <h4>🎵 Tổng hợp</h4>

                    <p style="margin: 0; font-size: 12px;">Neural TTS</p>

                    <p style="margin: 0; font-size: 10px;">26 giọng</p>

                </div>

            </div>

            """)
            
            # Input section with colorful design
            # Static banner telling the user they can upload an audio file (WAV/MP3)
            # or record from the microphone; purely presentational, so no handle is
            # kept.
            gr.HTML("""

            <div style="

                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);

                color: white;

                padding: 20px;

                border-radius: 15px;

                margin: 20px 0;

                text-align: center;

                box-shadow: 0 8px 32px rgba(0,0,0,0.2);

            ">

                <h3 style="margin: 0 0 10px 0;">🎤 Tải lên file audio hoặc ghi âm trực tiếp</h3>

                <p style="margin: 0; opacity: 0.9; font-size: 0.95em;">

                    Hỗ trợ file WAV, MP3 hoặc ghi âm real-time qua microphone

                </p>

            </div>

            """)
            
            # Enhanced microphone permission notice and controls
            if not (os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")):
                gr.HTML("""

                <div id="microphone-section" style="margin: 15px 0;">

                    <!-- Microphone Status Indicator -->

                    <div id="mic-status" style="

                    background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);

                    color: #2e7d32;

                    padding: 12px;

                    border-radius: 8px;

                    margin-bottom: 10px;

                    text-align: center;

                    border: 1px solid #4caf50;

                    display: none;

                ">

                    <strong>🎤 Microphone Ready</strong> - Bạn có thể ghi âm trực tiếp

                </div>

                

                <!-- Microphone Error/Permission Notice -->

                <div id="microphone-notice" style="

                    background: linear-gradient(135deg, #fff3cd 0%, #ffeaa7 100%);

                    color: #856404;

                    padding: 15px;

                    border-radius: 10px;

                    border: 1px solid #ffeaa7;

                    text-align: center;

                    display: none;

                ">

                    <strong>🎤 Microphone Access Required</strong><br>

                    Để sử dụng ghi âm, vui lòng cho phép truy cập microphone.<br>

                    <button onclick="requestMicrophoneAccess()" style="

                        background: #4caf50; 

                        color: white; 

                        padding: 8px 16px; 

                        border: none;

                        border-radius: 6px;

                        cursor: pointer;

                        margin: 8px 4px;

                    ">🎤 Kích hoạt Microphone</button>

                    <a href="#" onclick="window.open(window.location.href, '_blank')" style="

                        background: #667eea; 

                        color: white; 

                        padding: 8px 16px; 

                        text-decoration: none; 

                        border-radius: 6px;

                        display: inline-block;

                        margin: 8px 4px;

                    ">🔗 Mở cửa sổ mới</a>

                </div>

                

                <!-- Iframe Warning -->

                <div id="iframe-warning" style="

                    background: linear-gradient(135deg, #ffebee 0%, #ffcdd2 100%);

                    color: #c62828;

                    padding: 12px;

                    border-radius: 8px;

                    border: 1px solid #f44336;

                    text-align: center;

                    display: none;

                ">

                    <strong>⚠️ Iframe Restriction</strong><br>

                    Microphone có thể bị hạn chế trong iframe. 

                    <a href="#" onclick="window.open(window.location.href, '_blank')" style="color: #c62828; text-decoration: underline;">

                        Mở trong cửa sổ mới

                    </a> để sử dụng đầy đủ tính năng.

                </div>

            </div>

            

            <script>

            // Enhanced microphone permission handling

            let microphoneAccess = false;

            

            function requestMicrophoneAccess() {

                console.log('🎤 Audio Translation: Requesting microphone access...');

                

                // Use global microphone function if available

                if (window.requestMicrophonePermission) {

                    window.requestMicrophonePermission();

                    return;

                }

                

                // Fallback to local implementation

                if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {

                    navigator.mediaDevices.getUserMedia({ 

                        audio: {

                            echoCancellation: true,

                            noiseSuppression: true,

                            autoGainControl: true,

                            sampleRate: 44100

                        } 

                    })

                    .then(function(stream) {

                        console.log('✅ Audio Translation: Microphone access granted');

                        microphoneAccess = true;

                        

                        // Show success status

                        updateLocalMicrophoneUI('granted');

                        

                        // Stop the stream (we just wanted permission)

                        stream.getTracks().forEach(track => track.stop());

                        

                        // Trigger Gradio audio component refresh

                        setTimeout(() => {

                            const audioComponents = document.querySelectorAll('[data-testid*="audio"]');

                            audioComponents.forEach(comp => {

                                // Try to trigger a refresh or re-initialization

                                if (comp.click) comp.click();

                            });

                        }, 500);

                        

                        // Update global status if available

                        if (window.microphoneStatus) {

                            window.microphoneStatus.granted = true;

                        }

                        

                    })

                    .catch(function(err) {

                        console.log('❌ Audio Translation: Microphone access denied:', err);

                        updateLocalMicrophoneUI('denied', err.message);

                    });

                } else {

                    console.log('❌ getUserMedia not supported');

                    updateLocalMicrophoneUI('unsupported');

                }

            }

            

            function updateLocalMicrophoneUI(status, errorMessage = '') {

                const micStatus = document.getElementById('mic-status');

                const micNotice = document.getElementById('microphone-notice');

                

                switch(status) {

                    case 'granted':

                        if (micStatus) micStatus.style.display = 'block';

                        if (micNotice) micNotice.style.display = 'none';

                        microphoneAccess = true;

                        break;

                        

                    case 'denied':

                        if (micNotice) {

                            micNotice.style.display = 'block';

                            micNotice.innerHTML = `

                                <strong>❌ Microphone Access Denied</strong><br>

                                Lỗi: ${errorMessage}<br>

                                Vui lòng kiểm tra cài đặt trình duyệt và cho phép microphone.

                                <br><br>

                                <button onclick="requestMicrophoneAccess()" style="

                                    background: #ff9800; 

                                    color: white; 

                                    padding: 8px 16px; 

                                    border: none;

                                    border-radius: 6px;

                                    cursor: pointer;

                                    margin: 4px;

                                ">🔄 Thử lại</button>

                                <button onclick="window.open(window.location.href, '_blank')" style="

                                    background: #2196f3; 

                                    color: white; 

                                    padding: 8px 16px; 

                                    border: none;

                                    border-radius: 6px;

                                    cursor: pointer;

                                    margin: 4px;

                                ">🔗 Mở cửa sổ mới</button>

                            `;

                        }

                        break;

                        

                    case 'unsupported':

                        if (micNotice) {

                            micNotice.style.display = 'block';

                            micNotice.innerHTML = `

                                <strong>❌ Microphone Not Supported</strong><br>

                                Trình duyệt của bạn không hỗ trợ ghi âm.<br>

                                Vui lòng sử dụng Chrome, Firefox, Safari hoặc Edge phiên bản mới.

                                <br><br>

                                <a href="https://caniuse.com/stream" target="_blank" style="

                                    color: #856404;

                                    text-decoration: underline;

                                ">Kiểm tra tương thích trình duyệt</a>

                            `;

                        }

                        break;

                        

                    default:

                        if (micNotice) {

                            micNotice.style.display = 'block';

                        }

                        break;

                }

            }

            

            // Listen for global microphone events

            window.addEventListener('microphoneStatusChanged', function(event) {

                console.log('🔄 Audio Translation: Received microphone status update:', event.detail);

                updateLocalMicrophoneUI(event.detail.status, event.detail.errorMessage);

            });

            

            window.addEventListener('microphoneGranted', function() {

                console.log('✅ Audio Translation: Global microphone granted');

                updateLocalMicrophoneUI('granted');

            });

            

            // Check microphone availability on load

            function checkMicrophoneAvailability() {

                console.log('🔍 Audio Translation: Checking microphone availability...');

                

                // Check global status first

                if (window.microphoneStatus) {

                    if (window.microphoneStatus.granted) {

                        updateLocalMicrophoneUI('granted');

                        return;

                    } else if (!window.microphoneStatus.supported) {

                        updateLocalMicrophoneUI('unsupported');

                        return;

                    }

                }

                

                // Check if we're in an iframe

                if (window.location !== window.parent.location) {

                    console.log('Running in iframe');

                    const iframeWarning = document.getElementById('iframe-warning');

                    if (iframeWarning) {

                        setTimeout(() => {

                            iframeWarning.style.display = 'block';

                        }, 1000);

                    }

                }

                

                // Try to get microphone permissions

                if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {

                    // Check if we already have permission

                    navigator.permissions.query({name: 'microphone'}).then(function(result) {

                        console.log('Microphone permission status:', result.state);

                        

                        if (result.state === 'granted') {

                            const micStatus = document.getElementById('mic-status');

                            if (micStatus) micStatus.style.display = 'block';

                            microphoneAccess = true;

                        } else if (result.state === 'prompt' || result.state === 'denied') {

                            const micNotice = document.getElementById('microphone-notice');

                            if (micNotice) {

                                setTimeout(() => {

                                    micNotice.style.display = 'block';

                                }, 1500);

                            }

                        }

                        

                        // Listen for permission changes

                        result.onchange = function() {

                            console.log('Microphone permission changed to:', this.state);

                            if (this.state === 'granted') {

                                const micStatus = document.getElementById('mic-status');

                                const micNotice = document.getElementById('microphone-notice');

                                if (micStatus) micStatus.style.display = 'block';

                                if (micNotice) micNotice.style.display = 'none';

                                microphoneAccess = true;

                            }

                        };

                    }).catch(function(err) {

                        console.log('Permission query failed:', err);

                        // Fallback to showing the notice

                        setTimeout(() => {

                            const micNotice = document.getElementById('microphone-notice');

                            if (micNotice) micNotice.style.display = 'block';

                        }, 2000);

                    });

                } else {

                    // Browser doesn't support getUserMedia

                    setTimeout(() => {

                        const micNotice = document.getElementById('microphone-notice');

                        if (micNotice) {

                            micNotice.style.display = 'block';

                            micNotice.innerHTML = `

                                <strong>❌ Microphone Not Supported</strong><br>

                                Trình duyệt không hỗ trợ ghi âm. Vui lòng cập nhật trình duyệt.

                            `;

                        }

                    }, 1000);

                }

            }

            

            // Initialize when DOM is ready

            if (document.readyState === 'loading') {

                document.addEventListener('DOMContentLoaded', checkMicrophoneAvailability);

            } else {

                checkMicrophoneAvailability();

            }

            

            // Re-check periodically for dynamic content

            setInterval(checkMicrophoneAvailability, 5000);

            </script>

            """)
            else:
                # Production mode - simple microphone notice
                gr.HTML('<div style="text-align:center;color:#666;padding:10px;">📎 Upload audio file or use microphone</div>')
            
            # Main audio source for the translation tab: accepts either an
            # uploaded file or a microphone recording (see `sources`). The
            # change/click handlers that consume it are wired further down.
            audio_input = gr.Audio(
                label="📎 Tải lên file audio hoặc ghi âm trực tiếp",
                type="numpy",  # Use numpy to avoid temp file issues
                sources=["upload", "microphone"],
                show_label=True,
                interactive=True,
                elem_id="audio-input-translation"
            )
            
            # Audio recording control buttons, laid out side by side:
            # one to persist the current audio, one to clear it.
            with gr.Row():
                save_recording_btn = gr.Button(
                    "💾 Save Recording",
                    variant="secondary",
                    size="sm"
                )
                new_recording_btn = gr.Button(
                    "🎙️ New Record", 
                    variant="primary",
                    size="sm"
                )
            
            # Button descriptions
            gr.HTML("""

            <div style="display: flex; justify-content: space-between; margin: 5px 0 15px 0; font-size: 0.8em; color: #666;">

                <span>💾 Lưu file audio hiện tại vào record_data</span>

                <span>🎙️ Xóa audio hiện tại để ghi âm mới</span>

            </div>

            """)
            
            # Status for recording actions
            recording_status = gr.HTML(
                value="<p style='text-align: center; color: #666; font-style: italic;'>Sẵn sàng ghi âm hoặc tải lên file</p>"
            )
            
            # === RECORDED FILES FUNCTIONS ===
            def refresh_recorded_files():
                """Rebuild the recorded-files dropdown from the current listing.

                Returns:
                    A ``gr.Dropdown`` carrying the freshly fetched choices and
                    no selected value.
                """
                available = get_recorded_files()
                print(f"🔄 Refreshing dropdown - found files: {available}")
                refreshed_dropdown = gr.Dropdown(choices=available, value=None)
                return refreshed_dropdown
            
            def load_recorded_file(filename):
                """Read the selected recording so the preview player can show it.

                Args:
                    filename: Name chosen in the recorded-files dropdown; may be
                        ``None`` or blank when nothing is selected.

                Returns:
                    ``(sample_rate, audio_data)`` as read by ``soundfile``, or
                    ``None`` when no name was given, the file does not exist, or
                    reading it failed.
                """
                print(f"🎵 Loading recorded file: {filename}")

                # Guard: nothing selected (None, empty or whitespace-only).
                if not (filename and filename.strip()):
                    print("❌ No filename provided")
                    return None

                file_path = get_recorded_file_path(filename)
                print(f"📁 Full path: {file_path}")

                # Guard: the file is gone; log the folder listing to help debugging.
                if not os.path.exists(file_path):
                    print(f"❌ File not found: {file_path}")
                    parent_dir = os.path.dirname(file_path)
                    if os.path.exists(parent_dir):
                        listing = os.listdir(parent_dir)
                    else:
                        listing = 'Directory not found'
                    print(f"📁 Directory contents: {listing}")
                    return None

                file_size = os.path.getsize(file_path)
                print(f"✅ File exists, size: {file_size} bytes")

                try:
                    # Imported lazily, as elsewhere in this file.
                    import soundfile as sf
                    audio_data, sample_rate = sf.read(file_path)
                    print(f"🎵 Loaded audio: shape={audio_data.shape}, sr={sample_rate}")
                except Exception as e:
                    print(f"❌ Error loading audio: {e}")
                    return None

                # Gradio's numpy audio type expects (sample_rate, data).
                return (sample_rate, audio_data)
            
            def use_recorded_for_translation(filename, country, voice, fmt):
                """Translate a previously recorded file picked from the dropdown.

                Args:
                    filename: Name selected in the recorded-files dropdown; may
                        be ``None`` or blank when nothing is selected.
                    country: Target country label from the settings dropdown.
                    voice: Target voice label from the settings dropdown.
                    fmt: Text-format dropdown label, converted through
                        ``get_format_from_dropdown`` before translation.

                Returns:
                    Whatever ``translate_audio`` returns when the file exists.
                    Otherwise a tuple of nine empty placeholders, one for each
                    output component wired to this handler (transcription,
                    detected language, translation, target language, audio
                    player, audio download, original compare, translated
                    compare, text file).
                """
                print(f"🔄 Using recorded file for translation: {filename}")
                if filename and filename.strip():
                    file_path = get_recorded_file_path(filename)
                    print(f"📁 Translation file path: {file_path}")
                    if os.path.exists(file_path):
                        print(f"✅ Starting translation for: {filename}")
                        # Use the same translation function as the live audio input
                        return translate_audio(file_path, country, voice, get_format_from_dropdown(fmt))
                    print(f"❌ File not found for translation: {file_path}")
                # Return empty results if no usable file was selected
                print("❌ No file selected for translation")
                # Nine values are required: the handler declares nine outputs,
                # and a shorter tuple makes Gradio reject the callback result.
                return "", "", "", "", None, None, "", "", None
            
            def prepare_recorded_file_download(filename):
                """Resolve the selected recording to a path for the download widget.

                Args:
                    filename: Name chosen in the recorded-files dropdown; may be
                        ``None`` or blank.

                Returns:
                    The file path when the recording exists on disk, else ``None``.
                """
                print(f"📥 Preparing download for: {filename}")
                has_selection = bool(filename) and bool(filename.strip())
                if has_selection:
                    candidate = get_recorded_file_path(filename)
                    print(f"📁 Download file path: {candidate}")
                    if not os.path.exists(candidate):
                        print(f"❌ Download file not found: {candidate}")
                    else:
                        print(f"✅ File ready for download: {filename}")
                        return candidate
                # Reached for a missing selection and for a missing file alike.
                print("❌ No file selected for download")
                return None
            
            def save_current_recording(audio_file):
                """Persist the audio currently held by the input component.

                Args:
                    audio_file: Current value of the audio input; ``None`` when
                        nothing has been recorded or uploaded.

                Returns:
                    ``(status_html, dropdown)``: a coloured status paragraph and
                    a ``gr.Dropdown`` with the current file list. On success the
                    newly saved file is preselected; otherwise nothing is.
                """
                def _with_current_listing(status_html):
                    # Every failure path pairs its message with an unselected,
                    # freshly listed dropdown.
                    listing = get_recorded_files()
                    return (status_html, gr.Dropdown(choices=listing, value=None))

                if audio_file is None:
                    return _with_current_listing(
                        "<p style='color: #e74c3c; text-align: center;'>❌ Không có file audio để lưu</p>"
                    )

                try:
                    saved_path = save_recorded_audio(audio_file)
                    if not saved_path:
                        return _with_current_listing(
                            "<p style='color: #e74c3c; text-align: center;'>❌ Lỗi khi lưu file</p>"
                        )

                    saved_filename = os.path.basename(saved_path)
                    # Re-list after saving so the new file shows up in the choices.
                    updated_files = get_recorded_files()
                    print(f"🔄 After save - updated files: {updated_files}")
                    success_html = f"<p style='color: #27ae60; text-align: center;'>✅ Đã lưu: {saved_filename}</p>"
                    return (
                        success_html,
                        gr.Dropdown(choices=updated_files, value=saved_filename)
                    )
                except Exception as e:
                    return _with_current_listing(
                        f"<p style='color: #e74c3c; text-align: center;'>❌ Lỗi: {str(e)}</p>"
                    )
            
            def clear_audio_for_new_recording():
                """Reset the audio input so a fresh recording can start.

                Returns:
                    ``(None, status_html)``: ``None`` empties the audio input and
                    the HTML paragraph tells the user the recorder is ready.
                """
                cleared_audio = None
                ready_message = "<p style='color: #3498db; text-align: center;'>🎙️ Sẵn sàng ghi âm mới</p>"
                return cleared_audio, ready_message
            
            def delete_selected_file(filename):
                """Remove the selected recording and refresh the related widgets.

                Args:
                    filename: Name chosen in the recorded-files dropdown; may be
                        ``None`` or blank.

                Returns:
                    ``(status_html, dropdown, None)``: a status paragraph, a
                    ``gr.Dropdown`` with the current file list and no selection,
                    and ``None`` to clear the preview audio player.
                """
                if not (filename and filename.strip()):
                    # Nothing selected: report it and leave the files untouched.
                    listing = get_recorded_files()
                    return (
                        "<p style='color: #e74c3c; text-align: center;'>❌ Vui lòng chọn file để xóa</p>",
                        gr.Dropdown(choices=listing, value=None),
                        None
                    )

                delete_result = delete_recorded_file(filename)
                remaining_files = get_recorded_files()

                # The helper's message carries a check mark on success; use it
                # to pick green versus red for the status paragraph.
                status_color = "#27ae60" if "✅" in delete_result else "#e74c3c"
                status_html = f"<p style='color: {status_color}; text-align: center;'>{delete_result}</p>"

                refreshed_dropdown = gr.Dropdown(choices=remaining_files, value=None)
                return (status_html, refreshed_dropdown, None)
            
            # Recorded Files Management Section
            with gr.Accordion("🎤 File đã ghi âm", open=False):
                gr.HTML("""

                <div style="

                    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);

                    color: white;

                    padding: 15px;

                    border-radius: 12px;

                    margin: 15px 0;

                    text-align: center;

                ">

                    <h4 style="margin: 0 0 8px 0;">📁 Quản lý file đã ghi</h4>

                    <p style="margin: 0; opacity: 0.9; font-size: 0.9em;">

                        Chọn file từ danh sách để phát lại hoặc dịch thuật

                    </p>

                </div>

                """)
                
                # Refresh button for recorded files
                # Button that re-lists the recorded files on demand
                refresh_files_btn = gr.Button(
                    "🔄 Làm mới danh sách",
                    variant="secondary",
                    size="sm"
                )
                
                # Status line for file operations; the delete handler writes here
                file_operation_status = gr.HTML(
                    value="<p style='text-align: center; color: #666; font-style: italic;'>Chọn file để thực hiện thao tác</p>"
                )
                
                # Dropdown for recorded files, seeded with the listing obtained
                # while the UI is being built
                initial_files = get_recorded_files()
                print(f"🔍 Initial recorded files: {initial_files}")
                recorded_files_dropdown = gr.Dropdown(
                    choices=initial_files,
                    label="📂 Chọn file đã ghi",
                    info="Các file audio đã được ghi âm trước đó"
                )
                
                # Preview and controls for selected file
                # Two-column row: preview player on the left, actions on the right
                with gr.Row():
                    with gr.Column():
                        # Read-only audio player for the selected file
                        recorded_audio_player = gr.Audio(
                            label="🎵 Phát lại file đã chọn",
                            interactive=False,
                            show_label=True,
                            type="numpy"  # Use numpy for better compatibility
                        )
                    
                    with gr.Column():
                        # Action buttons: translate, download, delete
                        use_for_translation_btn = gr.Button(
                            "🔄 Sử dụng để dịch thuật",
                            variant="primary",
                            size="sm"
                        )
                        
                        with gr.Row():
                            download_recorded_btn = gr.Button(
                                "📥 Tải xuống",
                                variant="secondary", 
                                size="sm"
                            )
                            
                            delete_recorded_btn = gr.Button(
                                "🗑️ Xóa file",
                                variant="stop",
                                size="sm"
                            )
                        
                        # File component that receives the path returned by the
                        # download handler
                        download_recorded_file = gr.File(
                            label="📥 File tải xuống",
                            visible=True,
                            file_count="single"
                        )
            
            # Settings section with gradient header
            gr.HTML("""

            <div style="

                background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%);

                color: white;

                padding: 18px;

                border-radius: 12px;

                margin: 25px 0 20px 0;

                text-align: center;

                box-shadow: 0 6px 24px rgba(255,107,107,0.3);

            ">

                <h3 style="margin: 0 0 8px 0;">🌍 Cài đặt dịch thuật</h3>

                <p style="margin: 0; opacity: 0.9; font-size: 0.9em;">

                    Chọn ngôn ngữ đích và giọng nói cho kết quả dịch thuật

                </p>

            </div>

            """)
                
            # Separate dropdowns without complex wrappers to avoid CSS conflicts
            # Target country: choices are the keys of voice_choices_by_country,
            # with Vietnam preselected.
            target_country_dropdown = gr.Dropdown(
                choices=list(voice_choices_by_country.keys()),
                value="🇻🇳 Việt Nam",
                label="🌍 Chọn quốc gia đích"
            )
            
            # Target voice: starts with the voices listed for the default
            # country; its change handler is attached further down.
            target_voice_dropdown = gr.Dropdown(
                choices=voice_choices_by_country["🇻🇳 Việt Nam"],
                value="🇻🇳 HoaiMy - Nữ Việt Chuẩn",
                label="🎭 Chọn giọng nói đích"
            )
            
            # Export format for the translated text; the Word option is only
            # offered when the optional `docx` import succeeded (DOCX_AVAILABLE).
            text_format_dropdown = gr.Dropdown(
                choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"],
                value="Markdown (.md)",
                label="📄 Định dạng file văn bản"
            )
            
            # Colorful action button
            gr.HTML("""

            """)
            
            # Auto-translate on audio upload - no manual button needed
            
            # Results section with colorful headers
            gr.HTML("""

            <div style="

                background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%);

                color: white;

                padding: 18px;

                border-radius: 12px;

                margin: 30px 0 20px 0;

                text-align: center;

                box-shadow: 0 6px 24px rgba(69,183,209,0.3);

            ">

                <h3 style="margin: 0 0 8px 0;">📊 Kết quả xử lý</h3>

                <p style="margin: 0; opacity: 0.9; font-size: 0.9em;">

                    Phiên âm, dịch thuật và tổng hợp giọng nói

                </p>

            </div>

            """)
            
            # Dynamic status indicator
            status_text = gr.HTML("")
            
            # Card-based layout for mobile
            with gr.Column(elem_classes=["output-area"]):
                # Original content card
                gr.HTML("""

                <div style="

                    background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);

                    padding: 15px;

                    border-radius: 12px;

                    margin: 15px 0;

                    border-left: 4px solid #2196F3;

                ">

                    <h4 style="margin: 0 0 10px 0; color: #1976D2;">📝 Nội dung gốc từ audio</h4>

                </div>

                """)
                
                # Read-only textbox that receives the transcription (first
                # output of the translation handlers)
                transcription_output = gr.Textbox(
                    label="🎯 Phiên âm từ audio",
                    lines=4,
                    interactive=False,
                    placeholder="Nội dung phiên âm từ file audio sẽ hiển thị ở đây...",
                    elem_classes=["mobile-textbox"]
                )
                
                # Read-only single-line textbox for the detected source language
                detected_language = gr.Textbox(
                    label="🌐 Ngôn ngữ được phát hiện",
                    lines=1,
                    interactive=False,
                    placeholder="Tự động nhận diện...",
                    elem_classes=["mobile-textbox"]
                )
                            
                
                # Translation result card
                gr.HTML("""

                <div style="

                    background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);

                    padding: 15px;

                    border-radius: 12px;

                    margin: 15px 0;

                    border-left: 4px solid #4CAF50;

                ">

                    <h4 style="margin: 0 0 10px 0; color: #388E3C;">✨ Kết quả dịch thuật</h4>

                </div>

                """)
                
                # Read-only textbox that receives the translated text
                translation_output = gr.Textbox(
                    label="🔄 Nội dung đã dịch",
                    lines=4,
                    interactive=False,
                    placeholder="Bản dịch sẽ hiển thị ở đây...",
                    elem_classes=["mobile-textbox"]
                )
                
                # Read-only display of the target language; also updated when
                # the voice dropdown changes
                target_language_display = gr.Textbox(
                    label="🎯 Ngôn ngữ đích",
                    lines=1,
                    interactive=False,
                    placeholder="Chưa chọn...",
                    elem_classes=["mobile-textbox"]
                )
                
                # Mobile-friendly comparison section
                with gr.Accordion("🔍 So sánh nội dung", open=False):
                    gr.HTML("""

                    <div style="

                        text-align: center; 

                        margin-bottom: 15px;

                        padding: 10px;

                        background: #f5f5f5;

                        border-radius: 8px;

                    ">

                        <p style="color: #666; font-style: italic; margin: 0;">

                            Xem nội dung gốc và bản dịch để so sánh

                        </p>

                    </div>

                    """)
                    
                    # Stack vertically on mobile for better readability
                    with gr.Column():
                        gr.HTML("""

                        <div style="

                            background: #e3f2fd;

                            padding: 10px;

                            border-radius: 8px;

                            margin: 10px 0;

                            text-align: center;

                            font-weight: bold;

                            color: #1976D2;

                        ">📝 Ngôn ngữ gốc</div>

                        """)
                        original_compare = gr.Textbox(
                            label="",
                            lines=4,
                            interactive=False,
                            show_label=False,
                            placeholder="Nội dung phiên âm từ audio sẽ hiển thị ở đây...",
                            elem_classes=["mobile-compare"]
                        )
                        
                        gr.HTML("""

                        <div style="

                            background: #e8f5e8;

                            padding: 10px;

                            border-radius: 8px;

                            margin: 15px 0 5px 0;

                            text-align: center;

                            font-weight: bold;

                            color: #388E3C;

                        ">✨ Sau khi dịch</div>

                        """)
                        translated_compare = gr.Textbox(
                            label="",
                            lines=4,
                            interactive=False,
                            show_label=False,
                            placeholder="Nội dung sau khi dịch sẽ hiển thị ở đây...",
                            elem_classes=["mobile-compare"]
                        )
                
                # Mobile-optimized download section
                with gr.Accordion("💾 Tải xuống kết quả", open=True):
                    gr.HTML("""

                    <div style="

                        background: linear-gradient(135deg, #fff3e0 0%, #ffcc80 100%);

                        padding: 15px;

                        border-radius: 12px;

                        margin: 15px 0;

                        border-left: 4px solid #FF9800;

                        text-align: center;

                    ">

                        <h4 style="margin: 0 0 10px 0; color: #E65100;">💾 Tải xuống kết quả</h4>

                        <p style="color: #BF360C; margin: 0; font-style: italic;">

                            File audio và văn bản đã dịch

                        </p>

                    </div>

                    """)
                    
                    # Stack downloads vertically for mobile
                    with gr.Column():
                        gr.HTML("""

                        <div style="

                            background: #e3f2fd;

                            padding: 12px;

                            border-radius: 8px;

                            margin: 15px 0 10px 0;

                            text-align: center;

                            font-weight: bold;

                            color: #1976D2;

                        ">🔊 Audio đã dịch</div>

                        """)
                        # Player for the translated audio; handlers supply a
                        # file path (type="filepath")
                        audio_output_at = gr.Audio(
                            label="🎵 Audio đã dịch",
                            type="filepath",
                            show_label=True,
                            elem_classes=["mobile-audio"],
                            format="wav"  # Specify format explicitly
                        )
                        
                        # Explicit download component for translated audio,
                        # filled by the same handlers as the player above
                        audio_download_at = gr.File(
                            label="📥 Tải xuống audio đã dịch",
                            file_count="single",
                            file_types=[".wav"],
                            visible=True
                        )
                        
                        gr.HTML("""

                        <div style="

                            background: #e8f5e8;

                            padding: 12px;

                            border-radius: 8px;

                            margin: 25px 0 10px 0;

                            text-align: center;

                            font-weight: bold;

                            color: #388E3C;

                        ">📄 Văn bản đã dịch</div>

                        """)
                        # Downloadable translated text. ".md" is listed because
                        # "Markdown (.md)" is the default choice of the text
                        # format dropdown, alongside the TXT and Word options.
                        text_output = gr.File(
                            label="",
                            file_count="single",
                            file_types=[".md", ".txt", ".docx"],
                            show_label=False,
                            elem_classes=["mobile-file"]
                        )
            
            # Event handlers for Audio Translation with colorful status
            def update_status_processing():
                """Return the HTML banner shown while a translation is running.

                Returns:
                    A constant HTML snippet: a ``status-processing`` div holding
                    the "translating" message, written to the status component.
                """
                return """

                <div class="status-processing" style="

                    text-align: center; 

                    margin: 20px 0;

                    padding: 15px;

                    border-radius: 12px;

                    color: white;

                    transition: all 0.3s ease;

                ">

                    <span style="font-weight: bold; font-size: 1.1em;">

                        ⚡ Đang tự động dịch thuật...

                    </span>

                </div>

                """
            
            def update_status_complete():
                """Return the HTML banner shown once the translation chain ends.

                Returns:
                    A constant HTML snippet: a ``status-success`` div holding
                    the "translation finished" message.
                """
                return """

                <div class="status-success" style="

                    text-align: center; 

                    margin: 20px 0;

                    padding: 15px;

                    border-radius: 12px;

                    color: white;

                    transition: all 0.3s ease;

                ">

                    <span style="font-weight: bold; font-size: 1.1em;">

                        ✅ Dịch thuật hoàn thành!

                    </span>

                </div>

                """
            
            # When the country changes, let update_voices (defined elsewhere)
            # produce the new state of the voice dropdown.
            target_country_dropdown.change(
                fn=update_voices,
                inputs=[target_country_dropdown],
                outputs=[target_voice_dropdown]
            )
            
            # Mirror the selected voice label into the target language display
            # whenever the voice dropdown changes
            target_voice_dropdown.change(
                fn=lambda voice: voice,
                inputs=[target_voice_dropdown],
                outputs=[target_language_display]
            )
            
            # Helper function to extract format
            def get_format_from_dropdown(format_choice):
                """Map a text-format dropdown label to its short format code.

                Args:
                    format_choice: Label such as ``"Markdown (.md)"``,
                        ``"TXT (.txt)"`` or ``"Word (.docx)"``. ``None`` or an
                        empty string is accepted and treated as unknown.

                Returns:
                    ``"md"`` for Markdown labels, ``"docx"`` for Word labels and
                    ``"txt"`` for anything else, including a missing value.
                """
                # A missing selection would make the `in` checks below raise
                # TypeError; fall back to the same default as unknown labels.
                if not format_choice:
                    return "txt"
                if "Markdown" in format_choice:
                    return "md"
                if "Word" in format_choice:
                    return "docx"
                return "txt"
            
            # Auto-translate when audio is uploaded or changed
            def _auto_translate_audio(audio, country, voice, fmt):
                """Translate the current audio, or return placeholders when empty.

                Args:
                    audio: Current value of the audio input; ``None`` when the
                        input was cleared.
                    country: Target country label.
                    voice: Target voice label.
                    fmt: Text-format dropdown label.

                Returns:
                    The result of ``translate_audio`` when audio is present.
                    Otherwise nine placeholder values, one per output component
                    listed in the handler below, with the upload hint in the
                    translation box and the country echoed in the target
                    language display.
                """
                if audio is None:
                    # Must match the nine outputs below; a shorter tuple makes
                    # Gradio reject the result. Clearing the input, which the
                    # new-recording handler does by returning None for it, is
                    # what leads here.
                    return (
                        "",      # transcription_output
                        "",      # detected_language
                        "📎 Vui lòng tải lên file audio hoặc ghi âm",  # translation_output
                        country,  # target_language_display
                        None,    # audio_output_at
                        None,    # audio_download_at
                        "",      # original_compare
                        "",      # translated_compare
                        None,    # text_output
                    )
                return translate_audio(audio, country, voice, get_format_from_dropdown(fmt))

            # Chain: show the "processing" banner, translate, show the
            # "complete" banner, then refresh the recorded-files dropdown.
            audio_input.change(
                fn=update_status_processing,
                outputs=[status_text]
            ).then(
                fn=_auto_translate_audio,
                inputs=[audio_input, target_country_dropdown, target_voice_dropdown, text_format_dropdown],
                outputs=[
                    transcription_output, 
                    detected_language, 
                    translation_output, 
                    target_language_display, 
                    audio_output_at,
                    audio_download_at,
                    original_compare,
                    translated_compare,
                    text_output
                ]
            ).then(
                fn=update_status_complete,
                outputs=[status_text]
            ).then(
                fn=refresh_recorded_files,
                outputs=[recorded_files_dropdown]
            )
            
            # === RECORDED FILES EVENT HANDLERS ===
            
            # Save current recording
            # Save current recording: writes the status message and updates the
            # recorded-files dropdown from the handler's two return values
            save_recording_btn.click(
                fn=save_current_recording,
                inputs=[audio_input],
                outputs=[recording_status, recorded_files_dropdown]
            )
            
            # New recording: clears the audio input and updates the status text
            new_recording_btn.click(
                fn=clear_audio_for_new_recording,
                outputs=[audio_input, recording_status]
            )
            
            # Manual refresh of the recorded-files dropdown
            refresh_files_btn.click(
                fn=refresh_recorded_files,
                outputs=[recorded_files_dropdown]
            )
            
            # Selecting a file loads it into the preview player
            recorded_files_dropdown.change(
                fn=load_recorded_file,
                inputs=[recorded_files_dropdown],
                outputs=[recorded_audio_player]
            )
            
            # Translate the selected recording: processing banner, translation
            # into the nine result components, completion banner, then a
            # dropdown refresh
            use_for_translation_btn.click(
                fn=lambda: update_status_processing(),
                outputs=[status_text]
            ).then(
                fn=use_recorded_for_translation,
                inputs=[recorded_files_dropdown, target_country_dropdown, target_voice_dropdown, text_format_dropdown],
                outputs=[
                    transcription_output, 
                    detected_language, 
                    translation_output, 
                    target_language_display, 
                    audio_output_at,
                    audio_download_at,
                    original_compare,
                    translated_compare,
                    text_output
                ]
            ).then(
                fn=lambda: update_status_complete(),
                outputs=[status_text]
            ).then(
                fn=refresh_recorded_files,
                outputs=[recorded_files_dropdown]
            )
            
            # Hand the selected file's path to the download component
            download_recorded_btn.click(
                fn=prepare_recorded_file_download,
                inputs=[recorded_files_dropdown],
                outputs=[download_recorded_file]
            )
            
            # Delete the selected file; the handler's three return values update
            # the status line, the dropdown and the preview player
            delete_recorded_btn.click(
                fn=delete_selected_file,
                inputs=[recorded_files_dropdown],
                outputs=[file_operation_status, recorded_files_dropdown, recorded_audio_player]
            )
    
    # Feature overview section: a heading followed by three static HTML cards
    # (Voice RAG, Audio Translation, Voice Studio). The cards are purely
    # presentational; no event handlers are attached to them here.
    # The heading text is Vietnamese for "Main features".
    gr.Markdown("### 📚 Tính năng chính")

    # First row: two cards side by side, one per column.
    with gr.Row():
        # Card 1: Voice RAG (document upload + spoken answers).
        with gr.Column():
            gr.HTML("""

            <div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 20px; border-radius: 15px; color: white; text-align: center; margin: 10px;">

                <h3>📚 Voice RAG</h3>

                <p>Upload tài liệu và đặt câu hỏi. Nhận trả lời bằng giọng nói đa ngôn ngữ.</p>

                <div style="margin-top: 15px;">

                    <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">

                        ✓ Hỗ trợ PDF, DOCX, TXT

                    </div>

                    <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">

                        ✓ AI Gemini 2.0 Flash

                    </div>

                    <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">

                        ✓ 24 giọng nói đa quốc gia

                    </div>

                </div>

            </div>

            """)

        # Card 2: Audio Translation.
        with gr.Column():
            gr.HTML("""

            <div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 20px; border-radius: 15px; color: white; text-align: center; margin: 10px;">

                <h3>🌍 Audio Translation</h3>

                <p>Dịch thuật âm thanh sang nhiều ngôn ngữ với giọng nói tự nhiên.</p>

                <div style="margin-top: 15px;">

                    <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">

                        ✓ Ghi âm real-time

                    </div>

                    <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">

                        ✓ 13 ngôn ngữ chính

                    </div>

                    <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">

                        ✓ Edge TTS Neural

                    </div>

                </div>

            </div>

            """)

    # Second row: a single column holding the Voice Studio card.
    with gr.Row():
        # Card 3: Voice Studio (text-to-speech).
        with gr.Column():
            gr.HTML("""

            <div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 20px; border-radius: 15px; color: white; text-align: center; margin: 10px;">

                <h3>🎤 Voice Studio</h3>

                <p>Chuyển văn bản thành giọng nói với nhiều lựa chọn quốc gia và giọng nói.</p>

                <div style="margin-top: 15px;">

                    <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">

                        ✓ 13 quốc gia

                    </div>

                    <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">

                        ✓ Tích hợp dịch thuật

                    </div>

                    <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">

                        ✓ Điều chỉnh tốc độ

                    </div>

                </div>

            </div>

            """)
    
    
    # Footer: static branding banner ("Digitized Brains" / "Voice Studio - AI
    # Powered"). The outer <div> uses the "custom-footer" CSS class, which is
    # not defined in this part of the file.
    gr.HTML("""

    <div class="custom-footer">

        <div style="display: flex; justify-content: center; align-items: center; gap: 15px; flex-wrap: wrap;">

            <div style="display: flex; align-items: center; gap: 8px;">

                <div style="background: rgba(255,255,255,0.2); padding: 8px 15px; border-radius: 20px; font-size: 16px;">

                    🧠 DB

                </div>

                <span style="font-size: 18px; font-weight: bold;">Digitized Brains</span>

            </div>

            <div style="font-size: 14px; opacity: 0.9;">

                Voice Studio - AI Powered

            </div>

        </div>

    </div>

    """)
    
    # Inject the JavaScript for button effects by rendering `js_code` (built
    # elsewhere in this file) as an HTML component.
    gr.HTML(js_code)

if __name__ == "__main__":
    # Script entry point: print a startup banner, prepare local storage when
    # running outside Hugging Face Spaces, choose the server port, and launch
    # the Gradio app held in `demo`.
    #
    # `os` and `sys` are provided by the module-level imports at the top of
    # the file, so they are not re-imported here.

    # Either SPACE_ID or HF_SPACE_ID being set is treated as "running on
    # Hugging Face Spaces". Evaluated once and reused below.
    on_hf_spaces = bool(os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID"))

    if sys.platform == 'win32':
        # PYTHONIOENCODING is read at interpreter startup, so setting it here
        # only influences child processes; this process's own stdout/stderr
        # are re-wrapped as UTF-8 near the top of the file.
        os.environ['PYTHONIOENCODING'] = 'utf-8'

    print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")

    # Local runs create the record_data directory eagerly; on HF Spaces this is
    # deferred to first use to keep startup fast.
    if on_hf_spaces:
        print("🏭 Production mode - record_data will be created on first use")
    else:
        create_record_data_directory()
        print(f"📁 Record data directory ready: {RECORD_DATA_DIR}")

    # Turn Gradio's default flagging mode off.
    # NOTE(review): `demo` is already constructed at this point, so this
    # presumably only matters for anything that reads the variable later.
    os.environ['GRADIO_ALLOW_FLAGGING'] = 'never'
    # GRADIO_TEMP_DIR is deliberately left at its default; an earlier '/tmp'
    # override was disabled (original note: file-serving issues).

    if on_hf_spaces:
        # HF Spaces standard configuration.
        port = 7860
        print("🏭 Using HF Spaces standard port 7860")
    else:
        # Local development: honour GRADIO_SERVER_PORT when it holds a valid
        # integer, otherwise fall back to the default instead of crashing at
        # startup on an empty or non-numeric value.
        default_local_port = 7880
        raw_port = os.environ.get("GRADIO_SERVER_PORT", "").strip()
        try:
            port = int(raw_port) if raw_port else default_local_port
        except ValueError:
            print(f"⚠️ Ignoring invalid GRADIO_SERVER_PORT={raw_port!r}; using {default_local_port}")
            port = default_local_port
        print(f"🖥️ Using local development port {port}")

    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces
        server_port=port,
        share=False,
        show_error=True,
        ssr_mode=False,  # Disable SSR to prevent timeout issues on HF Spaces
        enable_monitoring=False,  # Disable monitoring for faster startup
    )