Spaces:

ducnguyen1978
/

Voice_Agent

Running

App Files Files Community

Voice_Agent / app.py

ducnguyen1978

Upload app.py

9a2046b verified 15 days ago

raw

history blame contribute delete

138 kB

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	import os
	import sys

	# On Windows, replace stdout/stderr with UTF-8 writers built on the
	# underlying binary streams so the emoji in the log messages can be encoded.
	if sys.platform == 'win32':
	    import codecs
	    _utf8_writer = codecs.getwriter('utf-8')
	    sys.stdout = _utf8_writer(sys.stdout.detach())
	    sys.stderr = _utf8_writer(sys.stderr.detach())

	# Read a local .env file unless SPACE_ID / HF_SPACE_ID is set, in which case
	# the process environment is used as-is.
	try:
	    if os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID"):
	        print("🏭 Production environment - using system environment variables")
	    else:
	        # Imported lazily so a missing python-dotenv only matters locally.
	        from dotenv import load_dotenv
	        load_dotenv()
	        print("✅ Environment variables loaded from .env file")
	except ImportError:
	    print("⚠️ python-dotenv not installed. Using system environment variables only.")
	except Exception as env_error:
	    print(f"⚠️ Error loading .env file: {env_error}")

	# Essential imports for HF Spaces
	import numpy as np
	import gradio as gr

	# Try to import google-generativeai with fallback
	try:
	import google.generativeai as genai
	GENAI_AVAILABLE = True
	except ImportError as e:
	print(f"⚠️ google-generativeai not available: {e}")
	GENAI_AVAILABLE = False
	genai = None

	try:
	from gtts import gTTS, lang
	GTTS_AVAILABLE = True
	except ImportError as e:
	print(f"⚠️ gtts not available: {e}")
	GTTS_AVAILABLE = False

	import tempfile
	# import soundfile as sf # Import locally to avoid startup overhead
	# Kokoro not used - removed for performance
	import time
	import base64

	# Try to import optional dependencies
	try:
	import edge_tts
	EDGE_TTS_AVAILABLE = True
	except ImportError as e:
	print(f"⚠️ edge-tts not available: {e}")
	EDGE_TTS_AVAILABLE = False

	import asyncio
	import io

	try:
	import PyPDF2
	PDF_AVAILABLE = True
	except ImportError:
	PDF_AVAILABLE = False

	try:
	import docx
	DOCX_AVAILABLE = True
	except ImportError:
	DOCX_AVAILABLE = False

	import shutil
	import atexit
	import glob
	import datetime

	# Librosa not used - removed for performance

	# === RECORD DATA MANAGEMENT ===
	RECORD_DATA_DIR = "record_data"

	def create_record_data_directory():
	    """Ensure the recordings directory exists and return its path.

	    Returns:
	        The value of RECORD_DATA_DIR.
	    """
	    already_present = os.path.isdir(RECORD_DATA_DIR)
	    # exist_ok=True removes the check-then-create race: a directory that
	    # appears between the check above and this call no longer raises.
	    os.makedirs(RECORD_DATA_DIR, exist_ok=True)
	    if not already_present:
	        print(f"✅ Created directory: {RECORD_DATA_DIR}")
	    return RECORD_DATA_DIR

	def cleanup_record_data():
	    """Delete the recordings directory, except when SPACE_ID / HF_SPACE_ID is set.

	    Any error is printed and swallowed; the function always returns None.
	    """
	    try:
	        on_spaces = os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")
	        if not on_spaces:
	            # Local development: the directory is removed if it exists.
	            if os.path.exists(RECORD_DATA_DIR):
	                shutil.rmtree(RECORD_DATA_DIR)
	                print(f"🧹 Cleaned up {RECORD_DATA_DIR} directory")
	        else:
	            print(f"🏭 Production environment detected - keeping {RECORD_DATA_DIR} directory")
	    except Exception as cleanup_error:
	        print(f"⚠️ Error cleaning up {RECORD_DATA_DIR}: {cleanup_error}")

	def save_recorded_audio(audio_data, original_filename=None):
	    """Store one recording as a timestamped .wav file in RECORD_DATA_DIR.

	    Args:
	        audio_data: one of
	            * a path to an existing file (the file is copied),
	            * a ``(sample_rate, audio_array)`` tuple (written with soundfile),
	            * anything else, which is written verbatim as raw bytes.
	        original_filename: optional name; its stem is embedded in the new
	            file name.

	    Returns:
	        The path of the saved file, or None when saving failed (the error
	        and a traceback are printed).
	    """
	    try:
	        create_record_data_directory()

	        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
	        if original_filename:
	            name_part = os.path.splitext(os.path.basename(original_filename))[0]
	            filename = f"recorded_{name_part}_{timestamp}.wav"
	        else:
	            filename = f"recorded_{timestamp}.wav"

	        filepath = os.path.join(RECORD_DATA_DIR, filename)
	        existed_before = os.path.exists(filepath)

	        try:
	            if isinstance(audio_data, str) and os.path.exists(audio_data):
	                shutil.copy2(audio_data, filepath)
	            elif isinstance(audio_data, tuple) and len(audio_data) == 2:
	                sample_rate, audio_array = audio_data
	                import soundfile as sf  # imported lazily, as in the rest of the file
	                sf.write(filepath, audio_array, sample_rate)
	                print(f"📊 Saved numpy audio: sr={sample_rate}, shape={audio_array.shape}")
	            else:
	                with open(filepath, 'wb') as f:
	                    f.write(audio_data)
	        except Exception:
	            # A failed write (e.g. a str that is not an existing path reaching
	            # the raw branch) used to leave an empty .wav behind, which
	            # get_recorded_files() would then list as a recording.
	            if not existed_before and os.path.exists(filepath):
	                os.remove(filepath)
	            raise

	        print(f"✅ Saved recorded audio: {filepath}")
	        return filepath

	    except Exception as e:
	        print(f"❌ Error saving recorded audio: {e}")
	        import traceback
	        traceback.print_exc()
	        return None

	def get_recorded_files():
	    """Return the names of the .wav files in RECORD_DATA_DIR, newest first.

	    Returns an empty list when the directory is missing or any error occurs.
	    """
	    try:
	        if not os.path.exists(RECORD_DATA_DIR):
	            print(f"📁 Record directory does not exist: {RECORD_DATA_DIR}")
	            return []

	        wav_paths = glob.glob(os.path.join(RECORD_DATA_DIR, "*.wav"))
	        print(f"🔍 Found {len(wav_paths)} files in {RECORD_DATA_DIR}")

	        # Most recently modified file first.
	        wav_paths.sort(key=os.path.getmtime, reverse=True)

	        # Only the base names are handed back for display.
	        filenames = list(map(os.path.basename, wav_paths))
	        print(f"📂 Returning filenames: {filenames}")
	        return filenames

	    except Exception as listing_error:
	        print(f"❌ Error getting recorded files: {listing_error}")
	        return []

	def get_recorded_file_path(filename):
	    """Return the path of *filename* inside RECORD_DATA_DIR.

	    Only the final path component of *filename* is used. Without this,
	    os.path.join would let an absolute path or a "../" prefix resolve to a
	    location outside the recordings directory.
	    """
	    return os.path.join(RECORD_DATA_DIR, os.path.basename(filename))


	def delete_recorded_file(filename):
	    """Delete one recording from RECORD_DATA_DIR and return a status message.

	    Args:
	        filename: bare file name of the recording (no directory part).

	    Returns:
	        A user-facing status string starting with "✅" on success and "❌"
	        otherwise. Exceptions are reported in the message, not raised.
	    """
	    try:
	        if not filename or not filename.strip():
	            return "❌ Không có file nào được chọn để xóa"

	        # Refuse anything that is not a bare file name: an absolute path or a
	        # "../" prefix would otherwise make os.remove act outside the
	        # recordings directory.
	        if os.path.basename(filename) != filename or filename in (".", ".."):
	            print(f"❌ Rejected file name with path components: {filename}")
	            return f"❌ Không tìm thấy file: {filename}"

	        file_path = get_recorded_file_path(filename)
	        print(f"🗑️ Attempting to delete: {file_path}")

	        if os.path.exists(file_path):
	            os.remove(file_path)
	            print(f"✅ Successfully deleted: {filename}")
	            return f"✅ Đã xóa file: {filename}"
	        else:
	            print(f"❌ File not found: {file_path}")
	            return f"❌ Không tìm thấy file: {filename}"

	    except Exception as e:
	        print(f"❌ Error deleting file: {e}")
	        return f"❌ Lỗi khi xóa file: {str(e)}"

	# Register cleanup function to run when app exits (disabled for stability)
	# atexit.register(cleanup_record_data) # Disabled to prevent data loss on deployment

	# DOCX support already checked above

	# Configure Gemini API - Delayed configuration for faster startup
	GEMINI_API_KEY = None

	def configure_gemini_api():
	    """Configure google-generativeai on first use and return the API key.

	    The key is read from GEMINI_API_KEY, then GOOGLE_API_KEY, and cached in
	    the module-level GEMINI_API_KEY. Returns None when the library is not
	    importable or no key is set.
	    """
	    global GEMINI_API_KEY
	    if not GENAI_AVAILABLE:
	        print("❌ google-generativeai not available")
	        return None

	    # A key cached by an earlier call is returned without reconfiguring.
	    if GEMINI_API_KEY is not None:
	        return GEMINI_API_KEY

	    GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
	    if not GEMINI_API_KEY:
	        print("⚠️ GEMINI_API_KEY or GOOGLE_API_KEY not found in environment variables")
	    else:
	        genai.configure(api_key=GEMINI_API_KEY)
	        print("✅ Gemini API configured successfully")
	    return GEMINI_API_KEY

	# Language table for Audio Translation: the gTTS language list plus an
	# explicit 'ja' entry when gTTS is importable, else a two-language fallback.
	GTTS_LANGUAGES = lang.tts_langs() if GTTS_AVAILABLE else {'en': 'English', 'vi': 'Vietnamese'}
	if GTTS_AVAILABLE:
	    GTTS_LANGUAGES['ja'] = 'Japanese'

	# Alphabetically sorted language names.
	SUPPORTED_LANGUAGES = sorted(GTTS_LANGUAGES.values())

	# Voice mapping for Edge TTS - defined once for performance
	# Keys are the voice labels offered in the UI; values are the Edge TTS voice
	# names handed to generate_speech(). Lookups elsewhere in this file fall back
	# to "vi-VN-HoaiMyNeural" when a label is missing.
	# NOTE(review): the two 🇵🇹 labels map to pt-BR (Brazilian) voices - confirm
	# that this is intended.
	VOICE_MAP = {
	"🇻🇳 HoaiMy - Nữ Việt Chuẩn": "vi-VN-HoaiMyNeural",
	"🇻🇳 NamMinh - Nam Việt Chuẩn": "vi-VN-NamMinhNeural",
	"🇺🇸 Aria - Nữ Mỹ": "en-US-AriaNeural",
	"🇺🇸 Guy - Nam Mỹ": "en-US-GuyNeural",
	"🇬🇧 Sonia - Nữ Anh": "en-GB-SoniaNeural",
	"🇬🇧 Ryan - Nam Anh": "en-GB-RyanNeural",
	"🇩🇪 Katja - Deutsche Frau": "de-DE-KatjaNeural",
	"🇩🇪 Conrad - Deutscher Mann": "de-DE-ConradNeural",
	"🇫🇷 Denise - Française": "fr-FR-DeniseNeural",
	"🇫🇷 Henri - Français": "fr-FR-HenriNeural",
	"🇪🇸 Elvira - Española": "es-ES-ElviraNeural",
	"🇪🇸 Alvaro - Español": "es-ES-AlvaroNeural",
	"🇮🇹 Elsa - Italiana": "it-IT-ElsaNeural",
	"🇮🇹 Diego - Italiano": "it-IT-DiegoNeural",
	"🇯🇵 Nanami - 日本女性": "ja-JP-NanamiNeural",
	"🇯🇵 Keita - 日本男性": "ja-JP-KeitaNeural",
	"🇰🇷 SunHi - 한국 여성": "ko-KR-SunHiNeural",
	"🇰🇷 BongJin - 한국 남성": "ko-KR-BongJinNeural",
	"🇨🇳 Xiaoxiao - 中文女声": "zh-CN-XiaoxiaoNeural",
	"🇨🇳 Yunxi - 中文男声": "zh-CN-YunxiNeural",
	"🇷🇺 Svetlana - Русская": "ru-RU-SvetlanaNeural",
	"🇷🇺 Dmitry - Русский": "ru-RU-DmitryNeural",
	"🇵🇹 Francisca - Portuguesa": "pt-BR-FranciscaNeural",
	"🇵🇹 Antonio - Português": "pt-BR-AntonioNeural",
	"🇸🇦 Zariyah - عربية": "ar-SA-ZariyahNeural",
	"🇸🇦 Hamed - عربي": "ar-SA-HamedNeural"
	}

	# Voice RAG functions (integrated from hf_Voice_Audio_Translation)
	def read_pdf(file_path):
	    """Extract the text of every page of a PDF file.

	    Args:
	        file_path: path of the PDF to read.

	    Returns:
	        The concatenated page text, or a message starting with
	        "Error reading PDF:" when PyPDF2 is missing or reading fails.
	    """
	    try:
	        # Mirrors the DOCX_AVAILABLE guard in read_docx; without it a missing
	        # PyPDF2 surfaced as an opaque NameError message.
	        if not PDF_AVAILABLE:
	            return "Error reading PDF: PyPDF2 not available"
	        with open(file_path, 'rb') as file:
	            reader = PyPDF2.PdfReader(file)
	            # `or ""` guards against a page yielding no text object
	            # (presumably possible for image-only pages - TODO confirm).
	            return "".join(page.extract_text() or "" for page in reader.pages)
	    except Exception as e:
	        return f"Error reading PDF: {str(e)}"

	def read_docx(file_path):
	    """Return the paragraphs of a Word document, one per line.

	    Returns "❌ python-docx not available" when the library is missing and a
	    message starting with "Error reading DOCX:" when reading fails.
	    """
	    try:
	        if DOCX_AVAILABLE:
	            document = docx.Document(file_path)
	            # Every paragraph is followed by a newline, including the last.
	            return "".join(para.text + "\n" for para in document.paragraphs)
	        return "❌ python-docx not available"
	    except Exception as read_error:
	        return f"Error reading DOCX: {str(read_error)}"

	def read_txt(file_path):
	    """Return the contents of a UTF-8 text file.

	    On failure a message starting with "Error reading TXT:" is returned
	    instead of raising.
	    """
	    try:
	        with open(file_path, mode='r', encoding='utf-8') as source:
	            contents = source.read()
	    except Exception as read_error:
	        return f"Error reading TXT: {str(read_error)}"
	    return contents

	def extract_text_from_file(file_path):
	    """Read a .pdf, .docx or .txt file and return its text.

	    The reader is chosen by the lower-cased file extension. Returns
	    "No file uploaded" for None and "Unsupported file format: <ext>" for any
	    other extension.
	    """
	    if file_path is None:
	        return "No file uploaded"

	    suffix = os.path.splitext(file_path)[1].lower()

	    if suffix == '.txt':
	        return read_txt(file_path)
	    if suffix == '.docx':
	        return read_docx(file_path)
	    if suffix == '.pdf':
	        return read_pdf(file_path)
	    return f"Unsupported file format: {suffix}"

	def detect_language_from_text(text):
	    """Guess the language of *text* with ordered heuristics.

	    Checks run in a fixed order and the first hit wins: Vietnamese, Chinese,
	    Japanese and Korean (character probes), then French, German and Spanish
	    (stop-words or accented letters). Anything else is reported as English.

	    Stop-words are compared against whole words. They used to be matched as
	    substrings, so e.g. "hello world" was classified as Spanish because it
	    contains "el" and "lo".

	    Args:
	        text: the text to inspect.

	    Returns:
	        One of "Vietnamese", "Chinese", "Japanese", "Korean", "French",
	        "German", "Spanish" or "English".
	    """
	    import re  # local import keeps this block self-contained

	    lowered = text.lower()
	    words = set(re.findall(r"\w+", lowered))

	    def contains_any(chars, haystack):
	        return any(char in haystack for char in chars)

	    # Vietnamese detection
	    vietnamese_chars = 'àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ'
	    if contains_any(vietnamese_chars, lowered):
	        return "Vietnamese"

	    # Chinese detection
	    if contains_any('中文汉字學習语言', text):
	        return "Chinese"

	    # Japanese detection
	    if contains_any('ひらがなカタカナ日本語', text):
	        return "Japanese"

	    # Korean detection
	    if contains_any('한국어문자', text):
	        return "Korean"

	    # French detection
	    french_words = {'le', 'la', 'les', 'de', 'et', 'à', 'un', 'une', 'ce', 'qui', 'que'}
	    if words & french_words or contains_any('àâäéèêëïîôöùûüÿç', lowered):
	        return "French"

	    # German detection
	    german_words = {'der', 'die', 'das', 'und', 'ist', 'ich', 'bin', 'haben', 'sein', 'werden'}
	    if words & german_words or contains_any('äöüß', lowered):
	        return "German"

	    # Spanish detection
	    spanish_words = {'el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo'}
	    if words & spanish_words or contains_any('ñáéíóúü', lowered):
	        return "Spanish"

	    # English is the fallback for everything that matched nothing above.
	    return "English"

	def process_with_gemini(text, question, answer_language="Vietnamese"):
	    """Ask Gemini to answer *question* about *text* in *answer_language*.

	    Returns the model's answer, a "❌ Lỗi: ..." message when no API key is
	    configured, or a message starting with "Error processing with Gemini:"
	    when the request fails.
	    """
	    try:
	        if not configure_gemini_api():
	            return "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables"

	        gemini = genai.GenerativeModel("gemini-2.0-flash")

	        # The detected document language is passed to the model as a hint.
	        detected_doc_language = detect_language_from_text(text)

	        prompt = f"""
	Based on the following document content, please answer the question in {answer_language}:

	Document Content (detected language: {detected_doc_language}):
	{text}

	Question: {question}

	Please provide a comprehensive and accurate answer in {answer_language}.
	If the document is in a different language than the question, please still answer in {answer_language}.
	Maintain the factual accuracy while adapting cultural context appropriately.
	"""

	        return gemini.generate_content(prompt).text

	    except Exception as gemini_error:
	        return f"Error processing with Gemini: {str(gemini_error)}"

	def text_to_speech_rag(text, voice_selection):
	    """Synthesize a RAG answer with Edge TTS and return a temp audio file path.

	    Args:
	        text: the answer to speak; only the first 2000 characters are used.
	        voice_selection: voice label looked up in VOICE_MAP
	            (falls back to "vi-VN-HoaiMyNeural").

	    Returns:
	        Path of the generated audio file, or None when *text* is empty, is an
	        error message (starts with "Error" or "❌"), or synthesis fails.
	    """
	    try:
	        # "❌" covers the missing-API-key message from process_with_gemini,
	        # which would otherwise be read aloud as if it were an answer.
	        if not text or text.startswith(("Error", "❌")):
	            return None

	        voice_name = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
	        text_limited = text[:2000]

	        audio_data = asyncio.run(generate_speech(text_limited, voice_name, 0.0))

	        # The bytes from generate_speech are served as audio/mpeg by the Voice
	        # Studio player in this file, so the file gets an .mp3 suffix rather
	        # than the former .wav.
	        fd, temp_output_path = tempfile.mkstemp(suffix=".mp3", prefix="voice_rag_audio_")
	        os.close(fd)

	        with open(temp_output_path, 'wb') as f:
	            f.write(audio_data)

	        return temp_output_path

	    except Exception as e:
	        print(f"TTS Error: {str(e)}")
	        return None

	def voice_rag_pipeline(uploaded_file, question, answer_language="Vietnamese", voice_selection="🇻🇳 HoaiMy - Nữ Việt Chuẩn", text_format="txt"):
	    """Answer a question about an uploaded document, as text, audio and file.

	    Args:
	        uploaded_file: path of the uploaded .pdf/.docx/.txt document.
	        question: the user's question.
	        answer_language: language the answer should be written in.
	        voice_selection: voice label used for the spoken answer.
	        text_format: "md" for the formatted Markdown report; any other value
	            is passed to create_text_file as the format of the plain answer.

	    Returns:
	        ``(answer, detected document language, audio path, text file path)``.
	        When the input is rejected or the document cannot be read, the first
	        item is the message and both paths are None.
	    """
	    if uploaded_file is None:
	        return "Please upload a document first.", "N/A", None, None

	    if not question or not question.strip():
	        return "Please enter a question.", "N/A", None, None

	    extracted_text = extract_text_from_file(uploaded_file)

	    # All failure messages the extractors produce. Only "Error" used to be
	    # checked, so "Unsupported file format: ..." and "❌ python-docx not
	    # available" were sent to Gemini as if they were the document.
	    failure_prefixes = ("Error", "Unsupported file format", "No file uploaded", "❌")
	    if extracted_text.startswith(failure_prefixes):
	        return extracted_text, "Error", None, None

	    detected_doc_language = detect_language_from_text(extracted_text)

	    answer = process_with_gemini(extracted_text, question, answer_language)

	    audio_file = text_to_speech_rag(answer, voice_selection)

	    text_format = text_format or "txt"
	    if text_format.lower() == "md":
	        formatted_content = format_voice_rag_response(
	            question, answer, detected_doc_language, voice_selection
	        )
	        text_file_path = create_text_file(formatted_content, "md", "voice_rag_response")
	    else:
	        text_file_path = create_text_file(answer, text_format, "voice_rag_answer")

	    return answer, detected_doc_language, audio_file, text_file_path

	def detect_language(text):
	    """Score *text* against five languages and return the best match.

	    Each language earns points for its stop-words that occur in the text as
	    whole words, and for its characteristic accented letters. Stop-words used
	    to be matched as substrings, so entries like 'a', 'y', 'he' or 'it'
	    scored on almost any text ("hello world" came out as Spanish).

	    Args:
	        text: the text to classify.

	    Returns:
	        "Vietnamese", "English", "German", "French" or "Spanish"; "unknown"
	        for empty/blank input; "English" when nothing scores.
	    """
	    import re  # local import keeps this block self-contained

	    if not text or not text.strip():
	        return "unknown"

	    text_lower = text.lower()
	    tokens = set(re.findall(r"\w+", text_lower))

	    def word_hits(words):
	        # Number of distinct stop-words present as whole words.
	        return sum(1 for word in words if word in tokens)

	    def char_hits(chars):
	        # Number of characters of the text that belong to `chars`.
	        return sum(1 for char in text_lower if char in chars)

	    vietnamese_chars = 'àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ'
	    vietnamese_words = ['và', 'của', 'là', 'có', 'này', 'được', 'cho', 'từ', 'một', 'những', 'tôi', 'bạn']
	    english_words = ['the', 'and', 'is', 'are', 'have', 'has', 'will', 'would', 'can', 'could', 'that', 'this', 'with', 'for', 'you', 'he', 'she', 'it', 'they', 'we']
	    german_words = ['der', 'die', 'das', 'und', 'ist', 'ich', 'bin', 'haben', 'sein', 'werden', 'mit', 'auf', 'für', 'von']
	    french_words = ['le', 'la', 'les', 'de', 'et', 'à', 'un', 'une', 'ce', 'qui', 'que', 'avec', 'pour', 'dans']
	    spanish_words = ['el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo', 'con', 'para']

	    # Weights are unchanged: Vietnamese words count double, French and
	    # Spanish accents count half. Dict order decides ties.
	    scores = {
	        'Vietnamese': char_hits(vietnamese_chars) + 2 * word_hits(vietnamese_words),
	        'English': word_hits(english_words),
	        'German': word_hits(german_words) + char_hits('äöüß'),
	        'French': word_hits(french_words) + 0.5 * char_hits('àâäéèêëïîôöùûüÿç'),
	        'Spanish': word_hits(spanish_words) + 0.5 * char_hits('ñáéíóúü'),
	    }

	    max_score = max(scores.values())
	    if max_score > 0:
	        detected = max(scores, key=scores.get)
	        print(f"🔍 Language detection scores: {scores}")
	        print(f"🎯 Detected language: {detected} (score: {max_score})")
	        return detected

	    print(f"⚠️ Could not detect language, defaulting to English")
	    return "English"

	async def generate_speech(text, voice_name, rate):
	    """Stream speech for *text* from Edge TTS and return the audio bytes.

	    *rate* is a fraction relative to normal speed; it is sent as a signed
	    percentage (0.0 becomes "+0%", -0.5 becomes "-50%").
	    """
	    communicate = edge_tts.Communicate(text, voice_name, rate=f"{rate:+.0%}")

	    # Only "audio" chunks are kept; other chunk types are ignored.
	    audio_chunks = [
	        chunk["data"]
	        async for chunk in communicate.stream()
	        if chunk["type"] == "audio"
	    ]
	    return b"".join(audio_chunks)

	def create_text_file(content, file_format="txt", filename_prefix="translated_text"):
	    """Write *content* to a temporary file for download.

	    Args:
	        content: text to store. Empty content and error messages (starting
	            with "Lỗi:" or "❌") produce no file.
	        file_format: "docx" (used only when python-docx is available), "md",
	            or anything else for plain text; case-insensitive.
	        filename_prefix: prefix of the temporary file name.

	    Returns:
	        Path of the created file, or None when nothing was written.
	    """
	    if not content or content.startswith(("Lỗi:", "❌")):
	        return None

	    temp_file_path = None
	    try:
	        requested = file_format.lower()
	        # "docx" without python-docx falls through to plain text.
	        if requested == "docx" and DOCX_AVAILABLE:
	            suffix = ".docx"
	        elif requested == "md":
	            suffix = ".md"
	        else:
	            suffix = ".txt"

	        fd, temp_file_path = tempfile.mkstemp(suffix=suffix, prefix=f"{filename_prefix}_")
	        os.close(fd)

	        if suffix == ".docx":
	            from docx import Document
	            doc = Document()
	            doc.add_heading('Nội dung đã dịch', 0)
	            doc.add_paragraph(content)
	            doc.save(temp_file_path)
	        else:
	            with open(temp_file_path, 'w', encoding='utf-8') as f:
	                f.write(content)

	        return temp_file_path
	    except Exception as e:
	        # Report the failure instead of hiding it, and remove a half-written
	        # temp file.
	        print(f"⚠️ Could not create {file_format} file: {e}")
	        if temp_file_path and os.path.exists(temp_file_path):
	            try:
	                os.remove(temp_file_path)
	            except OSError:
	                pass
	        return None

	def format_voice_rag_response(question, answer, detected_language, voice_selection, timestamp=None):
	    """Render a Voice RAG question/answer pair as a Markdown report.

	    Args:
	        question: the user's question (shown as a block quote).
	        answer: the model's answer; surrounding whitespace is stripped.
	        detected_language: language detected for the source document.
	        voice_selection: voice label used for the spoken answer.
	        timestamp: text for the timestamp fields; defaults to the current
	            local time as "YYYY-MM-DD HH:MM:SS".

	    Returns:
	        The Markdown document as a string ending with a newline.
	    """
	    if timestamp is None:
	        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

	    formatted_answer = answer.strip()

	    # Built line by line so that table pipes are plain "|" (the former "\|"
	    # is an invalid escape that put backslashes into the output) and the
	    # text does not depend on how this source file is indented.
	    lines = [
	        "# 📚 Voice RAG - Intelligent Document Q&A",
	        "",
	        "---",
	        "",
	        "## 📄 Session Information",
	        "",
	        "| Field | Details |",
	        "|-----------|-------------|",
	        f"| 🕒 Timestamp | {timestamp} |",
	        f"| 🌍 Document Language | {detected_language} |",
	        f"| 🎭 Voice Selection | {voice_selection} |",
	        "| 🤖 AI Model | Google Gemini 2.0 Flash |",
	        "",
	        "---",
	        "",
	        "## ❓ Question",
	        "",
	        f"> {question}",
	        "",
	        "---",
	        "",
	        "## 💬 AI Response",
	        "",
	        formatted_answer,
	        "",
	        "---",
	        "",
	        "",
	        "---",
	        "",
	        "## 📱 Generated by",
	        "",
	        "🎙️ Voice AI Platform - Digitized Brains",
	        "Powered by Claude Code & Google Gemini 2.0 Flash",
	        "",
	        "> 🌐 Voice RAG Technology - Combining document intelligence with premium voice synthesis",
	        "",
	        "---",
	        "",
	        f"Generated on {timestamp} | Voice: {voice_selection} | Language: {detected_language}",
	    ]
	    return "\n".join(lines) + "\n"

	def format_voice_studio_response(text, voice_selection, speed, detected_language="Auto-detected", timestamp=None):
	    """Render a Voice Studio result as a short Markdown document.

	    Args:
	        text: the input text; surrounding whitespace is stripped.
	        voice_selection: voice label used for synthesis.
	        speed: playback speed multiplier, shown with one decimal.
	        detected_language: language shown next to the "Input Text" heading.
	        timestamp: text for the footer; defaults to the current local time
	            as "YYYY-MM-DD HH:MM:SS".

	    Returns:
	        The Markdown document as a string ending with a newline.
	    """
	    if timestamp is None:
	        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

	    formatted_text = text.strip()

	    # Plain "|" separators: the former "\|" is an invalid escape sequence
	    # that wrote literal backslashes into the footer.
	    lines = [
	        "# Voice Studio Result",
	        "",
	        f"## Input Text ({detected_language})",
	        "",
	        formatted_text,
	        "",
	        "---",
	        "",
	        f"Generated on {timestamp} | Voice: {voice_selection} | Speed: {speed:.1f}x",
	    ]
	    return "\n".join(lines) + "\n"

	def format_audio_translation_response(original_text, translated_text, source_language, target_language, voice_selection, timestamp=None):
	    """Render an audio translation result as a short Markdown document.

	    Args:
	        original_text: the transcription; surrounding whitespace is stripped.
	        translated_text: the translation; surrounding whitespace is stripped.
	        source_language: language of the transcription.
	        target_language: language of the translation.
	        voice_selection: voice label used for synthesis.
	        timestamp: text for the footer; defaults to the current local time
	            as "YYYY-MM-DD HH:MM:SS".

	    Returns:
	        The Markdown document as a string ending with a newline.
	    """
	    if timestamp is None:
	        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

	    formatted_original = original_text.strip()
	    formatted_translated = translated_text.strip()

	    # Plain "|" separators: the former "\|" is an invalid escape sequence
	    # that wrote literal backslashes into the footer.
	    lines = [
	        "# Audio Translation Result",
	        "",
	        f"## Original Text ({source_language})",
	        "",
	        formatted_original,
	        "",
	        f"## Translated Text ({target_language})",
	        "",
	        formatted_translated,
	        "",
	        "---",
	        "",
	        f"Generated on {timestamp} | {source_language} → {target_language} | Voice: {voice_selection}",
	    ]
	    return "\n".join(lines) + "\n"

	def create_audio_voice_studio(text, voice_selection, speed, text_format="txt"):
	    """Synthesize *text* and return an HTML audio player plus a text file.

	    Args:
	        text: text to speak; only the first 1000 characters are used.
	        voice_selection: voice label looked up in VOICE_MAP
	            (falls back to "vi-VN-HoaiMyNeural").
	        speed: speed multiplier; 1.0 is normal speed.
	        text_format: "md" or "docx" for the formatted report, "txt" for the
	            plain text; any other value produces no file.

	    Returns:
	        ``(html, text_file_path)``. On empty input or failure the first item
	        is a "❌ ..." message and the second is None.
	    """
	    if not text or not text.strip():
	        return "❌ Vui lòng nhập văn bản / Please enter text / Bitte Text eingeben", None

	    try:
	        from html import escape  # local import keeps this block self-contained

	        voice_name = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
	        text_limited = text[:1000]

	        # Speed multiplier -> signed fraction for Edge TTS (1.5 -> +0.5).
	        rate_percent = (speed - 1.0)

	        audio_data = asyncio.run(generate_speech(text_limited, voice_name, rate_percent))

	        # The audio is embedded in the page as a base64 data URI.
	        audio_base64 = base64.b64encode(audio_data).decode('utf-8')

	        timestamp = int(time.time())
	        filename = f"voice_{voice_name}_{speed}x_{timestamp}.mp3"

	        # Detected once and reused for both the player and the text file.
	        detected_lang = detect_language(text_limited)

	        # The label is interpolated into HTML, so it is escaped first.
	        safe_voice = escape(str(voice_selection))

	        # Mobile-optimized HTML player
	        html_player = f'''
	<div style="
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	border-radius: 20px;
	padding: 20px;
	margin: 10px 0;
	box-shadow: 0 8px 32px rgba(0,0,0,0.2);
	color: white;
	text-align: center;
	">
	<div style="margin-bottom: 20px;">
	<h3 style="color: #fff; margin: 0 0 15px 0; font-size: 1.3em; text-shadow: 1px 1px 2px rgba(0,0,0,0.3);">
	🎵 Âm thanh hoàn thành!
	</h3>
	<div style="
	background: rgba(255,255,255,0.2);
	border-radius: 12px;
	padding: 12px;
	font-size: 0.9em;
	line-height: 1.5;
	backdrop-filter: blur(10px);
	">
	<div><strong>🎭 Giọng:</strong> {safe_voice}</div>
	<div><strong>⚡ Tốc độ:</strong> {speed:.1f}x | <strong>🌍 Ngôn ngữ:</strong> {detected_lang.title()}</div>
	<div><strong>📝 Độ dài:</strong> {len(text_limited)} ký tự</div>
	</div>
	</div>

	<audio controls style="
	width: 100%;
	max-width: 100%;
	height: 50px;
	margin: 20px 0;
	border-radius: 25px;
	background: rgba(255,255,255,0.95);
	box-shadow: 0 4px 15px rgba(0,0,0,0.2);
	">
	<source src="data:audio/mpeg;base64,{audio_base64}" type="audio/mpeg">
	Trình duyệt không hỗ trợ audio.
	</audio>

	<div style="
	display: flex;
	justify-content: center;
	margin-top: 20px;
	">
	<a href="data:audio/mpeg;base64,{audio_base64}" download="{filename}"
	style="
	background: linear-gradient(45deg, #28a745, #20c997);
	color: white;
	padding: 15px 30px;
	text-decoration: none;
	border-radius: 25px;
	font-weight: 700;
	font-size: 1.1em;
	display: flex;
	align-items: center;
	justify-content: center;
	box-shadow: 0 4px 15px rgba(40,167,69,0.3);
	transition: all 0.3s ease;
	min-height: 48px;
	min-width: 200px;
	"
	ontouchstart=""
	onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 6px 20px rgba(40,167,69,0.4)'"
	onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='0 4px 15px rgba(40,167,69,0.3)'">
	📥 TẢI XUỐNG MP3
	</a>
	</div>
	</div>
	'''

	        # "md" and "docx" share the formatted report; "txt" stores the raw text.
	        text_file_path = None
	        if text_format in ("md", "docx"):
	            markdown_content = format_voice_studio_response(text_limited, voice_selection, speed, detected_lang)
	            text_file_path = create_text_file(markdown_content, text_format, "voice_studio")
	        elif text_format == "txt":
	            text_file_path = create_text_file(text_limited, "txt", "voice_studio")

	        return html_player, text_file_path

	    except Exception as e:
	        return f"❌ Error: {str(e)}", None

	# Language mapping for voices - defined once for performance
	# Maps every voice label (the same keys as VOICE_MAP) to the language name
	# that get_target_language_from_voice() returns as the translation target.
	VOICE_TO_LANGUAGE = {
	# Vietnamese
	"🇻🇳 HoaiMy - Nữ Việt Chuẩn": "Vietnamese",
	"🇻🇳 NamMinh - Nam Việt Chuẩn": "Vietnamese",
	# English
	"🇺🇸 Aria - Nữ Mỹ": "English",
	"🇺🇸 Guy - Nam Mỹ": "English",
	"🇬🇧 Sonia - Nữ Anh": "English",
	"🇬🇧 Ryan - Nam Anh": "English",
	# German
	"🇩🇪 Katja - Deutsche Frau": "German",
	"🇩🇪 Conrad - Deutscher Mann": "German",
	# French
	"🇫🇷 Denise - Française": "French",
	"🇫🇷 Henri - Français": "French",
	# Spanish
	"🇪🇸 Elvira - Española": "Spanish",
	"🇪🇸 Alvaro - Español": "Spanish",
	# Italian
	"🇮🇹 Elsa - Italiana": "Italian",
	"🇮🇹 Diego - Italiano": "Italian",
	# Japanese
	"🇯🇵 Nanami - 日本女性": "Japanese",
	"🇯🇵 Keita - 日本男性": "Japanese",
	# Korean
	"🇰🇷 SunHi - 한국 여성": "Korean",
	"🇰🇷 BongJin - 한국 남성": "Korean",
	# Chinese
	"🇨🇳 Xiaoxiao - 中文女声": "Chinese",
	"🇨🇳 Yunxi - 中文男声": "Chinese",
	# Russian
	"🇷🇺 Svetlana - Русская": "Russian",
	"🇷🇺 Dmitry - Русский": "Russian",
	# Portuguese
	"🇵🇹 Francisca - Portuguesa": "Portuguese",
	"🇵🇹 Antonio - Português": "Portuguese",
	# Arabic
	"🇸🇦 Zariyah - عربية": "Arabic",
	"🇸🇦 Hamed - عربي": "Arabic"
	}

	def get_target_language_from_voice(voice_selection):
	    """Return the translation target language for a voice label.

	    Labels missing from VOICE_TO_LANGUAGE map to "Vietnamese".
	    """
	    if voice_selection in VOICE_TO_LANGUAGE:
	        return VOICE_TO_LANGUAGE[voice_selection]
	    return "Vietnamese"

	def translate_text_with_gemini(text, target_language):
	    """Translate *text* into *target_language* with Gemini.

	    Returns the translation, "" for blank input, a "❌ Lỗi: ..." message when
	    no API key is configured, or "Lỗi dịch thuật: ..." when the request fails.
	    """
	    try:
	        if not configure_gemini_api():
	            return "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables"

	        if not text.strip():
	            return ""

	        gemini = genai.GenerativeModel("gemini-2.0-flash")

	        prompt = f"""Translate the following text to {target_language}. Return ONLY the translated text, nothing else:

	{text}"""

	        translated_text = gemini.generate_content(prompt).text.strip()

	        # Drop a leading "Translation:" label if the model added one.
	        label = "translation:"
	        if translated_text.lower().startswith(label):
	            translated_text = translated_text[len(label):].strip()

	        # Drop a "Here is ..." introduction line when more lines follow it.
	        if translated_text.lower().startswith("here is"):
	            _intro, *remaining = translated_text.split('\n')
	            if remaining:
	                translated_text = '\n'.join(remaining).strip()

	        return translated_text

	    except Exception as translation_error:
	        return f"Lỗi dịch thuật: {str(translation_error)}"

	def _translate_audio_failure(message, detected_language, target):
	    """Build the nine-item tuple translate_audio returns when it cannot finish."""
	    return message, detected_language, "", target, None, None, "", "", None


	def translate_audio(audio_file, target_country, voice_selection, text_format="txt"):
	    """Transcribe a recording, translate it and synthesize the translation.

	    The target language and the Edge TTS voice both come from
	    *voice_selection*.

	    Args:
	        audio_file: the recording, in any form save_recorded_audio accepts.
	        target_country: echoed back as the target when the request is
	            rejected before processing starts.
	        voice_selection: voice label.
	        text_format: "md" or "docx" for the formatted report; anything else
	            stores the translated text as plain text.

	    Returns:
	        ``(transcription, detected language, translated text, target
	        language, audio path, audio path, transcription, translated text,
	        text file path)``. On failure the first item is an error message and
	        the path items are None.
	    """
	    try:
	        api_key = configure_gemini_api()
	        if not api_key:
	            return _translate_audio_failure(
	                "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables",
	                "Không xác định", target_country)

	        if audio_file is None:
	            return _translate_audio_failure(
	                "Lỗi: Vui lòng tải lên file audio", "Không xác định", target_country)

	        # Keep a copy of the recording in the record_data directory.
	        print(f"🔍 Processing audio file type: {type(audio_file)}")
	        saved_audio_path = save_recorded_audio(audio_file)
	        if not saved_audio_path:
	            print("❌ Failed to save audio file")
	            return _translate_audio_failure(
	                "❌ Lỗi: Không thể lưu file audio", "Không xác định", target_country)

	        print(f"🎤 Audio saved to record_data: {os.path.basename(saved_audio_path)}")
	        if not os.path.exists(saved_audio_path):
	            print(f"❌ File not found after save: {saved_audio_path}")
	            return _translate_audio_failure(
	                "❌ Lỗi: Không thể lưu file audio", "Không xác định", target_country)
	        file_size = os.path.getsize(saved_audio_path)
	        print(f"✅ File confirmed: {saved_audio_path} ({file_size} bytes)")

	        target_language = get_target_language_from_voice(voice_selection)

	        model = genai.GenerativeModel("gemini-2.0-flash")

	        with open(saved_audio_path, 'rb') as f:
	            recording_bytes = f.read()

	        audio_blob = {
	            'mime_type': 'audio/wav',
	            'data': recording_bytes
	        }

	        # Step 1: transcribe in the original language.
	        transcribe_prompt = """Transcribe this audio accurately in its original language. Return only the transcribed text, nothing else."""

	        response = model.generate_content([transcribe_prompt, audio_blob])
	        transcription = response.text.strip()

	        # Step 2: detect the language of the transcription.
	        detected_lang = detect_language(transcription)

	        # Step 3: translate only when source and target differ.
	        if detected_lang.lower() != target_language.lower():
	            print(f"🔄 Translating from {detected_lang} to {target_language}")
	            translated_text = translate_text_with_gemini(transcription, target_language)

	            if translated_text.startswith(("❌", "Lỗi")):
	                print(f"❌ Translation failed: {translated_text}")
	                # Fall back to the untranslated transcription.
	                translated_text = transcription
	            else:
	                print(f"✅ Translation successful")
	        else:
	            print(f"ℹ️ No translation needed - same language ({detected_lang})")
	            translated_text = transcription

	        edge_voice = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
	        print(f"🎙️ Generating audio with voice: {edge_voice}")
	        speech_bytes = asyncio.run(generate_speech(translated_text, edge_voice, 0.0))
	        print(f"🎵 Generated audio data: {len(speech_bytes)} bytes")

	        # The bytes from generate_speech are served as audio/mpeg by the Voice
	        # Studio player in this file, so the file gets an .mp3 suffix rather
	        # than the former .wav.
	        fd, temp_output_path = tempfile.mkstemp(suffix=".mp3", prefix="translated_audio_")
	        os.close(fd)
	        print(f"📁 Created temp audio file: {temp_output_path}")

	        with open(temp_output_path, 'wb') as f:
	            f.write(speech_bytes)

	        if os.path.exists(temp_output_path):
	            file_size = os.path.getsize(temp_output_path)
	            print(f"✅ Audio file created successfully: {file_size} bytes")
	        else:
	            print(f"❌ Failed to create audio file: {temp_output_path}")

	        # "md" and "docx" share the formatted report; otherwise plain text.
	        if text_format in ("md", "docx"):
	            markdown_content = format_audio_translation_response(
	                transcription, translated_text, detected_lang, target_language, voice_selection
	            )
	            text_file_path = create_text_file(markdown_content, text_format, "audio_translation")
	        else:
	            text_file_path = create_text_file(translated_text, "txt", "audio_translation")

	        return transcription, detected_lang, translated_text, target_language, temp_output_path, temp_output_path, transcription, translated_text, text_file_path

	    except Exception as e:
	        # voice_selection is a parameter, so it is always bound here; the
	        # former `in locals()` check could never be false.
	        target_language = get_target_language_from_voice(voice_selection)
	        return _translate_audio_failure(f"Lỗi: {str(e)}", "Lỗi", target_language)

	# Voice choices organized by country - ONLY OFFICIAL VOICES
	# Maps each country label to its list of voice labels. update_voices() uses
	# this to repopulate the voice dropdown and falls back to the "🇻🇳 Việt Nam"
	# entry for an unknown country.
	voice_choices_by_country = {
	"🇻🇳 Việt Nam": [
	"🇻🇳 HoaiMy - Nữ Việt Chuẩn",
	"🇻🇳 NamMinh - Nam Việt Chuẩn"
	],
	"🇺🇸 Hoa Kỳ": [
	"🇺🇸 Aria - Nữ Mỹ",
	"🇺🇸 Guy - Nam Mỹ"
	],
	"🇬🇧 Anh": [
	"🇬🇧 Sonia - Nữ Anh",
	"🇬🇧 Ryan - Nam Anh"
	],
	"🇩🇪 Đức": [
	"🇩🇪 Katja - Deutsche Frau",
	"🇩🇪 Conrad - Deutscher Mann"
	],
	"🇫🇷 Pháp": [
	"🇫🇷 Denise - Française",
	"🇫🇷 Henri - Français"
	],
	"🇪🇸 Tây Ban Nha": [
	"🇪🇸 Elvira - Española",
	"🇪🇸 Alvaro - Español"
	],
	"🇮🇹 Ý": [
	"🇮🇹 Elsa - Italiana",
	"🇮🇹 Diego - Italiano"
	],
	"🇯🇵 Nhật Bản": [
	"🇯🇵 Nanami - 日本女性",
	"🇯🇵 Keita - 日本男性"
	],
	"🇰🇷 Hàn Quốc": [
	"🇰🇷 SunHi - 한국 여성",
	"🇰🇷 BongJin - 한국 남성"
	],
	"🇨🇳 Trung Quốc": [
	"🇨🇳 Xiaoxiao - 中文女声",
	"🇨🇳 Yunxi - 中文男声"
	],
	"🇷🇺 Nga": [
	"🇷🇺 Svetlana - Русская",
	"🇷🇺 Dmitry - Русский"
	],
	"🇵🇹 Bồ Đào Nha": [
	"🇵🇹 Francisca - Portuguesa",
	"🇵🇹 Antonio - Português"
	],
	"🇸🇦 Ả Rập": [
	"🇸🇦 Zariyah - عربية",
	"🇸🇦 Hamed - عربي"
	]
	}

	def update_voices(country):
	    """Build the voice dropdown for the selected country.

	    Looks the country up in ``voice_choices_by_country``; a country that is
	    not a key there falls back to the Vietnamese entry. The returned
	    ``gr.Dropdown`` lists that country's voices with the first one selected.
	    """
	    if country not in voice_choices_by_country:
	        # Unknown selection: default to Vietnamese voices
	        country = "🇻🇳 Việt Nam"
	    available_voices = voice_choices_by_country[country]
	    return gr.Dropdown(choices=available_voices, value=available_voices[0])

	# Lightweight CSS - optimized for performance
	css = """
	* {
	font-family: system-ui, -apple-system, 'Segoe UI', Arial, sans-serif;
	}

	.gradio-container {
	max-width: 1200px;
	margin: 0 auto;
	position: relative;
	}

	/* Critical fix for dropdown interaction */
	.gradio-container * {
	pointer-events: auto;
	}

	/* Hide Gradio footer */
	.footer {
	display: none !important;
	}

	/* Pulsing animation for processing status */
	@keyframes pulse-processing {
	0% {
	opacity: 1;
	transform: scale(1);
	box-shadow: 0 4px 15px rgba(255, 193, 7, 0.3);
	}
	50% {
	opacity: 0.8;
	transform: scale(1.02);
	box-shadow: 0 6px 25px rgba(255, 193, 7, 0.6);
	}
	100% {
	opacity: 1;
	transform: scale(1);
	box-shadow: 0 4px 15px rgba(255, 193, 7, 0.3);
	}
	}

	.status-processing {
	animation: pulse-processing 1.5s ease-in-out infinite;
	background: linear-gradient(135deg, #FFC107 0%, #FF9800 100%) !important;
	}

	/* Success status animation */
	@keyframes pulse-success {
	0% {
	opacity: 1;
	transform: scale(1);
	}
	50% {
	opacity: 0.9;
	transform: scale(1.01);
	}
	100% {
	opacity: 1;
	transform: scale(1);
	}
	}

	.status-success {
	animation: pulse-success 2s ease-in-out 3;
	background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;
	}

	/* Custom footer to cover Gradio attribution */
	.custom-footer {
	position: fixed;
	bottom: 0;
	left: 0;
	right: 0;
	background: linear-gradient(135deg, #4A90E2 0%, #2E86AB 70%, #FF8A65 85%, #FF6B9D 100%);
	color: white;
	padding: 15px;
	text-align: center;
	font-weight: bold;
	z-index: 1000;
	box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
	}

	/* Add padding to body to account for fixed footer */
	body {
	padding-bottom: 60px;
	}

	/* Mobile-first responsive design */
	.input-card {
	background: rgba(255,255,255,0.95);
	border-radius: 16px;
	padding: 16px;
	margin: 10px 0;
	box-shadow: 0 4px 20px rgba(0,0,0,0.1);
	backdrop-filter: blur(10px);
	}

	.output-area {
	background: rgba(255,255,255,0.95);
	border-radius: 16px;
	padding: 16px;
	margin: 15px 0;
	min-height: 200px;
	box-shadow: 0 4px 20px rgba(0,0,0,0.1);
	}

	.examples-section {
	background: rgba(255,255,255,0.9);
	border-radius: 16px;
	padding: 16px;
	margin: 20px 0;
	}

	.main-header {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 20px;
	border-radius: 10px;
	margin-bottom: 20px;
	text-align: center;
	}

	.feature-box {
	background: #f8f9fa;
	padding: 15px;
	border-radius: 8px;
	margin: 10px 0;
	border-left: 4px solid #667eea;
	}

	.status-indicator {
	display: inline-block;
	padding: 5px 10px;
	border-radius: 15px;
	font-size: 12px;
	font-weight: bold;
	margin: 5px;
	}

	.status-success {
	background-color: #d4edda;
	color: #155724;
	}

	.status-processing {
	background-color: #fff3cd;
	color: #856404;
	}

	.comparison-section {
	border: 1px solid #e0e0e0;
	border-radius: 8px;
	padding: 15px;
	margin: 10px 0;
	background: #fafafa;
	}

	.language-label {
	font-weight: bold;
	color: #667eea;
	padding: 5px 10px;
	background: #f0f2ff;
	border-radius: 15px;
	display: inline-block;
	margin-bottom: 10px;
	font-size: 14px;
	}

	.content-compare {
	background: white;
	border: 1px solid #ddd;
	border-radius: 6px;
	padding: 12px;
	min-height: 120px;
	font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
	line-height: 1.5;
	}

	/* Reset any problematic dropdown styles */
	.gradio-container * {
	pointer-events: auto;
	}

	/* Remove any potential blocking overlays */
	.gradio-container::before,
	.gradio-container::after {
	display: none;
	}

	/* Ensure all interactive elements work */
	button, select, input, textarea, .gr-dropdown {
	pointer-events: auto !important;
	position: relative !important;
	}

	/* Simple dropdown fix without complex selectors */
	[class*="dropdown"] {
	position: relative !important;
	z-index: 999 !important;
	}

	[class="dropdown"] {
	pointer-events: auto !important;
	}

	/* Make sure no overlay blocks clicks */
	.gradio-container .gr-form {
	position: relative;
	z-index: 1;
	}

	.gradio-container .gr-block {
	position: relative;
	z-index: 1;
	}

	.mobile-button {
	width: 100% !important;
	padding: 15px !important;
	font-size: 1.1em !important;
	margin: 20px 0 !important;
	border-radius: 12px !important;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	border: none !important;
	color: white !important;
	font-weight: bold !important;
	box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3) !important;
	transition: all 0.3s ease !important;
	cursor: pointer !important;
	position: relative !important;
	overflow: hidden !important;
	}

	.mobile-button:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4) !important;
	background: linear-gradient(135deg, #5a6fd8 0%, #6b4190 100%) !important;
	}

	.mobile-button:active {
	transform: translateY(0px) !important;
	box-shadow: 0 2px 10px rgba(102, 126, 234, 0.3) !important;
	}

	/* Ripple effect for button */
	.mobile-button::before {
	content: '';
	position: absolute;
	top: 50%;
	left: 50%;
	width: 0;
	height: 0;
	border-radius: 50%;
	background: rgba(255, 255, 255, 0.3);
	transform: translate(-50%, -50%);
	transition: width 0.6s, height 0.6s;
	}

	.mobile-button:active::before {
	width: 300px;
	height: 300px;
	}

	/* Loading spinner animation */
	@keyframes spin {
	0% { transform: rotate(0deg); }
	100% { transform: rotate(360deg); }
	}

	.loading-spinner {
	display: inline-block;
	width: 20px;
	height: 20px;
	border: 3px solid rgba(255,255,255,0.3);
	border-radius: 50%;
	border-top-color: white;
	animation: spin 1s ease-in-out infinite;
	margin-right: 10px;
	}

	/* Button pulse effect when processing */
	@keyframes pulse {
	0% {
	box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
	}
	50% {
	box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6);
	}
	100% {
	box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
	}
	}

	.button-processing {
	animation: pulse 2s ease-in-out infinite;
	background: linear-gradient(135deg, #FF8E53 0%, #FF6B6B 100%) !important;
	}

	.mobile-textbox textarea {
	border-radius: 10px !important;
	border: 2px solid #e0e0e0 !important;
	padding: 12px !important;
	font-size: 1em !important;
	line-height: 1.5 !important;
	}

	.mobile-compare textarea {
	border-radius: 8px !important;
	border: 1px solid #ddd !important;
	padding: 10px !important;
	background: #fafafa !important;
	font-size: 0.95em !important;
	}

	.mobile-audio {
	margin: 10px 0 !important;
	border-radius: 10px !important;
	}

	.mobile-file {
	margin: 10px 0 !important;
	border-radius: 10px !important;
	}

	/* Beautiful Markdown styling for Voice RAG responses */
	.markdown-response {
	background: linear-gradient(135deg, #ffffff 0%, #f8fffe 100%);
	border-radius: 12px;
	padding: 20px;
	margin: 15px 0;
	box-shadow: 0 4px 20px rgba(0,0,0,0.1);
	border-left: 4px solid #4CAF50;
	}

	.markdown-response h1 {
	color: #2e7d32;
	border-bottom: 2px solid #4CAF50;
	padding-bottom: 10px;
	margin-bottom: 20px;
	font-size: 1.8em;
	}

	.markdown-response h2 {
	color: #388E3C;
	margin-top: 25px;
	margin-bottom: 15px;
	font-size: 1.4em;
	border-left: 3px solid #4CAF50;
	padding-left: 15px;
	}

	.markdown-response h3 {
	color: #43A047;
	margin-top: 20px;
	margin-bottom: 12px;
	font-size: 1.2em;
	}

	.markdown-response p {
	line-height: 1.6;
	margin-bottom: 12px;
	color: #333;
	}

	.markdown-response blockquote {
	background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
	border-left: 4px solid #4CAF50;
	padding: 15px 20px;
	margin: 15px 0;
	border-radius: 8px;
	font-style: italic;
	color: #2e7d32;
	}

	.markdown-response table {
	width: 100%;
	border-collapse: collapse;
	margin: 15px 0;
	box-shadow: 0 2px 10px rgba(0,0,0,0.1);
	border-radius: 8px;
	overflow: hidden;
	}

	.markdown-response table th {
	background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
	color: white;
	padding: 12px 15px;
	text-align: left;
	font-weight: bold;
	}

	.markdown-response table td {
	padding: 12px 15px;
	border-bottom: 1px solid #e0e0e0;
	background: white;
	}

	.markdown-response table tr:nth-child(even) td {
	background: #f8fffe;
	}

	.markdown-response table tr:hover td {
	background: #e8f5e8;
	transition: background 0.3s ease;
	}

	.markdown-response ul, .markdown-response ol {
	margin: 15px 0;
	padding-left: 25px;
	}

	.markdown-response li {
	margin-bottom: 8px;
	line-height: 1.5;
	}

	.markdown-response code {
	background: #f5f5f5;
	border: 1px solid #e0e0e0;
	border-radius: 4px;
	padding: 2px 6px;
	font-family: 'Courier New', monospace;
	color: #d32f2f;
	}

	.markdown-response pre {
	background: #f5f5f5;
	border: 1px solid #e0e0e0;
	border-radius: 8px;
	padding: 15px;
	overflow-x: auto;
	margin: 15px 0;
	}

	.markdown-response pre code {
	background: none;
	border: none;
	padding: 0;
	color: #333;
	}

	.markdown-response hr {
	border: none;
	height: 2px;
	background: linear-gradient(90deg, transparent, #4CAF50, transparent);
	margin: 25px 0;
	}

	.markdown-response strong {
	color: #2e7d32;
	font-weight: bold;
	}

	.markdown-response em {
	color: #388E3C;
	font-style: italic;
	}

	/* Responsive design for markdown */
	@media (max-width: 768px) {
	.markdown-response {
	padding: 15px;
	margin: 10px 0;
	}

	.markdown-response table {
	font-size: 0.9em;
	}

	.markdown-response h1 {
	font-size: 1.6em;
	}

	.markdown-response h2 {
	font-size: 1.3em;
	}
	}

	/* Mobile responsive breakpoints */
	@media (max-width: 768px) {
	.gradio-container {
	padding: 10px !important;
	}

	.input-card {
	padding: 12px !important;
	margin: 8px 0 !important;
	}

	.output-area {
	padding: 12px !important;
	margin: 10px 0 !important;
	}

	.examples-section {
	padding: 12px !important;
	}

	.main-header h2 {
	font-size: 1.5em !important;
	}

	.main-header p {
	font-size: 1em !important;
	}

	/* Mobile layout adjustments - less aggressive */
	.gr-row {
	flex-direction: column;
	}

	.gr-column {
	width: 100%;
	margin-bottom: 15px;
	}
	}

	@media (max-width: 480px) {
	.gradio-container {
	padding: 5px !important;
	}

	.input-card {
	padding: 10px !important;
	margin: 5px 0 !important;
	}

	.main-header {
	padding: 15px !important;
	}

	.main-header h2 {
	font-size: 1.3em !important;
	}

	.mobile-button {
	padding: 12px !important;
	font-size: 1em !important;
	}
	}

	/* End of stylesheet (button interaction JavaScript is not part of this CSS) */
	"""

	# Client-side script for the '.mobile-button' elements: hover lift, press
	# feedback and a temporary "processing" state after a click.
	#
	# Fixes relative to the previous version of this string:
	#   * the logical OR is written as `||` (the backslash-escaped form was an
	#     invalid Python escape sequence and invalid JavaScript);
	#   * effects are bound once per button (tracked via a data attribute), so
	#     the 2-second re-initialisation no longer stacks duplicate hover
	#     listeners on the same element;
	#   * the click handler uses `e.currentTarget`, so a click landing on a
	#     child node of the button still updates the button itself.
	#
	# NOTE(review): nothing in the visible part of this file passes js_code to
	# Gradio - confirm where (or whether) it is injected.
	# NOTE(review): the completion probe looks for "Hoàn thành" inside style
	# attributes, which is unlikely to ever match, so in practice the reset
	# happens through the ~15 s cap (50 polls x 300 ms) - confirm intent.
	js_code = """
	<script>
	function addButtonEffects() {
	    // Find button by class since Gradio might change IDs
	    const buttons = document.querySelectorAll('.mobile-button');

	    buttons.forEach(button => {
	        // Bind only once per element; this function runs again every 2 seconds
	        if (button.dataset.effectsBound === 'true') {
	            return;
	        }
	        button.dataset.effectsBound = 'true';

	        // Add enhanced click effect
	        button.addEventListener('click', handleClick);

	        // Add hover effects for better interaction
	        button.addEventListener('mouseenter', function() {
	            if (!this.disabled) {
	                this.style.transform = 'translateY(-2px) scale(1.02)';
	            }
	        });

	        button.addEventListener('mouseleave', function() {
	            if (!this.disabled) {
	                this.style.transform = 'translateY(0) scale(1)';
	            }
	        });
	    });
	}

	function handleClick(e) {
	    // currentTarget is always the button the listener was attached to
	    const button = e.currentTarget;

	    // Immediate visual feedback
	    button.style.transform = 'scale(0.98)';
	    button.style.transition = 'all 0.1s ease';

	    setTimeout(() => {
	        button.style.transform = 'scale(1)';
	        button.style.transition = 'all 0.3s ease';
	    }, 100);

	    // Add processing state
	    const originalText = button.innerHTML;
	    button.innerHTML = '<span class="loading-spinner"></span>⏳ ĐANG XỬ LÝ...';
	    button.classList.add('button-processing');
	    button.disabled = true;

	    // Monitor for completion and reset
	    let checkCount = 0;
	    const checkInterval = setInterval(() => {
	        checkCount++;

	        // Reset after 15 seconds max or if status changes
	        const statusElements = document.querySelectorAll('[style*="Hoàn thành"]');
	        if (statusElements.length > 0 || checkCount > 50) {
	            clearInterval(checkInterval);
	            button.innerHTML = originalText;
	            button.classList.remove('button-processing');
	            button.disabled = false;
	            button.style.transform = 'scale(1)';
	        }
	    }, 300);
	}

	// Initialize when DOM is ready
	if (document.readyState === 'loading') {
	    document.addEventListener('DOMContentLoaded', addButtonEffects);
	} else {
	    addButtonEffects();
	}

	// Re-initialize periodically for Gradio updates
	setInterval(addButtonEffects, 2000);
	</script>
	"""

	# Create interface with tabs
	with gr.Blocks(css=css, title="🎙️ Voice AI Platform - Voice RAG & Audio Translation") as demo:
	# Simplified header for faster loading on HF Spaces
	if not (os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")):
	# Only load complex microphone permissions in local development
	gr.HTML("""
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<meta http-equiv="Permissions-Policy" content="microphone=, camera=, display-capture=, autoplay=">
	<meta http-equiv="Feature-Policy" content="microphone 'self' ; camera 'self' ; autoplay 'self' *">
	<meta name="theme-color" content="#4A90E2">

	<script>
	// Global microphone management
	window.microphoneStatus = {
	granted: false,
	requested: false,
	supported: false
	};

	// Enhanced microphone permission request for iframe and main window
	function initializeMicrophoneSupport() {
	console.log('🎤 Initializing microphone support...');

	// Check browser support
	if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
	window.microphoneStatus.supported = true;
	console.log('✅ Browser supports microphone');

	// Check current permission status
	if (navigator.permissions) {
	navigator.permissions.query({name: 'microphone'}).then(function(result) {
	console.log('🔐 Current microphone permission:', result.state);
	window.microphoneStatus.granted = (result.state === 'granted');

	// Update UI based on permission status
	updateMicrophoneUI(result.state);

	// Listen for permission changes
	result.onchange = function() {
	console.log('🔄 Microphone permission changed to:', this.state);
	window.microphoneStatus.granted = (this.state === 'granted');
	updateMicrophoneUI(this.state);
	};
	}).catch(function(err) {
	console.log('⚠️ Permission query failed:', err);
	});
	}

	// Auto-request permissions if we're in iframe (with user gesture simulation)
	if (window.location !== window.parent.location && !window.microphoneStatus.requested) {
	console.log('🖼️ Running in iframe - preparing microphone access');
	window.microphoneStatus.requested = true;

	// Add a global click listener to request permissions on first interaction
	document.addEventListener('click', function requestOnFirstClick() {
	if (!window.microphoneStatus.granted) {
	console.log('👆 First click detected - requesting microphone access');
	requestMicrophonePermission();
	document.removeEventListener('click', requestOnFirstClick);
	}
	}, { once: true });
	}
	} else {
	console.log('❌ Browser does not support microphone');
	window.microphoneStatus.supported = false;
	updateMicrophoneUI('unsupported');
	}
	}

	function requestMicrophonePermission() {
	console.log('🎤 Requesting microphone permission...');

	if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
	navigator.mediaDevices.getUserMedia({
	audio: {
	echoCancellation: true,
	noiseSuppression: true,
	autoGainControl: true,
	sampleRate: 44100
	}
	})
	.then(function(stream) {
	console.log('✅ Microphone access granted');
	window.microphoneStatus.granted = true;

	// Stop the stream immediately (we just wanted permission)
	stream.getTracks().forEach(track => track.stop());

	updateMicrophoneUI('granted');

	// Notify other parts of the app
	window.dispatchEvent(new CustomEvent('microphoneGranted'));

	})
	.catch(function(err) {
	console.log('❌ Microphone access denied:', err);
	window.microphoneStatus.granted = false;
	updateMicrophoneUI('denied', err.message);
	});
	}
	}

	function updateMicrophoneUI(status, errorMessage = '') {
	// This will be called by the specific UI components
	console.log('🎛️ Updating microphone UI for status:', status);
	window.dispatchEvent(new CustomEvent('microphoneStatusChanged', {
	detail: { status, errorMessage }
	}));
	}

	// Initialize when DOM is ready
	if (document.readyState === 'loading') {
	document.addEventListener('DOMContentLoaded', initializeMicrophoneSupport);
	} else {
	initializeMicrophoneSupport();
	}

	// Also initialize on any dynamic content changes (for Gradio updates)
	if (window.MutationObserver) {
	const observer = new MutationObserver(function(mutations) {
	mutations.forEach(function(mutation) {
	if (mutation.type === 'childList' && mutation.addedNodes.length > 0) {
	// Check if audio components were added
	const hasAudioComponent = Array.from(mutation.addedNodes).some(node =>
	node.nodeType === 1 && (
	node.querySelector && (
	node.querySelector('audio') \|\|
	node.querySelector('[data-testid*="audio"]') \|\|
	node.classList.contains('audio')
	)
	)
	);

	if (hasAudioComponent) {
	console.log('🔄 Audio component detected, re-initializing microphone');
	setTimeout(initializeMicrophoneSupport, 500);
	}
	}
	});
	});

	observer.observe(document.body, {
	childList: true,
	subtree: true
	});
	}
	</script>

	<div style="text-align: center; background: linear-gradient(135deg, #4A90E2 0%, #FF6B9D 100%); color: white; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
	<h1>🎙️ Voice AI Platform</h1>
	<p>Voice RAG, Audio Translation và Voice Studio - Nền tảng AI giọng nói toàn diện</p>
	<div style="margin-top: 10px; font-size: 14px; opacity: 0.9;">
	✨ Tính năng mới: Voice RAG với 24 giọng nói đa ngôn ngữ
	</div>
	<div style="margin-top: 8px;">🧠 <strong>Digitized Brains</strong></div>
	</div>
	""")
	else:
	# Production mode - minimal header
	gr.HTML('<div style="text-align:center;"><h1>🎙️ Voice AI Platform</h1></div>')

	with gr.Tabs():
	# Tab 1: Voice RAG
	with gr.TabItem("📚 Voice RAG"):
	# Header section with hf_voice style
	gr.HTML("""
	<div style="display: flex; justify-content: center; gap: 15px; margin: 20px 0; flex-wrap: wrap;">
	<div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;">
	<h4>📚 Voice RAG</h4>
	<p style="margin: 0; font-size: 12px;">Hỏi đáp tài liệu thông minh</p>
	</div>
	<div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;">
	<h4>🌍 Multi-Language</h4>
	<p style="margin: 0; font-size: 12px;">13 ngôn ngữ trả lời</p>
	</div>
	<div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;">
	<h4>🎤 Voice Output</h4>
	<p style="margin: 0; font-size: 12px;">24 giọng nói đa dạng</p>
	</div>
	<div style="background: linear-gradient(135deg, #A8E6CF 0%, #88D8A3 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;">
	<h4>🔄 AI Gemini</h4>
	<p style="margin: 0; font-size: 12px;">Gemini 2.0 Flash</p>
	</div>
	</div>
	""")

	gr.Markdown("### 📝 Upload tài liệu và đặt câu hỏi")

	# Input section - Mobile optimized
	with gr.Column():
	# Document upload
	with gr.Row():
	file_upload_rag = gr.File(
	label="📎 Tải lên tài liệu (PDF, DOCX, TXT)",
	file_types=[".pdf", ".docx", ".txt"]
	)

	# Question input
	with gr.Row():
	question_input_rag = gr.Textbox(
	label="❓ Câu hỏi của bạn",
	placeholder="Hãy đặt câu hỏi về nội dung tài liệu...",
	lines=3
	)

	# Language selection for answer
	with gr.Row():
	answer_language_dropdown_rag = gr.Dropdown(
	choices=SUPPORTED_LANGUAGES,
	value="Vietnamese",
	label="🌍 Ngôn ngữ trả lời"
	)

	# Voice selection từ Voice Studio
	with gr.Row():
	with gr.Column(scale=1):
	rag_country_dropdown = gr.Dropdown(
	choices=list(voice_choices_by_country.keys()),
	value="🇻🇳 Việt Nam",
	label="🌍 Chọn quốc gia giọng nói"
	)

	with gr.Column(scale=1):
	rag_voice_dropdown = gr.Dropdown(
	choices=voice_choices_by_country["🇻🇳 Việt Nam"],
	value="🇻🇳 HoaiMy - Nữ Việt Chuẩn",
	label="🎭 Chọn giọng nói"
	)

	# Format selection for download
	with gr.Row():
	rag_text_format_dropdown = gr.Dropdown(
	choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"],
	value="Markdown (.md)",
	label="📄 Định dạng file trả lời"
	)

	# Process button
	with gr.Row():
	submit_btn_rag = gr.Button(
	"🚀 Xử lý tài liệu và trả lời",
	variant="primary",
	size="lg"
	)

	# Results section - Mobile optimized
	with gr.Column():
	# Document info section
	with gr.Accordion("📄 Thông tin tài liệu", open=True):
	detected_doc_language_rag = gr.Textbox(
	label="🌐 Ngôn ngữ tài liệu được phát hiện",
	lines=1,
	interactive=False,
	placeholder="Tự động nhận diện ngôn ngữ tài liệu..."
	)

	# Text answer section
	with gr.Accordion("💬 Câu trả lời", open=True):
	gr.HTML("""
	<div style="
	background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
	padding: 15px;
	border-radius: 12px;
	margin: 15px 0;
	border-left: 4px solid #4CAF50;
	text-align: center;
	">
	<h4 style="margin: 0 0 10px 0; color: #2e7d32;">💬 AI Response with Markdown Formatting</h4>
	<p style="color: #388E3C; margin: 0; font-style: italic;">
	Formatted response with tables, headers, and beautiful layout
	</p>
	</div>
	""")

	answer_output_rag = gr.Markdown(
	value="Câu trả lời sẽ xuất hiện ở đây sau khi xử lý...\n\nHỗ trợ format Markdown với tables, headers, lists và nhiều style khác",
	label="",
	show_label=False,
	elem_classes=["markdown-response"]
	)

	# Downloads section - Mobile optimized
	with gr.Accordion("💾 Tải xuống kết quả", open=True):
	gr.HTML("""
	<div style="text-align: center; margin-bottom: 15px;">
	<p style="color: #666; font-style: italic;">Tải xuống câu trả lời dưới dạng file và audio</p>
	</div>
	""")

	# Stack vertically on mobile
	with gr.Column():
	# Audio download section
	with gr.Row():
	audio_output_rag = gr.Audio(
	label="🔊 Audio câu trả lời",
	type="filepath"
	)

	# Text download section
	with gr.Row():
	text_output_rag = gr.File(
	label="📄 Văn bản câu trả lời",
	file_count="single",
	file_types=[".md", ".txt", ".docx"]
	)

	# Status indicator for RAG
	rag_status_text = gr.HTML("""
	<div style="text-align: center; margin: 20px 0;">
	<div style="
	background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%);
	color: white;
	padding: 15px;
	border-radius: 12px;
	box-shadow: 0 4px 15px rgba(78,205,196,0.3);
	">
	<span style="font-weight: bold; font-size: 1.1em;">✅ Sẵn sàng xử lý tài liệu</span>
	</div>
	</div>
	""")

	# Helper function for RAG format
	def get_rag_format_from_dropdown(format_choice):
	    """Translate a format dropdown label into a file-format key.

	    Args:
	        format_choice: Label selected in the format dropdown, e.g.
	            "Markdown (.md)", "TXT (.txt)" or "Word (.docx)".

	    Returns:
	        "docx" when the label mentions Word/docx, "md" when it mentions
	        Markdown/md, otherwise "txt". A missing or non-string value also
	        yields "txt" instead of raising ``TypeError`` on the substring test.
	    """
	    if not isinstance(format_choice, str):
	        return "txt"
	    # Word is checked first, matching the original precedence.
	    if "Word" in format_choice or "docx" in format_choice:
	        return "docx"
	    if "Markdown" in format_choice or "md" in format_choice:
	        return "md"
	    return "txt"

	# Status banner shown while a RAG request is being processed
	def update_rag_status_processing():
	    """Return the HTML snippet for the in-progress status banner.

	    The markup is a centered card with an orange-to-red gradient background
	    and an hourglass message; it takes no input and has no side effects.
	    """
	    processing_banner_html = """
	<div style="text-align: center; margin: 20px 0;">
	<div style="
	background: linear-gradient(135deg, #FF8E53 0%, #FF6B6B 100%);
	color: white;
	padding: 15px;
	border-radius: 12px;
	box-shadow: 0 4px 15px rgba(255,142,83,0.3);
	">
	<span style="font-weight: bold; font-size: 1.1em;">⏳ Đang xử lý tài liệu...</span>
	</div>
	</div>
	"""
	    return processing_banner_html

	def update_rag_status_complete():
	    """Return the HTML snippet for the finished status banner.

	    The markup is a centered card with a teal-to-green gradient background
	    and a check-mark message; it takes no input and has no side effects.
	    """
	    complete_banner_html = """
	<div style="text-align: center; margin: 20px 0;">
	<div style="
	background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%);
	color: white;
	padding: 15px;
	border-radius: 12px;
	box-shadow: 0 4px 15px rgba(78,205,196,0.3);
	">
	<span style="font-weight: bold; font-size: 1.1em;">✅ Xử lý hoàn thành!</span>
	</div>
	</div>
	"""
	    return complete_banner_html

	# Event handlers for Voice RAG
	rag_country_dropdown.change(
	fn=update_voices,
	inputs=[rag_country_dropdown],
	outputs=[rag_voice_dropdown]
	)

	submit_btn_rag.click(
	fn=lambda: update_rag_status_processing(),
	outputs=[rag_status_text]
	).then(
	fn=lambda file, question, lang, voice, fmt: voice_rag_pipeline(file, question, lang, voice, get_rag_format_from_dropdown(fmt)),
	inputs=[file_upload_rag, question_input_rag, answer_language_dropdown_rag, rag_voice_dropdown, rag_text_format_dropdown],
	outputs=[answer_output_rag, detected_doc_language_rag, audio_output_rag, text_output_rag]
	).then(
	fn=lambda: update_rag_status_complete(),
	outputs=[rag_status_text]
	)

	# Voice Studio Tab
	with gr.TabItem("🎤 Voice Studio"):
	gr.HTML("""
	<div style="display: flex; justify-content: center; gap: 15px; margin: 20px 0; flex-wrap: wrap;">
	<div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">
	<h4>🇻🇳 Tiếng Việt</h4>
	<p style="margin: 0; font-size: 12px;">2 giọng chuẩn</p>
	<p style="margin: 0; font-size: 10px;">HoaiMy • NamMinh</p>
	</div>
	<div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">
	<h4>🇺🇸🇬🇧 English</h4>
	<p style="margin: 0; font-size: 12px;">4 giọng chuẩn</p>
	<p style="margin: 0; font-size: 10px;">US • UK</p>
	</div>
	<div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">
	<h4>🌍 Đa ngôn ngữ</h4>
	<p style="margin: 0; font-size: 12px;">20 giọng chuẩn</p>
	<p style="margin: 0; font-size: 10px;">10 ngôn ngữ</p>
	</div>
	</div>
	""")

	gr.Markdown("### 📝 Nhập nội dung và chọn giọng nói")

	with gr.Row():
	text_input = gr.Textbox(
	placeholder="Nhập văn bản cần chuyển thành giọng nói...",
	lines=4,
	label="Văn bản",
	scale=2
	)

	with gr.Row():
	with gr.Column(scale=1):
	country_dropdown = gr.Dropdown(
	choices=list(voice_choices_by_country.keys()),
	value="🇻🇳 Việt Nam",
	label="🌍 Chọn quốc gia"
	)

	with gr.Column(scale=1):
	voice_dropdown = gr.Dropdown(
	choices=voice_choices_by_country["🇻🇳 Việt Nam"],
	value="🇻🇳 HoaiMy - Nữ Việt Chuẩn",
	label="🎭 Chọn giọng nói"
	)

	with gr.Row():
	with gr.Column(scale=2):
	speed_slider = gr.Slider(
	minimum=0.5,
	maximum=2.0,
	value=1.0,
	step=0.1,
	label="⚡ Tốc độ phát"
	)
	with gr.Column(scale=1):
	voice_studio_format_dropdown = gr.Dropdown(
	choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"],
	value="Markdown (.md)",
	label="📄 Định dạng file tải xuống"
	)

	# Translation feature
	with gr.Row():
	with gr.Column(scale=1):
	translate_checkbox = gr.Checkbox(
	label="🌍 Dịch văn bản trước khi tạo giọng nói",
	value=False
	)
	with gr.Column(scale=2):
	translate_btn = gr.Button("🔄 DỊCH VĂN BẢN", variant="secondary", size="lg", visible=False)

	# Show translated text when translation is enabled
	translated_text_output = gr.Textbox(
	label="📝 Văn bản đã dịch",
	lines=3,
	interactive=True,
	visible=False,
	placeholder="Văn bản sau khi dịch sẽ hiển thị ở đây..."
	)

	generate_btn = gr.Button("🎵 TẠO GIỌNG NÓI", variant="primary", size="lg")

	# Status indicator for Voice Studio
	studio_status_text = gr.HTML("""
	<div style="text-align: center; margin: 20px 0;">
	<div style="
	background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%);
	color: white;
	padding: 15px;
	border-radius: 12px;
	box-shadow: 0 4px 15px rgba(78,205,196,0.3);
	">
	<span style="font-weight: bold; font-size: 1.1em;">⚡ Sẵn sàng tạo giọng nói</span>
	</div>
	</div>
	""")

	gr.Markdown("### 🎧 Kết quả âm thanh")
	audio_output_vs = gr.HTML(
	value="<p style='text-align: center; color: #666; padding: 40px;'>Nhấn 'TẠO GIỌNG NÓI' để bắt đầu 🎤</p>"
	)

	# Download section for Voice Studio
	with gr.Accordion("💾 Tải xuống kết quả", open=False):
	gr.HTML("""
	<div style="
	background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
	padding: 15px;
	border-radius: 12px;
	margin: 15px 0;
	border-left: 4px solid #2196F3;
	text-align: center;
	">
	<h4 style="margin: 0 0 10px 0; color: #1976D2;">📄 Tải xuống văn bản với Markdown formatting</h4>
	<p style="color: #1565C0; margin: 0; font-style: italic;">
	File chứa thông tin session, cấu hình giọng nói và technical details
	</p>
	</div>
	""")

	voice_studio_text_output = gr.File(
	label="📄 Văn bản với thông tin chi tiết",
	file_count="single",
	file_types=[".md", ".txt", ".docx"]
	)

	# Examples section
	gr.Markdown("### 📚 Ví dụ nhanh")
	with gr.Row():
	example_vn = gr.Button("🇻🇳 Tiếng Việt", size="sm")
	example_en = gr.Button("🇺🇸 English", size="sm")
	example_de = gr.Button("🇩🇪 Deutsch", size="sm")
	example_translate = gr.Button("🌍 Dịch thuật", size="sm")

	# Example button functions
	def load_vn_example():
	    """Return the Vietnamese sample sentence and the matching country label."""
	    sample_text = "Xin chào! Chào mừng bạn đến với studio giọng nói."
	    country_label = "🇻🇳 Việt Nam"
	    return sample_text, country_label

	def load_en_example():
	    """Return the English sample sentence and the matching country label."""
	    sample_text = "Hello! Welcome to our voice studio."
	    country_label = "🇺🇸 Hoa Kỳ"
	    return sample_text, country_label

	def load_de_example():
	    """Return the German sample sentence and the matching country label."""
	    sample_text = "Hallo! Willkommen in unserem Sprachstudio."
	    country_label = "🇩🇪 Đức"
	    return sample_text, country_label

	def load_translate_example():
	    """Return the translation demo: sample text, country label, and True for the checkbox."""
	    sample_text = "Hello! This is an example text for translation."
	    country_label = "🇺🇸 Hoa Kỳ"
	    enable_translation = True
	    return sample_text, country_label, enable_translation

	# Translation functions
	def toggle_translation_ui(translate_enabled):
	    """Show or hide the translation widgets together.

	    Returns two ``gr.update`` objects whose ``visible`` flag mirrors
	    ``translate_enabled``: the first for the translate button, the second
	    for the translated-text box.
	    """
	    translate_btn_update = gr.update(visible=translate_enabled)
	    translated_box_update = gr.update(visible=translate_enabled)
	    return (translate_btn_update, translated_box_update)

	def translate_text_interface(text, voice_selection):
	    """Translate text for Voice Studio.

	    Args:
	        text: Text typed by the user; may be empty or None.
	        voice_selection: Voice label used to derive the target language via
	            ``get_target_language_from_voice``.

	    Returns:
	        The result of ``translate_text_with_gemini`` for the derived target
	        language, or a prompt asking for input when ``text`` is missing or
	        contains only whitespace.
	    """
	    # Check for None/empty before calling .strip() so a missing value yields
	    # the prompt instead of an AttributeError.
	    if not text or not text.strip():
	        return "Vui lòng nhập văn bản trước khi dịch"

	    target_language = get_target_language_from_voice(voice_selection)
	    return translate_text_with_gemini(text, target_language)

	def create_voice_with_translation(original_text, translated_text, translate_enabled, voice_selection, speed, text_format="txt"):
	    """Create voice using the translated text when usable, else the original.

	    The translated text is used only when translation is enabled and the
	    translation box holds real content. It is ignored when it is missing or
	    blank, when it starts with "Lỗi" (presumably the prefix of translation
	    error messages - verify against translate_text_with_gemini), or when it
	    is the "please enter text" prompt that translate_text_interface writes
	    into the same box, which must not be read aloud as a translation.

	    Args:
	        original_text: Text entered by the user.
	        translated_text: Content of the translated-text box; may be None.
	        translate_enabled: Whether the translate checkbox is ticked.
	        voice_selection: Voice label forwarded to the synthesizer.
	        speed: Playback speed forwarded to the synthesizer.
	        text_format: Download format key ("txt", "md" or "docx").

	    Returns:
	        Whatever ``create_audio_voice_studio`` returns for the chosen text.
	    """
	    empty_input_prompt = "Vui lòng nhập văn bản trước khi dịch"
	    candidate = translated_text or ""
	    stripped = candidate.strip()
	    use_translation = bool(
	        translate_enabled
	        and stripped
	        and not candidate.startswith("Lỗi")
	        and stripped != empty_input_prompt
	    )
	    spoken_text = translated_text if use_translation else original_text
	    return create_audio_voice_studio(spoken_text, voice_selection, speed, text_format)

	# Event handlers for Voice Studio
	country_dropdown.change(
	fn=update_voices,
	inputs=[country_dropdown],
	outputs=[voice_dropdown]
	)

	example_vn.click(
	fn=load_vn_example,
	outputs=[text_input, country_dropdown]
	)

	example_en.click(
	fn=load_en_example,
	outputs=[text_input, country_dropdown]
	)

	example_de.click(
	fn=load_de_example,
	outputs=[text_input, country_dropdown]
	)

	example_translate.click(
	fn=load_translate_example,
	outputs=[text_input, country_dropdown, translate_checkbox]
	)

	# Translation UI toggle
	translate_checkbox.change(
	fn=toggle_translation_ui,
	inputs=[translate_checkbox],
	outputs=[translate_btn, translated_text_output]
	)

	# Translation button
	translate_btn.click(
	fn=translate_text_interface,
	inputs=[text_input, voice_dropdown],
	outputs=[translated_text_output]
	)

	# Helper function to extract format and process Voice Studio
	def process_voice_studio(original_text, translated_text, translate_enabled, voice_selection, speed, format_choice):
	    """Process a Voice Studio request with download-format support.

	    Converts the format dropdown label into a format key ("md" for labels
	    containing "Markdown", "docx" for labels containing "Word", "txt"
	    otherwise) and delegates to ``create_voice_with_translation``.

	    Args:
	        original_text: Text entered by the user.
	        translated_text: Content of the translated-text box.
	        translate_enabled: Whether the translate checkbox is ticked.
	        voice_selection: Selected voice label.
	        speed: Playback speed from the slider.
	        format_choice: Label from the format dropdown; a missing or
	            non-string value is treated as plain text instead of raising
	            ``TypeError`` on the substring test.

	    Returns:
	        Whatever ``create_voice_with_translation`` returns.
	    """
	    format_label = format_choice if isinstance(format_choice, str) else ""
	    if "Markdown" in format_label:
	        text_format = "md"
	    elif "Word" in format_label:
	        text_format = "docx"
	    else:
	        text_format = "txt"

	    return create_voice_with_translation(original_text, translated_text, translate_enabled, voice_selection, speed, text_format)

	# Generate button: run process_voice_studio on the Voice Studio inputs and
	# publish its two results to the audio player and the text file output.
	generate_btn.click(
	    fn=process_voice_studio,
	    inputs=[
	        text_input,
	        translated_text_output,
	        translate_checkbox,
	        voice_dropdown,
	        speed_slider,
	        voice_studio_format_dropdown,
	    ],
	    outputs=[
	        audio_output_vs,
	        voice_studio_text_output,
	    ],
	)

	# Audio Translation Tab
	# Opens the "Audio Translation" tab; the statements that follow build its contents.
	with gr.TabItem("🎙️ Audio Translation"):
	# Colorful feature cards like Voice Studio
	# Static banner of four cards: recording, upload, AI translation, synthesis.
	# NOTE(review): the counts shown in the cards ("13 ngôn ngữ", "26 giọng") are
	# hard-coded text; confirm they match the configured voice list.
	gr.HTML("""
	<div style="display: flex; justify-content: center; gap: 15px; margin: 20px 0; flex-wrap: wrap;">
	<div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">
	<h4>🎤 Ghi âm</h4>
	<p style="margin: 0; font-size: 12px;">Microphone</p>
	<p style="margin: 0; font-size: 10px;">Real-time</p>
	</div>
	<div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">
	<h4>📁 Upload</h4>
	<p style="margin: 0; font-size: 12px;">Audio Files</p>
	<p style="margin: 0; font-size: 10px;">WAV • MP3</p>
	</div>
	<div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">
	<h4>🔄 AI Dịch</h4>
	<p style="margin: 0; font-size: 12px;">13 ngôn ngữ</p>
	<p style="margin: 0; font-size: 10px;">Gemini 2.0</p>
	</div>
	<div style="background: linear-gradient(135deg, #A855F7 0%, #EC4899 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;">
	<h4>🎵 Tổng hợp</h4>
	<p style="margin: 0; font-size: 12px;">Neural TTS</p>
	<p style="margin: 0; font-size: 10px;">26 giọng</p>
	</div>
	</div>
	""")

	# Input section with colorful design
	# Static heading that introduces the upload / record area below.
	gr.HTML("""
	<div style="
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 20px;
	border-radius: 15px;
	margin: 20px 0;
	text-align: center;
	box-shadow: 0 8px 32px rgba(0,0,0,0.2);
	">
	<h3 style="margin: 0 0 10px 0;">🎤 Tải lên file audio hoặc ghi âm trực tiếp</h3>
	<p style="margin: 0; opacity: 0.9; font-size: 0.95em;">
	Hỗ trợ file WAV, MP3 hoặc ghi âm real-time qua microphone
	</p>
	</div>
	""")

	# Microphone permission notice and controls.
	# Outside Hugging Face Spaces (neither SPACE_ID nor HF_SPACE_ID is set) a rich
	# HTML/JavaScript panel is emitted: a "ready" banner, a permission-request
	# notice, an iframe warning, and a script that queries the microphone
	# permission state and toggles those elements. On Spaces only a one-line
	# hint is rendered.
	# Fix: the permission-state test in the script used the escaped form `\|\|`,
	# which is not valid JavaScript (and an invalid escape in a Python string);
	# it is now the logical OR operator `||`.
	# NOTE(review): browsers normally do not execute <script> elements inserted
	# through innerHTML; confirm that the HTML component actually runs this script.
	if not (os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")):
	    gr.HTML("""
	<div id="microphone-section" style="margin: 15px 0;">
	<!-- Microphone Status Indicator -->
	<div id="mic-status" style="
	background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
	color: #2e7d32;
	padding: 12px;
	border-radius: 8px;
	margin-bottom: 10px;
	text-align: center;
	border: 1px solid #4caf50;
	display: none;
	">
	<strong>🎤 Microphone Ready</strong> - Bạn có thể ghi âm trực tiếp
	</div>

	<!-- Microphone Error/Permission Notice -->
	<div id="microphone-notice" style="
	background: linear-gradient(135deg, #fff3cd 0%, #ffeaa7 100%);
	color: #856404;
	padding: 15px;
	border-radius: 10px;
	border: 1px solid #ffeaa7;
	text-align: center;
	display: none;
	">
	<strong>🎤 Microphone Access Required</strong><br>
	Để sử dụng ghi âm, vui lòng cho phép truy cập microphone.<br>
	<button onclick="requestMicrophoneAccess()" style="
	background: #4caf50;
	color: white;
	padding: 8px 16px;
	border: none;
	border-radius: 6px;
	cursor: pointer;
	margin: 8px 4px;
	">🎤 Kích hoạt Microphone</button>
	<a href="#" onclick="window.open(window.location.href, '_blank')" style="
	background: #667eea;
	color: white;
	padding: 8px 16px;
	text-decoration: none;
	border-radius: 6px;
	display: inline-block;
	margin: 8px 4px;
	">🔗 Mở cửa sổ mới</a>
	</div>

	<!-- Iframe Warning -->
	<div id="iframe-warning" style="
	background: linear-gradient(135deg, #ffebee 0%, #ffcdd2 100%);
	color: #c62828;
	padding: 12px;
	border-radius: 8px;
	border: 1px solid #f44336;
	text-align: center;
	display: none;
	">
	<strong>⚠️ Iframe Restriction</strong><br>
	Microphone có thể bị hạn chế trong iframe.
	<a href="#" onclick="window.open(window.location.href, '_blank')" style="color: #c62828; text-decoration: underline;">
	Mở trong cửa sổ mới
	</a> để sử dụng đầy đủ tính năng.
	</div>
	</div>

	<script>
	// Enhanced microphone permission handling
	let microphoneAccess = false;

	function requestMicrophoneAccess() {
	console.log('🎤 Audio Translation: Requesting microphone access...');

	// Use global microphone function if available
	if (window.requestMicrophonePermission) {
	window.requestMicrophonePermission();
	return;
	}

	// Fallback to local implementation
	if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
	navigator.mediaDevices.getUserMedia({
	audio: {
	echoCancellation: true,
	noiseSuppression: true,
	autoGainControl: true,
	sampleRate: 44100
	}
	})
	.then(function(stream) {
	console.log('✅ Audio Translation: Microphone access granted');
	microphoneAccess = true;

	// Show success status
	updateLocalMicrophoneUI('granted');

	// Stop the stream (we just wanted permission)
	stream.getTracks().forEach(track => track.stop());

	// Trigger Gradio audio component refresh
	setTimeout(() => {
	const audioComponents = document.querySelectorAll('[data-testid*="audio"]');
	audioComponents.forEach(comp => {
	// Try to trigger a refresh or re-initialization
	if (comp.click) comp.click();
	});
	}, 500);

	// Update global status if available
	if (window.microphoneStatus) {
	window.microphoneStatus.granted = true;
	}

	})
	.catch(function(err) {
	console.log('❌ Audio Translation: Microphone access denied:', err);
	updateLocalMicrophoneUI('denied', err.message);
	});
	} else {
	console.log('❌ getUserMedia not supported');
	updateLocalMicrophoneUI('unsupported');
	}
	}

	function updateLocalMicrophoneUI(status, errorMessage = '') {
	const micStatus = document.getElementById('mic-status');
	const micNotice = document.getElementById('microphone-notice');

	switch(status) {
	case 'granted':
	if (micStatus) micStatus.style.display = 'block';
	if (micNotice) micNotice.style.display = 'none';
	microphoneAccess = true;
	break;

	case 'denied':
	if (micNotice) {
	micNotice.style.display = 'block';
	micNotice.innerHTML = `
	<strong>❌ Microphone Access Denied</strong><br>
	Lỗi: ${errorMessage}<br>
	Vui lòng kiểm tra cài đặt trình duyệt và cho phép microphone.
	<br><br>
	<button onclick="requestMicrophoneAccess()" style="
	background: #ff9800;
	color: white;
	padding: 8px 16px;
	border: none;
	border-radius: 6px;
	cursor: pointer;
	margin: 4px;
	">🔄 Thử lại</button>
	<button onclick="window.open(window.location.href, '_blank')" style="
	background: #2196f3;
	color: white;
	padding: 8px 16px;
	border: none;
	border-radius: 6px;
	cursor: pointer;
	margin: 4px;
	">🔗 Mở cửa sổ mới</button>
	`;
	}
	break;

	case 'unsupported':
	if (micNotice) {
	micNotice.style.display = 'block';
	micNotice.innerHTML = `
	<strong>❌ Microphone Not Supported</strong><br>
	Trình duyệt của bạn không hỗ trợ ghi âm.<br>
	Vui lòng sử dụng Chrome, Firefox, Safari hoặc Edge phiên bản mới.
	<br><br>
	<a href="https://caniuse.com/stream" target="_blank" style="
	color: #856404;
	text-decoration: underline;
	">Kiểm tra tương thích trình duyệt</a>
	`;
	}
	break;

	default:
	if (micNotice) {
	micNotice.style.display = 'block';
	}
	break;
	}
	}

	// Listen for global microphone events
	window.addEventListener('microphoneStatusChanged', function(event) {
	console.log('🔄 Audio Translation: Received microphone status update:', event.detail);
	updateLocalMicrophoneUI(event.detail.status, event.detail.errorMessage);
	});

	window.addEventListener('microphoneGranted', function() {
	console.log('✅ Audio Translation: Global microphone granted');
	updateLocalMicrophoneUI('granted');
	});

	// Check microphone availability on load
	function checkMicrophoneAvailability() {
	console.log('🔍 Audio Translation: Checking microphone availability...');

	// Check global status first
	if (window.microphoneStatus) {
	if (window.microphoneStatus.granted) {
	updateLocalMicrophoneUI('granted');
	return;
	} else if (!window.microphoneStatus.supported) {
	updateLocalMicrophoneUI('unsupported');
	return;
	}
	}

	// Check if we're in an iframe
	if (window.location !== window.parent.location) {
	console.log('Running in iframe');
	const iframeWarning = document.getElementById('iframe-warning');
	if (iframeWarning) {
	setTimeout(() => {
	iframeWarning.style.display = 'block';
	}, 1000);
	}
	}

	// Try to get microphone permissions
	if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
	// Check if we already have permission
	navigator.permissions.query({name: 'microphone'}).then(function(result) {
	console.log('Microphone permission status:', result.state);

	if (result.state === 'granted') {
	const micStatus = document.getElementById('mic-status');
	if (micStatus) micStatus.style.display = 'block';
	microphoneAccess = true;
	} else if (result.state === 'prompt' || result.state === 'denied') {
	const micNotice = document.getElementById('microphone-notice');
	if (micNotice) {
	setTimeout(() => {
	micNotice.style.display = 'block';
	}, 1500);
	}
	}

	// Listen for permission changes
	result.onchange = function() {
	console.log('Microphone permission changed to:', this.state);
	if (this.state === 'granted') {
	const micStatus = document.getElementById('mic-status');
	const micNotice = document.getElementById('microphone-notice');
	if (micStatus) micStatus.style.display = 'block';
	if (micNotice) micNotice.style.display = 'none';
	microphoneAccess = true;
	}
	};
	}).catch(function(err) {
	console.log('Permission query failed:', err);
	// Fallback to showing the notice
	setTimeout(() => {
	const micNotice = document.getElementById('microphone-notice');
	if (micNotice) micNotice.style.display = 'block';
	}, 2000);
	});
	} else {
	// Browser doesn't support getUserMedia
	setTimeout(() => {
	const micNotice = document.getElementById('microphone-notice');
	if (micNotice) {
	micNotice.style.display = 'block';
	micNotice.innerHTML = `
	<strong>❌ Microphone Not Supported</strong><br>
	Trình duyệt không hỗ trợ ghi âm. Vui lòng cập nhật trình duyệt.
	`;
	}
	}, 1000);
	}
	}

	// Initialize when DOM is ready
	if (document.readyState === 'loading') {
	document.addEventListener('DOMContentLoaded', checkMicrophoneAvailability);
	} else {
	checkMicrophoneAvailability();
	}

	// Re-check periodically for dynamic content
	setInterval(checkMicrophoneAvailability, 5000);
	</script>
	""")
	else:
	    # Production mode - simple microphone notice
	    gr.HTML('<div style="text-align:center;color:#666;padding:10px;">📎 Upload audio file or use microphone</div>')

	# Main audio input of the tab: accepts an uploaded file or a microphone recording.
	# With type="numpy" handlers receive the audio as in-memory data, not a file path.
	audio_input = gr.Audio(
	label="📎 Tải lên file audio hoặc ghi âm trực tiếp",
	type="numpy", # Use numpy to avoid temp file issues
	sources=["upload", "microphone"],
	show_label=True,
	interactive=True,
	elem_id="audio-input-translation"
	)

	# Audio Recording Control Buttons
	# "Save Recording" and "New Record" are placed side by side in one row.
	with gr.Row():
	save_recording_btn = gr.Button(
	"💾 Save Recording",
	variant="secondary",
	size="sm"
	)
	new_recording_btn = gr.Button(
	"🎙️ New Record",
	variant="primary",
	size="sm"
	)

	# Button descriptions
	# Static captions explaining what the two buttons above do.
	gr.HTML("""
	<div style="display: flex; justify-content: space-between; margin: 5px 0 15px 0; font-size: 0.8em; color: #666;">
	<span>💾 Lưu file audio hiện tại vào record_data</span>
	<span>🎙️ Xóa audio hiện tại để ghi âm mới</span>
	</div>
	""")

	# Status for recording actions
	# Starts with a neutral "ready" message; written by the save / new-recording handlers.
	recording_status = gr.HTML(
	value="<p style='text-align: center; color: #666; font-style: italic;'>Sẵn sàng ghi âm hoặc tải lên file</p>"
	)

	# === RECORDED FILES FUNCTIONS ===
	def refresh_recorded_files():
	    """Re-read the recorded-file list and return a dropdown update with nothing selected."""
	    available = get_recorded_files()
	    print(f"🔄 Refreshing dropdown - found files: {available}")
	    refreshed_dropdown = gr.Dropdown(choices=available, value=None)
	    return refreshed_dropdown

	def load_recorded_file(filename):
	    """Load a recorded file for the playback component.

	    Args:
	        filename: Name chosen in the recorded-files dropdown; may be empty or None.

	    Returns:
	        A (sample_rate, audio_data) tuple as read by soundfile, or None when
	        no name was given, the file does not exist, or reading failed.
	    """
	    print(f"🎵 Loading recorded file: {filename}")

	    # Guard: nothing (or only whitespace) selected.
	    if not (filename and filename.strip()):
	        print("❌ No filename provided")
	        return None

	    file_path = get_recorded_file_path(filename)
	    print(f"📁 Full path: {file_path}")

	    # Guard: the file is gone; log what the directory holds to help debugging.
	    if not os.path.exists(file_path):
	        print(f"❌ File not found: {file_path}")
	        parent_dir = os.path.dirname(file_path)
	        listing = os.listdir(parent_dir) if os.path.exists(parent_dir) else 'Directory not found'
	        print(f"📁 Directory contents: {listing}")
	        return None

	    size_in_bytes = os.path.getsize(file_path)
	    print(f"✅ File exists, size: {size_in_bytes} bytes")

	    try:
	        # Imported lazily so the dependency is only loaded when a file is played.
	        import soundfile as sf
	        samples, rate = sf.read(file_path)
	        print(f"🎵 Loaded audio: shape={samples.shape}, sr={rate}")
	        # The numpy-type audio component expects (sample_rate, data).
	        return (rate, samples)
	    except Exception as e:
	        print(f"❌ Error loading audio: {e}")
	        return None

	def use_recorded_for_translation(filename, country, voice, fmt):
	    """Translate a previously recorded file selected in the dropdown.

	    Args:
	        filename: Name chosen in the recorded-files dropdown; may be empty or None.
	        country: Target country selection, forwarded to translate_audio.
	        voice: Target voice selection, forwarded to translate_audio.
	        fmt: Text-format dropdown label, converted with get_format_from_dropdown.

	    Returns:
	        The result of translate_audio when the file exists. Otherwise a
	        tuple of nine empty values, one per output component this handler
	        is wired to (transcription, detected language, translation, target
	        language, translated audio, audio download, original compare,
	        translated compare, text file). The previous fallback had only
	        eight values, which does not match the nine outputs.

	    NOTE(review): translate_audio receives a file path here, while the
	    upload handler passes the value of a numpy-type audio component;
	    confirm it accepts both.
	    """
	    print(f"🔄 Using recorded file for translation: {filename}")
	    if filename and filename.strip():
	        file_path = get_recorded_file_path(filename)
	        print(f"📁 Translation file path: {file_path}")
	        if os.path.exists(file_path):
	            print(f"✅ Starting translation for: {filename}")
	            # Use the same translation function as the upload flow.
	            return translate_audio(file_path, country, voice, get_format_from_dropdown(fmt))
	        print(f"❌ File not found for translation: {file_path}")
	    else:
	        # Only report "no file selected" when that is really the case.
	        print("❌ No file selected for translation")

	    # Empty results: text boxes get "", audio/file components get None.
	    return "", "", "", "", None, None, "", "", None

	def prepare_recorded_file_download(filename):
	    """Return the path of the selected recorded file for the download component.

	    Returns the resolved path when a name is given and the file exists;
	    otherwise logs the reason and returns None.
	    """
	    print(f"📥 Preparing download for: {filename}")
	    has_selection = bool(filename and filename.strip())
	    if has_selection:
	        candidate = get_recorded_file_path(filename)
	        print(f"📁 Download file path: {candidate}")
	        if os.path.exists(candidate):
	            print(f"✅ File ready for download: {filename}")
	            return candidate
	        print(f"❌ Download file not found: {candidate}")
	    # Reached both when nothing was selected and when the file was missing.
	    print("❌ No file selected for download")
	    return None

	def save_current_recording(audio_file):
	    """Persist the current audio input through save_recorded_audio.

	    Args:
	        audio_file: Current value of the audio input component, or None.

	    Returns:
	        A (status_html, dropdown_update) pair. On success the status is
	        green and the dropdown selects the saved file; on any failure the
	        status is red and the dropdown is refreshed with nothing selected.
	    """
	    def _failure(message):
	        # Red status line plus a dropdown refreshed from disk with no selection.
	        return (
	            f"<p style='color: #e74c3c; text-align: center;'>{message}</p>",
	            gr.Dropdown(choices=get_recorded_files(), value=None)
	        )

	    if audio_file is None:
	        return _failure("❌ Không có file audio để lưu")

	    try:
	        saved_path = save_recorded_audio(audio_file)
	        if not saved_path:
	            return _failure("❌ Lỗi khi lưu file")

	        saved_filename = os.path.basename(saved_path)
	        # Re-read the list so the new file shows up in the dropdown.
	        files_after_save = get_recorded_files()
	        print(f"🔄 After save - updated files: {files_after_save}")
	        return (
	            f"<p style='color: #27ae60; text-align: center;'>✅ Đã lưu: {saved_filename}</p>",
	            gr.Dropdown(choices=files_after_save, value=saved_filename)
	        )
	    except Exception as e:
	        return _failure(f"❌ Lỗi: {str(e)}")

	def clear_audio_for_new_recording():
	    """Return (None, status_html): None clears the audio input, the HTML announces readiness."""
	    ready_message = "<p style='color: #3498db; text-align: center;'>🎙️ Sẵn sàng ghi âm mới</p>"
	    cleared_audio = None
	    return cleared_audio, ready_message

	def delete_selected_file(filename):
	    """Delete the selected recorded file and refresh the dropdown.

	    Args:
	        filename: Name chosen in the recorded-files dropdown; may be empty or None.

	    Returns:
	        A (status_html, dropdown_update, None) triple; the trailing None
	        clears the playback component.
	    """
	    # Guard: nothing selected, so only report and refresh.
	    if not (filename and filename.strip()):
	        return (
	            "<p style='color: #e74c3c; text-align: center;'>❌ Vui lòng chọn file để xóa</p>",
	            gr.Dropdown(choices=get_recorded_files(), value=None),
	            None
	        )

	    outcome = delete_recorded_file(filename)
	    remaining_files = get_recorded_files()

	    # delete_recorded_file signals success with a check mark in its message.
	    status_color = "#27ae60" if "✅" in outcome else "#e74c3c"
	    status_html = f"<p style='color: {status_color}; text-align: center;'>{outcome}</p>"

	    return (
	        status_html,
	        gr.Dropdown(choices=remaining_files, value=None),
	        None
	    )

	# Recorded Files Management Section
	# Collapsed-by-default accordion holding the list of saved recordings and its controls.
	with gr.Accordion("🎤 File đã ghi âm", open=False):
	# Static header describing the section.
	gr.HTML("""
	<div style="
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 15px;
	border-radius: 12px;
	margin: 15px 0;
	text-align: center;
	">
	<h4 style="margin: 0 0 8px 0;">📁 Quản lý file đã ghi</h4>
	<p style="margin: 0; opacity: 0.9; font-size: 0.9em;">
	Chọn file từ danh sách để phát lại hoặc dịch thuật
	</p>
	</div>
	""")

	# Refresh button for recorded files
	refresh_files_btn = gr.Button(
	"🔄 Làm mới danh sách",
	variant="secondary",
	size="sm"
	)

	# Status display for file operations
	# Written by the delete handler with a success or error message.
	file_operation_status = gr.HTML(
	value="<p style='text-align: center; color: #666; font-style: italic;'>Chọn file để thực hiện thao tác</p>"
	)

	# Dropdown for recorded files
	# The initial choices are read once, while the UI is being built.
	initial_files = get_recorded_files()
	print(f"🔍 Initial recorded files: {initial_files}")
	recorded_files_dropdown = gr.Dropdown(
	choices=initial_files,
	label="📂 Chọn file đã ghi",
	info="Các file audio đã được ghi âm trước đó"
	)

	# Preview and controls for selected file
	with gr.Row():
	with gr.Column():
	# Audio player for selected file
	# Read-only player; fed with (sample_rate, data) by load_recorded_file.
	recorded_audio_player = gr.Audio(
	label="🎵 Phát lại file đã chọn",
	interactive=False,
	show_label=True,
	type="numpy" # Use numpy for better compatibility
	)

	with gr.Column():
	# Action buttons
	use_for_translation_btn = gr.Button(
	"🔄 Sử dụng để dịch thuật",
	variant="primary",
	size="sm"
	)

	with gr.Row():
	download_recorded_btn = gr.Button(
	"📥 Tải xuống",
	variant="secondary",
	size="sm"
	)

	delete_recorded_btn = gr.Button(
	"🗑️ Xóa file",
	variant="stop",
	size="sm"
	)

	# Download link for recorded file
	# Receives the path returned by prepare_recorded_file_download.
	download_recorded_file = gr.File(
	label="📥 File tải xuống",
	visible=True,
	file_count="single"
	)

	# Settings section with gradient header
	# Static heading for the target language / voice settings.
	gr.HTML("""
	<div style="
	background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%);
	color: white;
	padding: 18px;
	border-radius: 12px;
	margin: 25px 0 20px 0;
	text-align: center;
	box-shadow: 0 6px 24px rgba(255,107,107,0.3);
	">
	<h3 style="margin: 0 0 8px 0;">🌍 Cài đặt dịch thuật</h3>
	<p style="margin: 0; opacity: 0.9; font-size: 0.9em;">
	Chọn ngôn ngữ đích và giọng nói cho kết quả dịch thuật
	</p>
	</div>
	""")

	# Separate dropdowns without complex wrappers to avoid CSS conflicts
	# Country choices are the keys of voice_choices_by_country; Vietnam is preselected.
	target_country_dropdown = gr.Dropdown(
	choices=list(voice_choices_by_country.keys()),
	value="🇻🇳 Việt Nam",
	label="🌍 Chọn quốc gia đích"
	)

	# Voice choices start with the Vietnamese list.
	# NOTE(review): the default value is a hard-coded label; confirm it is present
	# in voice_choices_by_country["🇻🇳 Việt Nam"].
	target_voice_dropdown = gr.Dropdown(
	choices=voice_choices_by_country["🇻🇳 Việt Nam"],
	value="🇻🇳 HoaiMy - Nữ Việt Chuẩn",
	label="🎭 Chọn giọng nói đích"
	)

	# Output text format; the Word option is offered only when DOCX_AVAILABLE is true.
	text_format_dropdown = gr.Dropdown(
	choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"],
	value="Markdown (.md)",
	label="📄 Định dạng file văn bản"
	)

	# Colorful action button
	# NOTE(review): this HTML block is empty (whitespace only); it looks like a
	# leftover placeholder from a removed manual translate button.
	gr.HTML("""
	""")

	# Auto-translate on audio upload - no manual button needed

	# Results section with colorful headers
	gr.HTML("""
	<div style="
	background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%);
	color: white;
	padding: 18px;
	border-radius: 12px;
	margin: 30px 0 20px 0;
	text-align: center;
	box-shadow: 0 6px 24px rgba(69,183,209,0.3);
	">
	<h3 style="margin: 0 0 8px 0;">📊 Kết quả xử lý</h3>
	<p style="margin: 0; opacity: 0.9; font-size: 0.9em;">
	Phiên âm, dịch thuật và tổng hợp giọng nói
	</p>
	</div>
	""")

	# Dynamic status indicator
	# Starts empty; the event chains write "processing" / "complete" banners into it.
	status_text = gr.HTML("")

	# Card-based layout for mobile
	# Column tagged with the "output-area" CSS class that holds the result widgets.
	with gr.Column(elem_classes=["output-area"]):
	# Original content card
	# Static header for the transcription of the source audio.
	gr.HTML("""
	<div style="
	background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
	padding: 15px;
	border-radius: 12px;
	margin: 15px 0;
	border-left: 4px solid #2196F3;
	">
	<h4 style="margin: 0 0 10px 0; color: #1976D2;">📝 Nội dung gốc từ audio</h4>
	</div>
	""")

	# Read-only box: first output of the translation handlers (transcription).
	transcription_output = gr.Textbox(
	label="🎯 Phiên âm từ audio",
	lines=4,
	interactive=False,
	placeholder="Nội dung phiên âm từ file audio sẽ hiển thị ở đây...",
	elem_classes=["mobile-textbox"]
	)

	# Read-only box: second output of the translation handlers (detected language).
	detected_language = gr.Textbox(
	label="🌐 Ngôn ngữ được phát hiện",
	lines=1,
	interactive=False,
	placeholder="Tự động nhận diện...",
	elem_classes=["mobile-textbox"]
	)


	# Translation result card
	# Static header for the translated text.
	gr.HTML("""
	<div style="
	background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
	padding: 15px;
	border-radius: 12px;
	margin: 15px 0;
	border-left: 4px solid #4CAF50;
	">
	<h4 style="margin: 0 0 10px 0; color: #388E3C;">✨ Kết quả dịch thuật</h4>
	</div>
	""")

	# Read-only box: third output of the translation handlers (translated text).
	translation_output = gr.Textbox(
	label="🔄 Nội dung đã dịch",
	lines=4,
	interactive=False,
	placeholder="Bản dịch sẽ hiển thị ở đây...",
	elem_classes=["mobile-textbox"]
	)

	# Read-only box: also mirrors the selected target voice whenever that dropdown changes.
	target_language_display = gr.Textbox(
	label="🎯 Ngôn ngữ đích",
	lines=1,
	interactive=False,
	placeholder="Chưa chọn...",
	elem_classes=["mobile-textbox"]
	)

	# Mobile-friendly comparison section
	# Collapsed-by-default accordion showing the original and translated text together.
	with gr.Accordion("🔍 So sánh nội dung", open=False):
	# Static hint explaining the purpose of the section.
	gr.HTML("""
	<div style="
	text-align: center;
	margin-bottom: 15px;
	padding: 10px;
	background: #f5f5f5;
	border-radius: 8px;
	">
	<p style="color: #666; font-style: italic; margin: 0;">
	Xem nội dung gốc và bản dịch để so sánh
	</p>
	</div>
	""")

	# Stack vertically on mobile for better readability
	with gr.Column():
	# Caption for the original-language box.
	gr.HTML("""
	<div style="
	background: #e3f2fd;
	padding: 10px;
	border-radius: 8px;
	margin: 10px 0;
	text-align: center;
	font-weight: bold;
	color: #1976D2;
	">📝 Ngôn ngữ gốc</div>
	""")
	# Read-only, label hidden; one of the outputs of the translation handlers.
	original_compare = gr.Textbox(
	label="",
	lines=4,
	interactive=False,
	show_label=False,
	placeholder="Nội dung phiên âm từ audio sẽ hiển thị ở đây...",
	elem_classes=["mobile-compare"]
	)

	# Caption for the translated box.
	gr.HTML("""
	<div style="
	background: #e8f5e8;
	padding: 10px;
	border-radius: 8px;
	margin: 15px 0 5px 0;
	text-align: center;
	font-weight: bold;
	color: #388E3C;
	">✨ Sau khi dịch</div>
	""")
	# Read-only, label hidden; one of the outputs of the translation handlers.
	translated_compare = gr.Textbox(
	label="",
	lines=4,
	interactive=False,
	show_label=False,
	placeholder="Nội dung sau khi dịch sẽ hiển thị ở đây...",
	elem_classes=["mobile-compare"]
	)

	# Mobile-optimized download section
	# Accordion (open by default) with the translated audio and text downloads.
	with gr.Accordion("💾 Tải xuống kết quả", open=True):
	# Static header for the download area.
	gr.HTML("""
	<div style="
	background: linear-gradient(135deg, #fff3e0 0%, #ffcc80 100%);
	padding: 15px;
	border-radius: 12px;
	margin: 15px 0;
	border-left: 4px solid #FF9800;
	text-align: center;
	">
	<h4 style="margin: 0 0 10px 0; color: #E65100;">💾 Tải xuống kết quả</h4>
	<p style="color: #BF360C; margin: 0; font-style: italic;">
	File audio và văn bản đã dịch
	</p>
	</div>
	""")

	# Stack downloads vertically for mobile
	with gr.Column():
	# Caption for the translated audio player.
	gr.HTML("""
	<div style="
	background: #e3f2fd;
	padding: 12px;
	border-radius: 8px;
	margin: 15px 0 10px 0;
	text-align: center;
	font-weight: bold;
	color: #1976D2;
	">🔊 Audio đã dịch</div>
	""")
	# Player for the synthesized translation; it is given a file path (type="filepath").
	audio_output_at = gr.Audio(
	label="🎵 Audio đã dịch",
	type="filepath",
	show_label=True,
	elem_classes=["mobile-audio"],
	format="wav" # Specify format explicitly
	)

	# Explicit download component for translated audio
	# Sixth output of the translation handlers, directly after the audio player.
	audio_download_at = gr.File(
	label="📥 Tải xuống audio đã dịch",
	file_count="single",
	file_types=[".wav"],
	visible=True
	)

	# Caption for the translated text file.
	gr.HTML("""
	<div style="
	background: #e8f5e8;
	padding: 12px;
	border-radius: 8px;
	margin: 25px 0 10px 0;
	text-align: center;
	font-weight: bold;
	color: #388E3C;
	">📄 Văn bản đã dịch</div>
	""")
	# Last output of the translation handlers: the generated text file.
	# NOTE(review): file_types lists .txt and .docx only, while the format
	# dropdown also offers Markdown; confirm whether .md should be listed.
	text_output = gr.File(
	label="",
	file_count="single",
	file_types=[".txt", ".docx"],
	show_label=False,
	elem_classes=["mobile-file"]
	)

	# Event handlers for Audio Translation with colorful status
	def update_status_processing():
	    """Return the HTML banner shown while a translation is running."""
	    processing_banner = """
	<div class="status-processing" style="
	text-align: center;
	margin: 20px 0;
	padding: 15px;
	border-radius: 12px;
	color: white;
	transition: all 0.3s ease;
	">
	<span style="font-weight: bold; font-size: 1.1em;">
	⚡ Đang tự động dịch thuật...
	</span>
	</div>
	"""
	    return processing_banner

	def update_status_complete():
	    """Return the HTML banner shown once a translation chain has finished."""
	    success_banner = """
	<div class="status-success" style="
	text-align: center;
	margin: 20px 0;
	padding: 15px;
	border-radius: 12px;
	color: white;
	transition: all 0.3s ease;
	">
	<span style="font-weight: bold; font-size: 1.1em;">
	✅ Dịch thuật hoàn thành!
	</span>
	</div>
	"""
	    return success_banner

	# A change of target country is passed to update_voices; its result updates the target voice dropdown.
	target_country_dropdown.change(fn=update_voices, inputs=[target_country_dropdown], outputs=[target_voice_dropdown])

	# Mirror the selected voice label into the read-only target language box.
	target_voice_dropdown.change(fn=lambda voice: voice, inputs=[target_voice_dropdown], outputs=[target_language_display])

	# Helper function to extract format
	def get_format_from_dropdown(format_choice):
	    """Map a text-format dropdown label to a short format code.

	    Args:
	        format_choice: Dropdown label such as "Markdown (.md)"; may be
	            None or empty when no value is supplied.

	    Returns:
	        "md" when the label contains "Markdown", "docx" when it contains
	        "Word", and "txt" otherwise (including for None or empty input,
	        which previously raised TypeError).
	    """
	    label = str(format_choice or "")
	    if "Markdown" in label:
	        return "md"
	    if "Word" in label:
	        return "docx"
	    return "txt"

	# Auto-translate when audio is uploaded or changed.
	# Chain: show the "processing" banner -> translate (or emit placeholders when
	# the input was cleared) -> show the "complete" banner -> refresh the
	# recorded-files dropdown.
	# Fix: the placeholder tuple used when audio is None now has nine values, one
	# per output component listed below; it previously had eight, so clearing
	# the audio input produced an output-count mismatch.
	audio_input.change(
	    fn=lambda: update_status_processing(),
	    outputs=[status_text]
	).then(
	    fn=lambda audio, country, voice, fmt: (
	        translate_audio(audio, country, voice, get_format_from_dropdown(fmt))
	        if audio is not None
	        # Text boxes get strings, audio/file components get None.
	        else ("", "", "📎 Vui lòng tải lên file audio hoặc ghi âm", country, None, None, "", "", None)
	    ),
	    inputs=[audio_input, target_country_dropdown, target_voice_dropdown, text_format_dropdown],
	    outputs=[
	        transcription_output,
	        detected_language,
	        translation_output,
	        target_language_display,
	        audio_output_at,
	        audio_download_at,
	        original_compare,
	        translated_compare,
	        text_output
	    ]
	).then(
	    fn=lambda: update_status_complete(),
	    outputs=[status_text]
	).then(
	    fn=refresh_recorded_files,
	    outputs=[recorded_files_dropdown]
	)

	# === RECORDED FILES EVENT HANDLERS ===

	# Save the current audio input; updates the status line and the file dropdown.
	save_recording_btn.click(fn=save_current_recording, inputs=[audio_input], outputs=[recording_status, recorded_files_dropdown])

	# Clear the audio input so a new recording can be made.
	new_recording_btn.click(fn=clear_audio_for_new_recording, outputs=[audio_input, recording_status])

	# Re-read the list of recorded files on demand.
	refresh_files_btn.click(fn=refresh_recorded_files, outputs=[recorded_files_dropdown])

	# Selecting a file loads it into the playback component.
	recorded_files_dropdown.change(fn=load_recorded_file, inputs=[recorded_files_dropdown], outputs=[recorded_audio_player])

	# Translate the selected recording:
	# processing banner -> translation -> complete banner -> dropdown refresh.
	use_for_translation_btn.click(
	    fn=lambda: update_status_processing(), outputs=[status_text]
	).then(
	    fn=use_recorded_for_translation,
	    inputs=[
	        recorded_files_dropdown,
	        target_country_dropdown,
	        target_voice_dropdown,
	        text_format_dropdown,
	    ],
	    outputs=[
	        transcription_output, detected_language,
	        translation_output, target_language_display,
	        audio_output_at, audio_download_at,
	        original_compare, translated_compare,
	        text_output,
	    ],
	).then(
	    fn=lambda: update_status_complete(), outputs=[status_text]
	).then(
	    fn=refresh_recorded_files, outputs=[recorded_files_dropdown]
	)

	# Hand the selected file's path to the download component.
	download_recorded_btn.click(fn=prepare_recorded_file_download, inputs=[recorded_files_dropdown], outputs=[download_recorded_file])

	# Delete the selected file, refresh the dropdown and clear the player.
	delete_recorded_btn.click(
	    fn=delete_selected_file,
	    inputs=[recorded_files_dropdown],
	    outputs=[file_operation_status, recorded_files_dropdown, recorded_audio_player],
	)

	# Features section for Voice RAG
	# Static marketing cards describing the three features, followed by the footer.
	gr.Markdown("### 📚 Tính năng chính")

	# First row: Voice RAG and Audio Translation cards.
	with gr.Row():
	with gr.Column():
	# NOTE(review): this card advertises 24 voices while the Audio Translation
	# tab banner says 26; confirm which number is right.
	gr.HTML("""
	<div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 20px; border-radius: 15px; color: white; text-align: center; margin: 10px;">
	<h3>📚 Voice RAG</h3>
	<p>Upload tài liệu và đặt câu hỏi. Nhận trả lời bằng giọng nói đa ngôn ngữ.</p>
	<div style="margin-top: 15px;">
	<div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">
	✓ Hỗ trợ PDF, DOCX, TXT
	</div>
	<div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">
	✓ AI Gemini 2.0 Flash
	</div>
	<div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">
	✓ 24 giọng nói đa quốc gia
	</div>
	</div>
	</div>
	""")

	with gr.Column():
	gr.HTML("""
	<div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 20px; border-radius: 15px; color: white; text-align: center; margin: 10px;">
	<h3>🌍 Audio Translation</h3>
	<p>Dịch thuật âm thanh sang nhiều ngôn ngữ với giọng nói tự nhiên.</p>
	<div style="margin-top: 15px;">
	<div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">
	✓ Ghi âm real-time
	</div>
	<div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">
	✓ 13 ngôn ngữ chính
	</div>
	<div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">
	✓ Edge TTS Neural
	</div>
	</div>
	</div>
	""")

	# Second row: Voice Studio card.
	with gr.Row():
	with gr.Column():
	gr.HTML("""
	<div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 20px; border-radius: 15px; color: white; text-align: center; margin: 10px;">
	<h3>🎤 Voice Studio</h3>
	<p>Chuyển văn bản thành giọng nói với nhiều lựa chọn quốc gia và giọng nói.</p>
	<div style="margin-top: 15px;">
	<div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">
	✓ 13 quốc gia
	</div>
	<div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">
	✓ Tích hợp dịch thuật
	</div>
	<div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;">
	✓ Điều chỉnh tốc độ
	</div>
	</div>
	</div>
	""")


	# Footer
	# Branding strip styled through the "custom-footer" CSS class.
	gr.HTML("""
	<div class="custom-footer">
	<div style="display: flex; justify-content: center; align-items: center; gap: 15px; flex-wrap: wrap;">
	<div style="display: flex; align-items: center; gap: 8px;">
	<div style="background: rgba(255,255,255,0.2); padding: 8px 15px; border-radius: 20px; font-size: 16px;">
	🧠 DB
	</div>
	<span style="font-size: 18px; font-weight: bold;">Digitized Brains</span>
	</div>
	<div style="font-size: 14px; opacity: 0.9;">
	Voice Studio - AI Powered
	</div>
	</div>
	</div>
	""")

	# Add JavaScript for button effects
	# js_code is defined elsewhere in the file and is injected as an HTML component.
	gr.HTML(js_code)

	# Script entry point: prepare the environment, choose a port and launch the app.
	if __name__ == "__main__":
	    import sys
	    import locale
	    import os

	    # Request UTF-8 I/O on Windows.
	    # NOTE(review): PYTHONIOENCODING is read at interpreter start-up, so setting
	    # it here presumably only affects child processes; confirm the intent.
	    if sys.platform == 'win32':
	        os.environ['PYTHONIOENCODING'] = 'utf-8'

	    # Startup banner with a timestamp.
	    print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")

	    # Hugging Face Spaces is detected through either of these variables.
	    on_hf_spaces = bool(os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID"))

	    # Create the record_data directory eagerly only for local runs, to keep
	    # startup on Spaces short.
	    if not on_hf_spaces:
	        create_record_data_directory()
	        print(f"📁 Record data directory ready: {RECORD_DATA_DIR}")
	    else:
	        print("🏭 Production mode - record_data will be created on first use")

	    # Disable flagging through the environment.
	    # NOTE(review): the previous comment said this was "for iframe support";
	    # the variable name only refers to flagging. Confirm it is still needed.
	    os.environ['GRADIO_ALLOW_FLAGGING'] = 'never'
	    # Disable Gradio temp directory to prevent file serving issues
	    # os.environ['GRADIO_TEMP_DIR'] = '/tmp'

	    if on_hf_spaces:
	        # Fixed port used for Hugging Face Spaces.
	        port = 7860
	        print("🏭 Using HF Spaces standard port 7860")
	    else:
	        # Local development: honour GRADIO_SERVER_PORT, but fall back to the
	        # default instead of crashing when it is empty or not a number.
	        default_port = 7880
	        raw_port = os.environ.get("GRADIO_SERVER_PORT", "")
	        try:
	            port = int(raw_port) if raw_port else default_port
	        except ValueError:
	            print(f"⚠️ Invalid GRADIO_SERVER_PORT={raw_port!r}; using {default_port}")
	            port = default_port
	        print(f"🖥️ Using local development port {port}")

	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=port,
	        share=False,
	        show_error=True,
	        ssr_mode=False, # Disable SSR to prevent timeout issues on HF Spaces
	        enable_monitoring=False # Disable monitoring for faster startup
	    )