"""
Main entry point for the Audio Translation Web Application.

Handles file upload, processing pipeline, and UI rendering using DDD
architecture with Gradio.
"""

import logging

# Logging is configured before the remaining imports; keep this order so that
# records emitted while those modules are imported reach these handlers.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("app.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

import gradio as gr
import os
import json
from typing import Optional, Tuple, Dict, Any

# Import application services and DTOs
from src.application.services.audio_processing_service import AudioProcessingApplicationService
from src.application.services.configuration_service import ConfigurationApplicationService
from src.application.dtos.audio_upload_dto import AudioUploadDto
from src.application.dtos.processing_request_dto import ProcessingRequestDto
from src.application.dtos.processing_result_dto import ProcessingResultDto

# Import infrastructure setup
from src.infrastructure.config.container_setup import initialize_global_container, get_global_container

# Initialize environment configurations
os.makedirs("temp/uploads", exist_ok=True)
os.makedirs("temp/outputs", exist_ok=True)

# Global container initialization
container_initialized = False


def initialize_application():
    """
    Initialize the dependency injection container once per process.

    The module-level ``container_initialized`` flag is set only after
    ``initialize_global_container()`` returns, so a failed attempt is retried
    on the next call.

    Raises:
        RuntimeError: If container initialization raises any exception; the
            original exception is attached as the cause.
    """
    global container_initialized

    if container_initialized:
        return

    try:
        logger.info("Initializing application container")
        initialize_global_container()
        container_initialized = True
        logger.info("Application container initialized successfully")
    except Exception as e:
        logger.error(f"Failed to initialize application: {e}")
        raise RuntimeError(f"Application initialization failed: {str(e)}") from e


def create_audio_upload_dto(audio_file_path: str) -> AudioUploadDto:
    """
    Create AudioUploadDto from audio file path.

    The whole file is read into memory. The content type is derived from the
    lower-cased file extension and defaults to ``audio/wav`` for extensions
    that are not in the mapping.

    Args:
        audio_file_path: Path to the uploaded audio file

    Returns:
        AudioUploadDto: DTO containing upload information

    Raises:
        ValueError: If the path is empty, the file does not exist, or reading
            the file / building the DTO fails for any reason (every exception
            is wrapped, with the original attached as the cause).
    """
    try:
        if not audio_file_path or not os.path.exists(audio_file_path):
            raise ValueError("No audio file provided or file does not exist")

        filename = os.path.basename(audio_file_path)

        with open(audio_file_path, 'rb') as f:
            content = f.read()

        # Determine content type based on file extension
        file_ext = os.path.splitext(filename.lower())[1]
        content_type_map = {
            '.wav': 'audio/wav',
            '.mp3': 'audio/mpeg',
            '.m4a': 'audio/mp4',
            '.flac': 'audio/flac',
            '.ogg': 'audio/ogg'
        }
        content_type = content_type_map.get(file_ext, 'audio/wav')

        return AudioUploadDto(
            filename=filename,
            content=content,
            content_type=content_type,
            size=len(content)
        )
    except Exception as e:
        logger.error(f"Failed to create AudioUploadDto: {e}")
        raise ValueError(f"Invalid audio file: {str(e)}") from e


def get_supported_configurations() -> dict:
    """
    Get supported configurations from application service.

    Resolves ``AudioProcessingApplicationService`` from the global container
    and returns whatever its ``get_supported_configurations()`` returns. Any
    exception along the way is logged and a hard-coded fallback is returned
    instead, so this function does not raise.

    Returns:
        dict: Supported configurations
    """
    try:
        logger.info("Getting global container...")
        container = get_global_container()

        logger.info("Resolving AudioProcessingApplicationService...")
        audio_service = container.resolve(AudioProcessingApplicationService)

        logger.info("Getting supported configurations from service...")
        config = audio_service.get_supported_configurations()

        logger.info(f"Retrieved configurations: {config}")
        return config
    except Exception as e:
        logger.error(f"Failed to get configurations: {e}", exc_info=True)
        # Return fallback configurations
        return {
            'asr_models': ['whisper-small', 'parakeet'],
            'voices': ['kokoro', 'dia', 'cosyvoice2', 'dummy'],
            'languages': ['en', 'zh', 'es', 'fr', 'de'],
            'audio_formats': ['wav', 'mp3'],
            'max_file_size_mb': 100,
            'speed_range': {'min': 0.5, 'max': 2.0}
        }


def process_audio_pipeline(
    audio_file,
    asr_model: str,
    target_language: str,
    voice: str,
    speed: float,
    source_language: str = "en"
) -> Tuple[str, str, str, Optional[str], str]:
    """
    Execute the complete processing pipeline using application services.

    This function does not raise: every exception is logged and converted
    into a failure tuple so the UI always receives five values.

    Args:
        audio_file: Gradio audio file input (a file path, since the UI uses
            ``type="filepath"``)
        asr_model: ASR model to use
        target_language: Target language for translation
        voice: Voice for TTS
        speed: Speech speed
        source_language: Source language

    Returns:
        Tuple: (status_message, original_text, translated_text,
        audio_output_path, processing_details). ``audio_output_path`` is
        ``None`` on failure or when the result has no audio output.
    """
    try:
        if not audio_file:
            return "❌ No audio file provided", "", "", None, ""

        logger.info(f"Starting processing for: {audio_file} using {asr_model} model")

        # Create audio upload DTO
        audio_upload = create_audio_upload_dto(audio_file)

        # Get application service from container
        container = get_global_container()
        audio_service = container.resolve(AudioProcessingApplicationService)

        # Create processing request
        request = ProcessingRequestDto(
            audio=audio_upload,
            asr_model=asr_model,
            target_language=target_language,
            voice=voice,
            speed=speed,
            source_language=source_language
        )

        # Process through application service
        result = audio_service.process_audio_pipeline(request)

        if not result.success:
            error_msg = f"❌ Processing Failed: {result.error_message}"
            logger.error(f"Processing failed: {result.error_message}")
            return error_msg, "", "", None, f"Error: {result.error_message}"

        status_message = f"✅ Processing Complete! ({result.processing_time:.2f}s)"
        logger.info(f"Processing completed successfully in {result.processing_time:.2f}s")

        # Prepare processing details; service metadata may override these keys
        details = {
            "processing_time": f"{result.processing_time:.2f}s",
            "asr_model": asr_model,
            "target_language": target_language,
            "voice": voice,
            "speed": speed
        }
        if result.metadata:
            details.update(result.metadata)

        processing_details = json.dumps(details, indent=2)

        return (
            status_message,
            result.original_text or "",
            result.translated_text or "",
            result.audio_path if result.has_audio_output else None,
            processing_details
        )

    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        error_msg = f"❌ Processing Failed: {str(e)}"
        return error_msg, "", "", None, f"System Error: {str(e)}"


def _select_default_voice(voices, preferred: str = "chatterbox"):
    """
    Pick the initial value for the voice dropdown.

    Returns ``preferred`` when it is one of the offered voices (or when no
    voices are offered at all, which keeps the previous behavior); otherwise
    returns the first offered voice so the default is always a valid choice.
    Entries given as ``(label, value)`` pairs are compared by their value.
    """
    if not voices:
        return preferred

    values = [v[1] if isinstance(v, (tuple, list)) else v for v in voices]
    if preferred in values:
        return preferred
    return values[0]


def create_interface():
    """
    Create and configure the Gradio interface using gr.Interface for better compatibility.

    Initializes the application container, loads the supported
    configurations, and builds the ``gr.Interface`` whose inputs are wired to
    ``process_audio_pipeline`` through a small wrapper that maps the
    displayed language name to its language code.

    Returns:
        The configured ``gr.Interface`` instance (not yet launched).

    Raises:
        RuntimeError: Propagated from ``initialize_application`` when the
            container cannot be initialized.
    """
    # Initialize application
    initialize_application()

    # Get supported configurations
    config = get_supported_configurations()

    # Language options mapping
    language_options = {
        "Chinese (Mandarin)": "zh",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "English": "en"
    }

    def process_wrapper(audio_file, asr_model_val, target_lang_val, voice_val, speed_val):
        """Wrapper function for processing"""
        # Map display language to code
        target_lang_code = language_options.get(target_lang_val, "zh")

        return process_audio_pipeline(
            audio_file=audio_file,
            asr_model=asr_model_val,
            target_language=target_lang_code,
            voice=voice_val,
            speed=speed_val,
            source_language="en"
        )

    # Create the interface using gr.Interface for better compatibility
    interface = gr.Interface(
        fn=process_wrapper,
        inputs=[
            gr.Audio(label="Upload Audio File", type="filepath"),
            gr.Dropdown(
                choices=config['asr_models'],
                value=config['asr_models'][0] if config['asr_models'] else "parakeet",
                label="Speech Recognition Model"
            ),
            gr.Dropdown(
                choices=list(language_options.keys()),
                value="Chinese (Mandarin)",
                label="Target Language"
            ),
            gr.Dropdown(
                choices=config['voices'],
                # "chatterbox" is not guaranteed to be offered (the fallback
                # configuration does not list it), so fall back to a voice
                # that is actually among the choices.
                value=_select_default_voice(config['voices']),
                label="Voice"
            ),
            gr.Slider(
                minimum=config['speed_range']['min'],
                maximum=config['speed_range']['max'],
                value=1.0,
                step=0.1,
                label="Speech Speed"
            )
        ],
        outputs=[
            gr.Textbox(label="Status"),
            gr.Textbox(label="Recognition Results"),
            gr.Textbox(label="Translation Results"),
            gr.Audio(label="Audio Output"),
            gr.Code(label="Processing Details", language="json")
        ],
        title="🎧 High-Quality Audio Translation System",
        description="Upload English Audio → Get Chinese Speech Output",
        examples=[
            # Add example configurations if needed
        ]
    )

    return interface


def main():
    """
    Main application entry point.

    Builds the interface and launches it on 0.0.0.0:7860. Any exception
    raised while building or launching is logged with its traceback and then
    re-raised.
    """
    logger.info("Starting Gradio application")

    try:
        # Create interface
        interface = create_interface()

        # Launch the interface
        interface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            debug=False,
            show_error=True,
            quiet=False
        )

    except Exception as e:
        logger.error(f"Failed to start application: {str(e)}", exc_info=True)
        raise


if __name__ == "__main__":
    main()