Spaces:
Build error
Build error
""" | |
Main entry point for the Audio Translation Web Application | |
Handles file upload, processing pipeline, and UI rendering using DDD architecture with Gradio | |
""" | |
import logging

# Configure the root logger before the remaining imports run so that records
# emitted while those modules load use the same format and destinations.
# Records at INFO and above go to both app.log and the console stream.
logging.basicConfig(
    handlers=[logging.FileHandler("app.log"), logging.StreamHandler()],
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by every function in this file
logger = logging.getLogger(__name__)
import gradio as gr | |
import os | |
import json | |
from typing import Optional, Tuple, Dict, Any | |
# Import application services and DTOs | |
from src.application.services.audio_processing_service import AudioProcessingApplicationService | |
from src.application.services.configuration_service import ConfigurationApplicationService | |
from src.application.dtos.audio_upload_dto import AudioUploadDto | |
from src.application.dtos.processing_request_dto import ProcessingRequestDto | |
from src.application.dtos.processing_result_dto import ProcessingResultDto | |
# Import infrastructure setup | |
from src.infrastructure.config.container_setup import initialize_global_container, get_global_container | |
# Make sure the working directories for uploads and outputs exist
for _temp_dir in ("temp/uploads", "temp/outputs"):
    os.makedirs(_temp_dir, exist_ok=True)

# Flipped to True by initialize_application() once the container is set up
container_initialized = False
def initialize_application():
    """
    Initialize the application's dependency injection container once.

    The module-level ``container_initialized`` flag guards the call to
    ``initialize_global_container``: after the first successful call, later
    calls return immediately. The flag is only set after initialization
    succeeds, so a failed attempt can be retried.

    Raises:
        RuntimeError: If container initialization fails. The underlying
            exception is attached as the cause.
    """
    global container_initialized

    if container_initialized:
        return

    try:
        logger.info("Initializing application container")
        initialize_global_container()
        container_initialized = True
        logger.info("Application container initialized successfully")
    except Exception as e:
        # Include the traceback, matching the other error handlers in this file
        logger.error(f"Failed to initialize application: {e}", exc_info=True)
        # Chain explicitly so the root cause is preserved on the RuntimeError
        raise RuntimeError(f"Application initialization failed: {str(e)}") from e
def create_audio_upload_dto(audio_file_path: str) -> AudioUploadDto:
    """
    Create AudioUploadDto from audio file path.

    The whole file is read into memory and its content type is derived from
    the (case-insensitive) file extension. Extensions that are not in the
    lookup table fall back to 'audio/wav'.

    Args:
        audio_file_path: Path to the uploaded audio file

    Returns:
        AudioUploadDto: DTO containing the file name, raw bytes, content type
            and size in bytes

    Raises:
        ValueError: If the path is empty, does not exist, cannot be read, or
            the DTO cannot be constructed. The underlying exception is
            attached as the cause.
    """
    try:
        if not audio_file_path or not os.path.exists(audio_file_path):
            raise ValueError("No audio file provided or file does not exist")

        filename = os.path.basename(audio_file_path)
        with open(audio_file_path, 'rb') as f:
            content = f.read()

        # Determine content type based on file extension
        file_ext = os.path.splitext(filename.lower())[1]
        content_type_map = {
            '.wav': 'audio/wav',
            '.mp3': 'audio/mpeg',
            '.m4a': 'audio/mp4',
            '.flac': 'audio/flac',
            '.ogg': 'audio/ogg',
        }
        content_type = content_type_map.get(file_ext, 'audio/wav')

        return AudioUploadDto(
            filename=filename,
            content=content,
            content_type=content_type,
            size=len(content)
        )
    except Exception as e:
        # Every failure is reported to callers as ValueError; keep the
        # traceback in the log and the original exception as the cause.
        logger.error(f"Failed to create AudioUploadDto: {e}", exc_info=True)
        raise ValueError(f"Invalid audio file: {str(e)}") from e
def get_supported_configurations() -> dict:
    """
    Fetch the supported configurations from the audio processing service.

    The service is resolved from the global container. If any step fails,
    the error is logged with its traceback and a built-in set of defaults is
    returned instead of raising.

    Returns:
        dict: Supported configurations, or the fallback defaults on failure
    """
    try:
        logger.info("Getting global container...")
        di_container = get_global_container()

        logger.info("Resolving AudioProcessingApplicationService...")
        service = di_container.resolve(AudioProcessingApplicationService)

        logger.info("Getting supported configurations from service...")
        supported = service.get_supported_configurations()

        logger.info(f"Retrieved configurations: {supported}")
    except Exception as e:
        logger.error(f"Failed to get configurations: {e}", exc_info=True)

        # Defaults used when the service cannot be reached
        fallback = {}
        fallback['asr_models'] = ['whisper-small', 'parakeet']
        fallback['voices'] = ['kokoro', 'dia', 'cosyvoice2', 'dummy']
        fallback['languages'] = ['en', 'zh', 'es', 'fr', 'de']
        fallback['audio_formats'] = ['wav', 'mp3']
        fallback['max_file_size_mb'] = 100
        fallback['speed_range'] = {'min': 0.5, 'max': 2.0}
        return fallback
    else:
        return supported
def process_audio_pipeline(
    audio_file,
    asr_model: str,
    target_language: str,
    voice: str,
    speed: float,
    source_language: str = "en"
) -> Tuple[str, str, str, Optional[str], str]:
    """
    Execute the complete processing pipeline using application services.

    Exceptions derived from ``Exception`` are caught, logged and reported
    through the returned status tuple rather than raised, so the UI always
    receives five values.

    Args:
        audio_file: Gradio audio file input (a file path, as passed to
            create_audio_upload_dto)
        asr_model: ASR model to use
        target_language: Target language for translation
        voice: Voice for TTS
        speed: Speech speed
        source_language: Source language

    Returns:
        Tuple: (status_message, original_text, translated_text,
        audio_output_path, processing_details). ``audio_output_path`` is None
        when no file was provided, when processing failed, or when the
        result has no audio output.
    """
    try:
        if not audio_file:
            return "β No audio file provided", "", "", None, ""

        logger.info(f"Starting processing for: {audio_file} using {asr_model} model")

        # Create audio upload DTO
        audio_upload = create_audio_upload_dto(audio_file)

        # Get application service from container
        container = get_global_container()
        audio_service = container.resolve(AudioProcessingApplicationService)

        # Create processing request
        request = ProcessingRequestDto(
            audio=audio_upload,
            asr_model=asr_model,
            target_language=target_language,
            voice=voice,
            speed=speed,
            source_language=source_language
        )

        # Process through application service
        result = audio_service.process_audio_pipeline(request)

        if not result.success:
            error_msg = f"β Processing Failed: {result.error_message}"
            logger.error(f"Processing failed: {result.error_message}")
            return error_msg, "", "", None, f"Error: {result.error_message}"

        status_message = f"β Processing Complete! ({result.processing_time:.2f}s)"
        logger.info(f"Processing completed successfully in {result.processing_time:.2f}s")

        # Prepare processing details; service metadata may override the
        # defaults listed here.
        details = {
            "processing_time": f"{result.processing_time:.2f}s",
            "asr_model": asr_model,
            "target_language": target_language,
            "voice": voice,
            "speed": speed
        }
        if result.metadata:
            details.update(result.metadata)

        # The details are display-only. Stringify values json cannot encode
        # so that a successful run is not reported as failed merely because
        # the metadata contained a non-JSON type.
        processing_details = json.dumps(details, indent=2, default=str)

        return (
            status_message,
            result.original_text or "",
            result.translated_text or "",
            result.audio_path if result.has_audio_output else None,
            processing_details
        )
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        error_msg = f"β Processing Failed: {str(e)}"
        return error_msg, "", "", None, f"System Error: {str(e)}"
def create_interface():
    """
    Create and configure the Gradio interface using gr.Interface for better compatibility.

    Initializes the application container, reads the supported
    configurations and builds the input and output components from them.

    Returns:
        The configured gr.Interface instance (not yet launched)

    Raises:
        RuntimeError: Propagated from initialize_application() when the
            container cannot be initialized.
    """
    # Initialize application
    initialize_application()

    # Get supported configurations
    config = get_supported_configurations()
    asr_models = config['asr_models']
    voices = config['voices']
    speed_range = config['speed_range']

    # Language options mapping (display name -> language code)
    language_options = {
        "Chinese (Mandarin)": "zh",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "English": "en"
    }

    # Prefer "chatterbox" as the default voice, but only when it is actually
    # offered. A default outside `choices` is not a valid dropdown selection,
    # and the fallback configuration does not list "chatterbox".
    if "chatterbox" in voices:
        default_voice = "chatterbox"
    else:
        default_voice = voices[0] if voices else None

    def process_wrapper(audio_file, asr_model_val, target_lang_val, voice_val, speed_val):
        """Map the selected display language to its code and run the pipeline."""
        # Unknown display names fall back to Chinese
        target_lang_code = language_options.get(target_lang_val, "zh")
        return process_audio_pipeline(
            audio_file=audio_file,
            asr_model=asr_model_val,
            target_language=target_lang_code,
            voice=voice_val,
            speed=speed_val,
            source_language="en"
        )

    # Create the interface using gr.Interface for better compatibility
    interface = gr.Interface(
        fn=process_wrapper,
        inputs=[
            gr.Audio(label="Upload Audio File", type="filepath"),
            gr.Dropdown(
                choices=asr_models,
                value=asr_models[0] if asr_models else "parakeet",
                label="Speech Recognition Model"
            ),
            gr.Dropdown(
                choices=list(language_options.keys()),
                value="Chinese (Mandarin)",
                label="Target Language"
            ),
            gr.Dropdown(
                choices=voices,
                value=default_voice,
                label="Voice"
            ),
            gr.Slider(
                minimum=speed_range['min'],
                maximum=speed_range['max'],
                value=1.0,
                step=0.1,
                label="Speech Speed"
            )
        ],
        outputs=[
            gr.Textbox(label="Status"),
            gr.Textbox(label="Recognition Results"),
            gr.Textbox(label="Translation Results"),
            gr.Audio(label="Audio Output"),
            gr.Code(label="Processing Details", language="json")
        ],
        title="π§ High-Quality Audio Translation System",
        description="Upload English Audio β Get Chinese Speech Output",
        examples=[
            # Add example configurations if needed
        ]
    )
    return interface
def main():
    """
    Build the Gradio interface and serve it.

    Any exception raised while creating or launching the interface is logged
    with its traceback and then re-raised unchanged.
    """
    logger.info("Starting Gradio application")

    # Server settings passed to Gradio's launch()
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=False,
        show_error=True,
        quiet=False,
    )

    try:
        app = create_interface()
        app.launch(**launch_options)
    except Exception as e:
        logger.error(f"Failed to start application: {str(e)}", exc_info=True)
        raise


if __name__ == "__main__":
    main()