"""Concrete implementation of audio processing service."""

import time
from typing import TYPE_CHECKING

from ..interfaces.audio_processing import IAudioProcessingService
from ..interfaces.speech_recognition import ISpeechRecognitionService
from ..interfaces.translation import ITranslationService
from ..interfaces.speech_synthesis import ISpeechSynthesisService
from ..models.processing_result import ProcessingResult
from ..models.translation_request import TranslationRequest
from ..models.speech_synthesis_request import SpeechSynthesisRequest
from ..exceptions import (
    AudioProcessingException,
    SpeechRecognitionException,
    TranslationFailedException,
    SpeechSynthesisException
)

if TYPE_CHECKING:
    from ..models.audio_content import AudioContent
    from ..models.voice_settings import VoiceSettings
    # NOTE(review): 'TextContent' is used in the string annotations below but
    # is not imported in this module. Presumably it lives next to the other
    # models -- confirm the module path and add the import here.


class AudioProcessingService(IAudioProcessingService):
    """Concrete implementation of audio processing pipeline orchestration.

    Runs the three injected services in sequence (speech recognition,
    translation, speech synthesis) and reports the outcome as a
    ``ProcessingResult``.
    """

    # Model name passed to the speech recognition service for every request.
    _DEFAULT_STT_MODEL = "whisper-base"

    # Upper bound on the input audio duration, in seconds (5 minutes).
    _MAX_AUDIO_DURATION_SECONDS = 300

    def __init__(
        self,
        speech_recognition_service: ISpeechRecognitionService,
        translation_service: ITranslationService,
        speech_synthesis_service: ISpeechSynthesisService
    ):
        """
        Initialize the audio processing service with injected dependencies.

        Args:
            speech_recognition_service: Service for speech-to-text conversion
            translation_service: Service for text translation
            speech_synthesis_service: Service for text-to-speech synthesis
        """
        self._speech_recognition_service = speech_recognition_service
        self._translation_service = translation_service
        self._speech_synthesis_service = speech_synthesis_service

    def process_audio_pipeline(
        self,
        audio: 'AudioContent',
        target_language: str,
        voice_settings: 'VoiceSettings'
    ) -> 'ProcessingResult':
        """
        Process audio through the complete pipeline: STT -> Translation -> TTS.

        This method does not propagate ``Exception`` subclasses: validation
        errors, step failures and unexpected errors are all converted into a
        failure result carrying the error message and the elapsed time.

        Args:
            audio: The input audio content
            target_language: The target language for translation
            voice_settings: Voice settings for TTS synthesis

        Returns:
            ProcessingResult: A success result holding the original text, the
            translated text, the synthesized audio and the processing time,
            or a failure result holding an error message and the processing
            time.
        """
        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()

        try:
            self._validate_pipeline_inputs(audio, target_language, voice_settings)

            # Step 1: Speech Recognition (STT)
            original_text = self._perform_speech_recognition(audio)

            # Step 2: Translation
            translated_text = self._perform_translation(original_text, target_language)

            # Step 3: Speech Synthesis (TTS)
            audio_output = self._perform_speech_synthesis(translated_text, voice_settings)

            return ProcessingResult.success_result(
                original_text=original_text,
                translated_text=translated_text,
                audio_output=audio_output,
                processing_time=time.perf_counter() - start_time
            )
        except (
            AudioProcessingException,
            SpeechRecognitionException,
            TranslationFailedException,
            SpeechSynthesisException
        ) as e:
            # Expected domain failures, including input validation errors:
            # report the message as-is rather than labelling it "unexpected".
            return ProcessingResult.failure_result(
                error_message=str(e),
                processing_time=time.perf_counter() - start_time
            )
        except Exception as e:
            # Boundary handler: anything else is reported as unexpected.
            return ProcessingResult.failure_result(
                error_message=f"Unexpected error in audio processing pipeline: {e}",
                processing_time=time.perf_counter() - start_time
            )

    def _validate_pipeline_inputs(
        self,
        audio: 'AudioContent',
        target_language: str,
        voice_settings: 'VoiceSettings'
    ) -> None:
        """
        Validate inputs for the audio processing pipeline.

        Checks are performed in order and the first failing one raises:
        audio present, target language non-blank, voice settings present,
        voice language equal to target language, audio duration within the
        limit, audio format valid.

        Args:
            audio: The input audio content
            target_language: The target language for translation
            voice_settings: Voice settings for TTS synthesis

        Raises:
            AudioProcessingException: If validation fails
        """
        if audio is None:
            raise AudioProcessingException("Audio content cannot be None")

        if not target_language or not target_language.strip():
            raise AudioProcessingException("Target language cannot be empty")

        if voice_settings is None:
            raise AudioProcessingException("Voice settings cannot be None")

        # The synthesized voice must speak the language being translated to.
        if voice_settings.language != target_language:
            raise AudioProcessingException(
                f"Voice settings language ({voice_settings.language}) must match "
                f"target language ({target_language})"
            )

        # Enforce the processing limit on input length.
        if audio.duration > self._MAX_AUDIO_DURATION_SECONDS:
            raise AudioProcessingException(
                f"Audio duration ({audio.duration:.1f}s) exceeds maximum allowed "
                f"duration ({self._MAX_AUDIO_DURATION_SECONDS}s)"
            )

        if not audio.is_valid_format:
            raise AudioProcessingException(f"Unsupported audio format: {audio.format}")

    def _perform_speech_recognition(self, audio: 'AudioContent') -> 'TextContent':
        """
        Perform speech recognition on the input audio.

        Args:
            audio: The input audio content

        Returns:
            TextContent: The transcribed text

        Raises:
            SpeechRecognitionException: If transcription fails; the original
                error is attached as the exception's cause
        """
        try:
            return self._speech_recognition_service.transcribe(
                audio, self._DEFAULT_STT_MODEL
            )
        except Exception as e:
            raise SpeechRecognitionException(f"Speech recognition failed: {e}") from e

    def _perform_translation(self, text: 'TextContent', target_language: str) -> 'TextContent':
        """
        Perform translation of the transcribed text.

        Args:
            text: The text to translate
            target_language: The target language for translation

        Returns:
            TextContent: The translated text, or ``text`` itself when its
            language already equals ``target_language``

        Raises:
            TranslationFailedException: If translation fails; the original
                error is attached as the exception's cause
        """
        try:
            # Skip the translation service when no translation is needed.
            if text.language == target_language:
                return text

            translation_request = TranslationRequest(
                source_text=text,
                target_language=target_language
            )
            return self._translation_service.translate(translation_request)
        except Exception as e:
            raise TranslationFailedException(f"Translation failed: {e}") from e

    def _perform_speech_synthesis(
        self,
        text: 'TextContent',
        voice_settings: 'VoiceSettings'
    ) -> 'AudioContent':
        """
        Perform speech synthesis on the translated text.

        Args:
            text: The text to synthesize
            voice_settings: Voice settings for synthesis

        Returns:
            AudioContent: The synthesized audio

        Raises:
            SpeechSynthesisException: If synthesis fails; the original error
                is attached as the exception's cause
        """
        try:
            synthesis_request = SpeechSynthesisRequest(
                text_content=text,
                voice_settings=voice_settings
            )
            return self._speech_synthesis_service.synthesize(synthesis_request)
        except Exception as e:
            raise SpeechSynthesisException(f"Speech synthesis failed: {e}") from e