# teachingAssistant/src/domain/services/audio_processing_service.py
"""Concrete implementation of audio processing service."""
import time
from typing import TYPE_CHECKING
from ..interfaces.audio_processing import IAudioProcessingService
from ..interfaces.speech_recognition import ISpeechRecognitionService
from ..interfaces.translation import ITranslationService
from ..interfaces.speech_synthesis import ISpeechSynthesisService
from ..models.processing_result import ProcessingResult
from ..models.translation_request import TranslationRequest
from ..models.speech_synthesis_request import SpeechSynthesisRequest
from ..exceptions import (
AudioProcessingException,
SpeechRecognitionException,
TranslationFailedException,
SpeechSynthesisException
)
if TYPE_CHECKING:
    from ..models.audio_content import AudioContent
    from ..models.text_content import TextContent  # assumed module path, following the models package naming
    from ..models.voice_settings import VoiceSettings
class AudioProcessingService(IAudioProcessingService):
"""Concrete implementation of audio processing pipeline orchestration."""
def __init__(
self,
speech_recognition_service: ISpeechRecognitionService,
translation_service: ITranslationService,
speech_synthesis_service: ISpeechSynthesisService
):
"""
Initialize the audio processing service with injected dependencies.
Args:
speech_recognition_service: Service for speech-to-text conversion
translation_service: Service for text translation
speech_synthesis_service: Service for text-to-speech synthesis
"""
self._speech_recognition_service = speech_recognition_service
self._translation_service = translation_service
self._speech_synthesis_service = speech_synthesis_service
def process_audio_pipeline(
self,
audio: 'AudioContent',
target_language: str,
voice_settings: 'VoiceSettings'
) -> 'ProcessingResult':
"""
Process audio through the complete pipeline: STT -> Translation -> TTS.
Args:
audio: The input audio content
target_language: The target language for translation
voice_settings: Voice settings for TTS synthesis
        Returns:
            ProcessingResult: The result of the complete processing pipeline.
                Failures at any step are captured in a failure result rather
                than raised to the caller.
        """
start_time = time.time()
try:
# Validate inputs
self._validate_pipeline_inputs(audio, target_language, voice_settings)
# Step 1: Speech Recognition (STT)
original_text = self._perform_speech_recognition(audio)
# Step 2: Translation
translated_text = self._perform_translation(original_text, target_language)
# Step 3: Speech Synthesis (TTS)
audio_output = self._perform_speech_synthesis(translated_text, voice_settings)
# Calculate processing time
processing_time = time.time() - start_time
# Create successful result
return ProcessingResult.success_result(
original_text=original_text,
translated_text=translated_text,
audio_output=audio_output,
processing_time=processing_time
)
        except (AudioProcessingException, SpeechRecognitionException,
                TranslationFailedException, SpeechSynthesisException) as e:
            # Handle expected domain exceptions, including validation failures
processing_time = time.time() - start_time
return ProcessingResult.failure_result(
error_message=str(e),
processing_time=processing_time
)
except Exception as e:
# Handle unexpected exceptions
processing_time = time.time() - start_time
error_message = f"Unexpected error in audio processing pipeline: {str(e)}"
return ProcessingResult.failure_result(
error_message=error_message,
processing_time=processing_time
)
def _validate_pipeline_inputs(
self,
audio: 'AudioContent',
target_language: str,
voice_settings: 'VoiceSettings'
) -> None:
"""
Validate inputs for the audio processing pipeline.
Args:
audio: The input audio content
target_language: The target language for translation
voice_settings: Voice settings for TTS synthesis
Raises:
AudioProcessingException: If validation fails
"""
if audio is None:
raise AudioProcessingException("Audio content cannot be None")
if not target_language or not target_language.strip():
raise AudioProcessingException("Target language cannot be empty")
if voice_settings is None:
raise AudioProcessingException("Voice settings cannot be None")
# Validate that voice settings language matches target language
if voice_settings.language != target_language:
raise AudioProcessingException(
f"Voice settings language ({voice_settings.language}) must match "
f"target language ({target_language})"
)
# Validate audio duration for processing limits
if audio.duration > 300: # 5 minutes limit
raise AudioProcessingException(
f"Audio duration ({audio.duration:.1f}s) exceeds maximum allowed duration (300s)"
)
# Validate audio format is supported
if not audio.is_valid_format:
raise AudioProcessingException(f"Unsupported audio format: {audio.format}")
def _perform_speech_recognition(self, audio: 'AudioContent') -> 'TextContent':
"""
Perform speech recognition on the input audio.
Args:
audio: The input audio content
Returns:
TextContent: The transcribed text
Raises:
SpeechRecognitionException: If transcription fails
"""
try:
# Use a default STT model - this could be configurable in the future
model = "whisper-base" # Default model
return self._speech_recognition_service.transcribe(audio, model)
        except SpeechRecognitionException:
            # Preserve domain exceptions raised by the underlying service
            raise
        except Exception as e:
            raise SpeechRecognitionException(f"Speech recognition failed: {e}") from e
def _perform_translation(self, text: 'TextContent', target_language: str) -> 'TextContent':
"""
Perform translation of the transcribed text.
Args:
text: The text to translate
target_language: The target language for translation
Returns:
TextContent: The translated text
Raises:
TranslationFailedException: If translation fails
"""
try:
# Check if translation is needed
if text.language == target_language:
# No translation needed, return original text
return text
# Create translation request
translation_request = TranslationRequest(
source_text=text,
target_language=target_language
)
return self._translation_service.translate(translation_request)
        except TranslationFailedException:
            # Preserve domain exceptions raised by the underlying service
            raise
        except Exception as e:
            raise TranslationFailedException(f"Translation failed: {e}") from e
def _perform_speech_synthesis(
self,
text: 'TextContent',
voice_settings: 'VoiceSettings'
) -> 'AudioContent':
"""
Perform speech synthesis on the translated text.
Args:
text: The text to synthesize
voice_settings: Voice settings for synthesis
Returns:
AudioContent: The synthesized audio
Raises:
SpeechSynthesisException: If synthesis fails
"""
try:
# Create speech synthesis request
synthesis_request = SpeechSynthesisRequest(
text_content=text,
voice_settings=voice_settings
)
return self._speech_synthesis_service.synthesize(synthesis_request)
        except SpeechSynthesisException:
            # Preserve domain exceptions raised by the underlying service
            raise
        except Exception as e:
            raise SpeechSynthesisException(f"Speech synthesis failed: {e}") from e
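

# ---------------------------------------------------------------------------
# Hedged usage sketch (illustrative, not part of the original module): shows
# how the orchestrator might be wired together with unittest.mock stand-ins
# for the three injected services. The concrete service implementations and
# the AudioContent / VoiceSettings constructors are project-specific, so the
# actual pipeline call is left commented out. Run as a module (python -m ...)
# so the relative imports above resolve.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from unittest.mock import create_autospec

    recognizer = create_autospec(ISpeechRecognitionService, instance=True)
    translator = create_autospec(ITranslationService, instance=True)
    synthesizer = create_autospec(ISpeechSynthesisService, instance=True)

    service = AudioProcessingService(recognizer, translator, synthesizer)
    print("AudioProcessingService wired with mock dependencies:", service)

    # A real caller would build domain objects and run the full pipeline:
    # result = service.process_audio_pipeline(audio, "fr", voice_settings)
    # print(result.success, result.translated_text)  # attribute names assumed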