Spaces:

DroolingPanda
/

teachingAssistant

Build error

File size: 8,220 Bytes

6aea21a

"""Concrete implementation of audio processing service."""

import time
from typing import TYPE_CHECKING

from ..interfaces.audio_processing import IAudioProcessingService
from ..interfaces.speech_recognition import ISpeechRecognitionService
from ..interfaces.translation import ITranslationService
from ..interfaces.speech_synthesis import ISpeechSynthesisService
from ..models.processing_result import ProcessingResult
from ..models.translation_request import TranslationRequest
from ..models.speech_synthesis_request import SpeechSynthesisRequest
from ..exceptions import (
    AudioProcessingException,
    SpeechRecognitionException,
    TranslationFailedException,
    SpeechSynthesisException
)

if TYPE_CHECKING:
    from ..models.audio_content import AudioContent
    from ..models.voice_settings import VoiceSettings


class AudioProcessingService(IAudioProcessingService):
    """Concrete implementation of audio processing pipeline orchestration.

    Chains three injected services -- speech recognition (STT), translation
    and speech synthesis (TTS) -- and reports the outcome of a run as a
    ``ProcessingResult`` instead of letting exceptions escape.
    """

    # Longest audio clip, in seconds, accepted by the pipeline (5 minutes).
    MAX_AUDIO_DURATION_SECONDS = 300

    # Model name handed to the speech recognition service for every request.
    # Kept as a class attribute so it can be overridden without editing the
    # pipeline code.
    DEFAULT_STT_MODEL = "whisper-base"

    def __init__(
        self,
        speech_recognition_service: ISpeechRecognitionService,
        translation_service: ITranslationService,
        speech_synthesis_service: ISpeechSynthesisService
    ):
        """
        Initialize the audio processing service with injected dependencies.

        Args:
            speech_recognition_service: Service for speech-to-text conversion
            translation_service: Service for text translation
            speech_synthesis_service: Service for text-to-speech synthesis
        """
        self._speech_recognition_service = speech_recognition_service
        self._translation_service = translation_service
        self._speech_synthesis_service = speech_synthesis_service

    def process_audio_pipeline(
        self,
        audio: 'AudioContent',
        target_language: str,
        voice_settings: 'VoiceSettings'
    ) -> 'ProcessingResult':
        """
        Process audio through the complete pipeline: STT -> Translation -> TTS.

        This method does not raise: every failure (validation, a pipeline
        step, or an unexpected error) is converted into a failure result that
        carries the error message and the elapsed processing time.

        Args:
            audio: The input audio content
            target_language: The target language for translation
            voice_settings: Voice settings for TTS synthesis

        Returns:
            ProcessingResult: A success result holding the original text, the
            translated text, the synthesized audio and the processing time, or
            a failure result holding the error message and processing time.
        """
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted (or go negative) when the system clock is adjusted.
        start_time = time.perf_counter()

        try:
            # Validate inputs
            self._validate_pipeline_inputs(audio, target_language, voice_settings)

            # Step 1: Speech Recognition (STT)
            original_text = self._perform_speech_recognition(audio)

            # Step 2: Translation
            translated_text = self._perform_translation(original_text, target_language)

            # Step 3: Speech Synthesis (TTS)
            audio_output = self._perform_speech_synthesis(translated_text, voice_settings)

            # Create successful result
            return ProcessingResult.success_result(
                original_text=original_text,
                translated_text=translated_text,
                audio_output=audio_output,
                processing_time=time.perf_counter() - start_time
            )

        except (
            AudioProcessingException,
            SpeechRecognitionException,
            TranslationFailedException,
            SpeechSynthesisException
        ) as e:
            # Expected, domain-specific failures. AudioProcessingException is
            # listed so that input-validation errors are reported with their
            # own message rather than being labelled as "unexpected".
            return ProcessingResult.failure_result(
                error_message=str(e),
                processing_time=time.perf_counter() - start_time
            )
        except Exception as e:
            # Anything else is genuinely unexpected; still report it as a
            # failure result so callers get a uniform return type.
            error_message = f"Unexpected error in audio processing pipeline: {str(e)}"
            return ProcessingResult.failure_result(
                error_message=error_message,
                processing_time=time.perf_counter() - start_time
            )

    def _validate_pipeline_inputs(
        self,
        audio: 'AudioContent',
        target_language: str,
        voice_settings: 'VoiceSettings'
    ) -> None:
        """
        Validate inputs for the audio processing pipeline.

        Checks are performed in order and the first failing one raises.

        Args:
            audio: The input audio content
            target_language: The target language for translation
            voice_settings: Voice settings for TTS synthesis

        Raises:
            AudioProcessingException: If any input is missing, the voice
                settings language differs from the target language, the audio
                is longer than ``MAX_AUDIO_DURATION_SECONDS``, or the audio
                format is reported as invalid
        """
        if audio is None:
            raise AudioProcessingException("Audio content cannot be None")

        if not target_language or not target_language.strip():
            raise AudioProcessingException("Target language cannot be empty")

        if voice_settings is None:
            raise AudioProcessingException("Voice settings cannot be None")

        # Validate that voice settings language matches target language
        if voice_settings.language != target_language:
            raise AudioProcessingException(
                f"Voice settings language ({voice_settings.language}) must match "
                f"target language ({target_language})"
            )

        # Validate audio duration for processing limits
        if audio.duration > self.MAX_AUDIO_DURATION_SECONDS:
            raise AudioProcessingException(
                f"Audio duration ({audio.duration:.1f}s) exceeds maximum allowed "
                f"duration ({self.MAX_AUDIO_DURATION_SECONDS}s)"
            )

        # Validate audio format is supported
        if not audio.is_valid_format:
            raise AudioProcessingException(f"Unsupported audio format: {audio.format}")

    def _perform_speech_recognition(self, audio: 'AudioContent') -> 'TextContent':
        """
        Perform speech recognition on the input audio.

        Args:
            audio: The input audio content

        Returns:
            TextContent: The transcribed text

        Raises:
            SpeechRecognitionException: If transcription fails; the original
                error is attached as the exception's cause
        """
        try:
            return self._speech_recognition_service.transcribe(
                audio, self.DEFAULT_STT_MODEL
            )
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise SpeechRecognitionException(f"Speech recognition failed: {str(e)}") from e

    def _perform_translation(self, text: 'TextContent', target_language: str) -> 'TextContent':
        """
        Perform translation of the transcribed text.

        Args:
            text: The text to translate
            target_language: The target language for translation

        Returns:
            TextContent: The translated text, or ``text`` itself when it is
            already in the target language

        Raises:
            TranslationFailedException: If translation fails; the original
                error is attached as the exception's cause
        """
        try:
            # Check if translation is needed
            if text.language == target_language:
                # No translation needed, return original text
                return text

            # Create translation request
            translation_request = TranslationRequest(
                source_text=text,
                target_language=target_language
            )

            return self._translation_service.translate(translation_request)
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise TranslationFailedException(f"Translation failed: {str(e)}") from e

    def _perform_speech_synthesis(
        self,
        text: 'TextContent',
        voice_settings: 'VoiceSettings'
    ) -> 'AudioContent':
        """
        Perform speech synthesis on the translated text.

        Args:
            text: The text to synthesize
            voice_settings: Voice settings for synthesis

        Returns:
            AudioContent: The synthesized audio

        Raises:
            SpeechSynthesisException: If synthesis fails; the original error
                is attached as the exception's cause
        """
        try:
            # Create speech synthesis request
            synthesis_request = SpeechSynthesisRequest(
                text_content=text,
                voice_settings=voice_settings
            )

            return self._speech_synthesis_service.synthesize(synthesis_request)
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise SpeechSynthesisException(f"Speech synthesis failed: {str(e)}") from e