""" Multilingual Voice Processing Tools STT and TTS with language support """ import whisper import numpy as np from gtts import gTTS import edge_tts import io import asyncio from typing import Tuple, Optional from crewai.tools import BaseTool import speech_recognition as sr class MultilingualVoiceProcessor: """Handles multilingual STT and TTS""" def __init__(self): # Load Whisper model for multilingual STT self.whisper_model = whisper.load_model("base") # Language voice mappings for Edge TTS self.voice_map = { "en": "en-US-AriaNeural", "es": "es-ES-ElviraNeural", "fr": "fr-FR-DeniseNeural", "de": "de-DE-KatjaNeural", "it": "it-IT-ElsaNeural", "pt": "pt-BR-FranciscaNeural", "hi": "hi-IN-SwaraNeural", "zh": "zh-CN-XiaoxiaoNeural", "ja": "ja-JP-NanamiNeural", "ko": "ko-KR-SunHiNeural", "ar": "ar-SA-ZariyahNeural", "ru": "ru-RU-SvetlanaNeural" } async def transcribe( self, audio_data: np.ndarray, language: Optional[str] = None ) -> Tuple[str, str]: """Transcribe audio to text with language detection""" try: # Process audio if isinstance(audio_data, tuple): sample_rate, audio = audio_data else: audio = audio_data sample_rate = 16000 # Normalize audio if audio.dtype != np.float32: audio = audio.astype(np.float32) / 32768.0 # Transcribe with Whisper if language and language != "auto": result = self.whisper_model.transcribe( audio, language=language ) else: # Auto-detect language result = self.whisper_model.transcribe(audio) text = result["text"] detected_language = result["language"] return text, detected_language except Exception as e: print(f"Transcription error: {e}") return "Could not transcribe audio", "en" async def synthesize( self, text: str, language: str = "en", voice_type: str = "normal" ) -> bytes: """Convert text to speech with voice modulation""" try: voice = self.voice_map.get(language, "en-US-AriaNeural") # Apply voice settings for meditation tone if voice_type == "meditation": rate = "-15%" # Slower pitch = "-50Hz" # Lower pitch else: rate = "+0%" pitch = "+0Hz" # Generate speech communicate = edge_tts.Communicate( text, voice, rate=rate, pitch=pitch ) audio_data = b"" async for chunk in communicate.stream(): if chunk["type"] == "audio": audio_data += chunk["data"] return audio_data except Exception as e: print(f"TTS error: {e}") # Fallback to gTTS try: tts = gTTS(text=text, lang=language[:2]) fp = io.BytesIO() tts.write_to_fp(fp) return fp.getvalue() except: return None class TranscribeTool(BaseTool): name: str = "transcribe_audio" description: str = "Transcribe audio input to text with language detection" def _run(self, audio_data: np.ndarray, language: str = None) -> dict: processor = MultilingualVoiceProcessor() text, detected_lang = asyncio.run( processor.transcribe(audio_data, language) ) return { "text": text, "language": detected_lang } class DetectEmotionTool(BaseTool): name: str = "detect_emotion" description: str = "Detect emotional state from text using Mistral" def _run(self, text: str) -> dict: # Use Mistral for emotion detection from models.mistral_model import MistralModel model = MistralModel() prompt = f""" Analyze the emotional state in this text: "{text}" Identify: 1. Primary emotion (joy, sadness, anger, fear, anxiety, confusion, etc.) 2. Emotional intensity (low, medium, high) 3. Underlying feelings 4. 
Key concerns Format as JSON with keys: primary_emotion, intensity, feelings, concerns """ response = model.generate(prompt) # Parse response (simplified) return { "primary_emotion": "detected_emotion", "intensity": "medium", "feelings": ["feeling1", "feeling2"], "concerns": ["concern1", "concern2"] } class GenerateQuestionsTool(BaseTool): name: str = "generate_reflective_questions" description: str = "Generate empathetic reflective questions" def _run(self, context: dict) -> list: emotion = context.get("primary_emotion", "neutral") questions_map = { "anxiety": [ "What specific thoughts are creating this anxiety?", "What would feeling calm look like in this situation?", "What has helped you manage anxiety before?" ], "sadness": [ "What would comfort mean to you right now?", "What are you grieving or missing?", "How can you be gentle with yourself today?" ], "confusion": [ "What would clarity feel like?", "What's the main question you're grappling with?", "What does your intuition tell you?" ] } return questions_map.get(emotion, [ "How are you feeling in this moment?", "What would support look like for you?", "What's most important to explore right now?" ])
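

# --- Usage sketch ---
# A minimal example of how the processor might be exercised end to end.
# This demo block is an illustrative assumption, not part of the module's
# API: the silent array stands in for a real 16 kHz mono recording, and
# "response.mp3" is an arbitrary output path. In a CrewAI setup the tool
# classes above would normally be passed to an Agent's `tools` list
# rather than called directly.
if __name__ == "__main__":
    async def demo():
        processor = MultilingualVoiceProcessor()

        # One second of silence as a stand-in input signal
        audio = np.zeros(16000, dtype=np.float32)
        text, lang = await processor.transcribe(audio)
        print(f"Transcribed ({lang}): {text}")

        # Synthesize a reply in the detected language with the slower,
        # lower-pitched meditation voice settings
        speech = await processor.synthesize(
            "Take a slow, deep breath.", language=lang, voice_type="meditation"
        )
        if speech:
            with open("response.mp3", "wb") as f:
                f.write(speech)

    asyncio.run(demo())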