"""Voice tools for CrewAI agents: Whisper-based speech transcription,
TinyGPT2-based emotion analysis, and reflective-question generation."""

import asyncio
import os
import tempfile
from typing import List, Optional

import numpy as np
import soundfile as sf
import torch
from crewai.tools import BaseTool
from pydantic import PrivateAttr
from transformers import pipeline

from models.tinygpt2_model import TinyGPT2Model


class MultilingualVoiceProcessor:
    """Wraps a Hugging Face ASR pipeline for multilingual transcription."""

    def __init__(self, model_name: str = "openai/whisper-base", device=None):
        cache_dir = os.getenv("TRANSFORMERS_CACHE", None)
        if device is None:
            device = 0 if torch.cuda.is_available() else -1
        # cache_dir is not a pipeline() argument; forward it to the underlying
        # from_pretrained call through model_kwargs instead.
        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device,
            model_kwargs={"cache_dir": cache_dir} if cache_dir else {},
            generate_kwargs={"task": "transcribe", "return_timestamps": False},
        )

    async def transcribe(self, audio_data: np.ndarray, language: Optional[str] = None):
        """Transcribe a 16 kHz mono waveform; returns (text, language)."""
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
            sf.write(tmp_wav.name, audio_data, samplerate=16000)
            # A language hint must be passed via generate_kwargs, which the
            # ASR pipeline forwards to model.generate().
            extra = {"generate_kwargs": {"language": language}} if language else {}
            result = self.pipe(tmp_wav.name, **extra)
            text = result["text"]
        return text, language or "unknown"

    async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
        raise NotImplementedError("Use gTTS or edge-tts as before.")


class TranscribeAudioTool(BaseTool):
    name: str = "transcribe_audio"
    description: str = "Transcribe audio to text and detect language."
    model_config = {"arbitrary_types_allowed": True}
    _vp: MultilingualVoiceProcessor = PrivateAttr()

    def __init__(self, config=None):
        super().__init__()
        self._vp = MultilingualVoiceProcessor()

    def _run(self, audio_data: List[float], language: Optional[str] = None):
        audio_np = np.array(audio_data, dtype=np.float32)
        text, detected_lang = asyncio.run(self._vp.transcribe(audio_np, language))
        return {"text": text, "language": detected_lang}


class DetectEmotionTool(BaseTool):
    name: str = "detect_emotion"
    description: str = "Detect the emotional state from text."
    model_config = {"arbitrary_types_allowed": True}

    def __init__(self, config=None):
        super().__init__()

    def _run(self, text: str):
        model = TinyGPT2Model()
        prompt = (
            f'Analyse emotions in: "{text}". '
            "Format: JSON with primary_emotion, intensity, feelings, concerns."
        )
        response = model.generate(prompt)
        # NOTE: the model response is not parsed yet; a fixed placeholder
        # structure is returned until reliable JSON output is available.
        return {
            "primary_emotion": "detected_emotion",
            "intensity": "medium",
            "feelings": ["feeling1"],
            "concerns": ["concern1"],
        }


class GenerateReflectiveQuestionsTool(BaseTool):
    name: str = "generate_reflective_questions"
    description: str = "Generate reflective questions."
    model_config = {"arbitrary_types_allowed": True}

    def __init__(self, config=None):
        super().__init__()

    def _run(self, context: dict):
        emotion = context.get("primary_emotion", "neutral")
        questions_map = {
            "anxiety": ["What triggers your anxiety?", "How do you cope?"],
            "sadness": ["What helps when you feel sad?", "Who can you talk to?"],
        }
        return questions_map.get(emotion, [
            "How are you feeling?",
            "What feels important now?",
        ])


class VoiceTools:
    """Bundles the voice-related tools for convenient access."""

    def __init__(self, config=None):
        self.transcribe_audio = TranscribeAudioTool(config)
        self.detect_emotion = DetectEmotionTool(config)
        self.generate_reflective_questions = GenerateReflectiveQuestionsTool(config)
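
# ---------------------------------------------------------------------------
# Illustrative usage: a minimal sketch, not part of the tool API. It assumes
# a 16 kHz mono float32 recording; the synthetic 440 Hz tone below is only a
# stand-in for real microphone input, so the transcript is meaningless and the
# block merely demonstrates how the three tools chain together.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    tools = VoiceTools()

    # One second of a 440 Hz sine wave at 16 kHz as placeholder audio.
    t = np.linspace(0.0, 1.0, 16000, endpoint=False)
    fake_audio = (0.1 * np.sin(2 * np.pi * 440.0 * t)).astype(np.float32)

    transcript = tools.transcribe_audio._run(fake_audio.tolist(), language="en")
    print("Transcript:", transcript)

    emotion = tools.detect_emotion._run(transcript["text"])
    print("Emotion:", emotion)

    questions = tools.generate_reflective_questions._run(emotion)
    print("Reflective questions:", questions)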