"""Voice tools: Whisper transcription, emotion detection, and reflective questions."""

import asyncio
import json
import tempfile
from typing import List, Optional

import numpy as np
import soundfile as sf
import torch
from crewai.tools import BaseTool
from pydantic import PrivateAttr
from transformers import pipeline

from models.tinygpt2_model import TinyGPT2Model
class MultilingualVoiceProcessor:
    """Speech-to-text via a Whisper ASR pipeline; synthesis is stubbed out."""

    def __init__(self, model_name="openai/whisper-base", device=None):
        # transformers honours TRANSFORMERS_CACHE (and HF_HOME) from the
        # environment on its own, so no cache_dir needs to be passed here.
        if device is None:
            device = 0 if torch.cuda.is_available() else -1
        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device,
            generate_kwargs={"task": "transcribe", "return_timestamps": False},
        )
    async def transcribe(self, audio_data: np.ndarray, language: Optional[str] = None):
        # Whisper expects 16 kHz mono input; round-trip through a temporary WAV
        # file so the pipeline handles decoding. (Reopening the file by name
        # while the handle is still open is POSIX-only.)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
            sf.write(tmp_wav.name, audio_data, samplerate=16000)
            # A forced language must travel via generate_kwargs to reach
            # model.generate; restate the task so it stays in effect.
            extra = {"generate_kwargs": {"task": "transcribe", "language": language}} if language else {}
            result = self.pipe(tmp_wav.name, **extra)
        text = result["text"]
        return text, language or "unknown"

    async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
        raise NotImplementedError("Use gTTS or edge-tts as before.")
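
# A minimal usage sketch (assumes a 16 kHz mono clip; "speech.wav" is a
# hypothetical path):
#
#   audio, _sr = sf.read("speech.wav", dtype="float32")
#   vp = MultilingualVoiceProcessor()
#   text, lang = asyncio.run(vp.transcribe(audio, language="en"))
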
class TranscribeAudioTool(BaseTool):
    name: str = "transcribe_audio"
    description: str = "Transcribe audio to text and detect language."
    model_config = {"arbitrary_types_allowed": True}
    _vp: MultilingualVoiceProcessor = PrivateAttr()

    def __init__(self, config=None):
        super().__init__()
        self._vp = MultilingualVoiceProcessor()

    def _run(self, audio_data: List[float], language: Optional[str] = None):
        # Tool arguments arrive as plain JSON types, so rebuild the NumPy array.
        audio_np = np.array(audio_data, dtype=np.float32)
        # asyncio.run assumes no event loop is already running in this thread.
        text, detected_lang = asyncio.run(self._vp.transcribe(audio_np, language))
        return {"text": text, "language": detected_lang}
class DetectEmotionTool(BaseTool):
    name: str = "detect_emotion"
    description: str = "Detect the emotional state from text."
    model_config = {"arbitrary_types_allowed": True}

    def __init__(self, config=None):
        super().__init__()

    def _run(self, text: str):
        model = TinyGPT2Model()
        prompt = f'Analyse emotions in: "{text}". Format: JSON with primary_emotion, intensity, feelings, concerns.'
        response = model.generate(prompt)
        # Small models rarely emit clean JSON; try to parse the response, then
        # fall back to a neutral default so callers always get a well-formed dict.
        try:
            parsed = json.loads(response)
            if isinstance(parsed, dict):
                return parsed
        except (json.JSONDecodeError, TypeError):
            pass
        return {"primary_emotion": "neutral",
                "intensity": "medium",
                "feelings": [],
                "concerns": []}
class GenerateReflectiveQuestionsTool(BaseTool):
    name: str = "generate_reflective_questions"
    description: str = "Generate reflective questions."
    model_config = {"arbitrary_types_allowed": True}

    def __init__(self, config=None):
        super().__init__()

    def _run(self, context: dict):
        emotion = context.get("primary_emotion", "neutral")
        questions_map = {
            "anxiety": ["What triggers your anxiety?", "How do you cope?"],
            "sadness": ["What helps when you feel sad?", "Who can you talk to?"],
        }
        return questions_map.get(emotion, [
            "How are you feeling?",
            "What feels important now?",
        ])
class VoiceTools:
    """Convenience bundle exposing all voice tools behind one object."""

    def __init__(self, config=None):
        self.transcribe_audio = TranscribeAudioTool(config)
        self.detect_emotion = DetectEmotionTool(config)
        self.generate_reflective_questions = GenerateReflectiveQuestionsTool(config)
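
# Minimal smoke test, assuming a local 16 kHz mono "speech.wav" (hypothetical
# path) and network access to fetch the whisper-base weights on first run.
if __name__ == "__main__":
    tools = VoiceTools()
    audio, _sr = sf.read("speech.wav", dtype="float32")
    transcript = tools.transcribe_audio._run(audio.tolist())
    emotion = tools.detect_emotion._run(transcript["text"])
    print(transcript)
    print(emotion)
    print(tools.generate_reflective_questions._run(emotion))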