# TatTwamAI/agents/tools/voice_tools.py
import asyncio
import json
import os
import tempfile
from typing import List, Optional

import numpy as np
import soundfile as sf
import torch
from crewai.tools import BaseTool
from pydantic import PrivateAttr
from transformers import pipeline

from models.tinygpt2_model import TinyGPT2Model


class MultilingualVoiceProcessor:
    def __init__(self, model_name="openai/whisper-base", device=None):
        # Honour a local Hugging Face cache if one is configured via TRANSFORMERS_CACHE.
        cache_dir = os.getenv("TRANSFORMERS_CACHE", None)
        if device is None:
            device = 0 if torch.cuda.is_available() else -1
        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device,
            model_kwargs={"cache_dir": cache_dir} if cache_dir else None,
            generate_kwargs={"task": "transcribe", "return_timestamps": False},
        )

    async def transcribe(self, audio_data: np.ndarray, language: str = None):
        # Whisper expects 16 kHz mono audio; write the buffer to a temporary WAV
        # file and let the pipeline handle feature extraction.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
            sf.write(tmp_wav.name, audio_data, samplerate=16000)
            # A language hint must go through generate_kwargs; the ASR pipeline
            # does not accept a bare `language=` call argument.
            extra = (
                {"generate_kwargs": {"task": "transcribe", "language": language}}
                if language
                else {}
            )
            result = self.pipe(tmp_wav.name, **extra)
            text = result["text"]
        return text, language or "unknown"

    async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
        raise NotImplementedError("Use gTTS or edge-tts as before.")


class TranscribeAudioTool(BaseTool):
    name: str = "transcribe_audio"
    description: str = "Transcribe audio to text and detect language."
    model_config = {"arbitrary_types_allowed": True}
    _vp: MultilingualVoiceProcessor = PrivateAttr()

    def __init__(self, config=None):
        super().__init__()
        self._vp = MultilingualVoiceProcessor()

    def _run(self, audio_data: List[float], language: Optional[str] = None):
        # Tool inputs arrive as plain lists; convert to the float32 array the
        # Whisper pipeline expects.
        audio_np = np.array(audio_data, dtype=np.float32)
        text, detected_lang = asyncio.run(self._vp.transcribe(audio_np, language))
        return {"text": text, "language": detected_lang}


class DetectEmotionTool(BaseTool):
    name: str = "detect_emotion"
    description: str = "Detect the emotional state from text."
    model_config = {"arbitrary_types_allowed": True}

    def __init__(self, config=None):
        super().__init__()

    def _run(self, text: str):
        model = TinyGPT2Model()
        prompt = f'Analyse emotions in: "{text}". Format: JSON with primary_emotion, intensity, feelings, concerns.'
        response = model.generate(prompt)
        # A small GPT-2 model rarely emits well-formed JSON, so fall back to a
        # neutral placeholder when the output cannot be parsed.
        fallback = {
            "primary_emotion": "detected_emotion",
            "intensity": "medium",
            "feelings": ["feeling1"],
            "concerns": ["concern1"],
        }
        try:
            parsed = json.loads(response)
            return parsed if isinstance(parsed, dict) else fallback
        except (TypeError, ValueError):
            return fallback


class GenerateReflectiveQuestionsTool(BaseTool):
    name: str = "generate_reflective_questions"
    description: str = "Generate reflective questions."
    model_config = {"arbitrary_types_allowed": True}

    def __init__(self, config=None):
        super().__init__()

    def _run(self, context: dict):
        emotion = context.get("primary_emotion", "neutral")
        questions_map = {
            "anxiety": ["What triggers your anxiety?", "How do you cope?"],
            "sadness": ["What helps when you feel sad?", "Who can you talk to?"],
        }
        return questions_map.get(emotion, [
            "How are you feeling?",
            "What feels important now?",
        ])


class VoiceTools:
    """Convenience container bundling the voice tools for agent registration."""

    def __init__(self, config=None):
        self.transcribe_audio = TranscribeAudioTool(config)
        self.detect_emotion = DetectEmotionTool(config)
        self.generate_reflective_questions = GenerateReflectiveQuestionsTool(config)
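

# --- Hedged usage sketch (not part of the original tool definitions) ----------
# A minimal smoke test for the tools above, assuming crewai, transformers,
# soundfile, and torch are installed and the Whisper weights can be downloaded.
# The one-second 440 Hz tone is a stand-in for real speech, so the transcription
# is expected to be empty or meaningless; the goal is only to exercise the code
# paths end to end. The _run methods are called directly, bypassing the crewai
# agent wrapper.
if __name__ == "__main__":
    tools = VoiceTools()

    # Synthetic 16 kHz mono buffer standing in for microphone input.
    sample_rate = 16000
    t = np.arange(sample_rate) / sample_rate
    fake_audio = (0.1 * np.sin(2 * np.pi * 440.0 * t)).astype(np.float32)

    transcript = tools.transcribe_audio._run(fake_audio.tolist(), language="en")
    print("transcription:", transcript)

    emotion = tools.detect_emotion._run("I feel overwhelmed by work lately.")
    print("emotion:", emotion)

    questions = tools.generate_reflective_questions._run(emotion)
    print("questions:", questions)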