import asyncio
import tempfile
from typing import List, Optional

import numpy as np
import soundfile as sf
import torch
from crewai.tools import BaseTool
from pydantic import PrivateAttr
from transformers import pipeline

from models.tinygpt2_model import TinyGPT2Model

class MultilingualVoiceProcessor:
    """Thin wrapper around a Whisper automatic-speech-recognition pipeline."""
    def __init__(self, model_name="openai/whisper-base", device=None):
        if device is None:
            device = 0 if torch.cuda.is_available() else -1
        # transformers resolves its cache location from the environment
        # (TRANSFORMERS_CACHE / HF_HOME), so no cache_dir is passed here.
        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device,
            generate_kwargs={"task": "transcribe", "return_timestamps": False},
        )

    async def transcribe(self, audio_data: np.ndarray, language: Optional[str] = None):
        # Round-trip through a temporary 16 kHz WAV file so the pipeline can read it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
            sf.write(tmp_wav.name, audio_data, samplerate=16000)
            extra = {"language": language} if language else {}
            result = self.pipe(tmp_wav.name, **extra)
        text = result["text"]
        # No language detection is performed here; the caller-supplied language
        # is echoed back, or "unknown" when none was given.
        return text, language or "unknown"

    async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
        raise NotImplementedError("Use gTTS or edge-tts as before.")
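
# Usage sketch (illustrative only, not executed on import): transcribe one
# second of 16 kHz mono float32 audio directly with the processor. Assumes the
# Whisper checkpoint can be downloaded, or is already cached, on first use.
#
#   vp = MultilingualVoiceProcessor()
#   audio = np.zeros(16000, dtype=np.float32)   # placeholder: one second of silence
#   text, lang = asyncio.run(vp.transcribe(audio, language="en"))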

class TranscribeAudioTool(BaseTool):
    name: str = "transcribe_audio"
    description: str = "Transcribe audio to text and detect language."
    model_config = {"arbitrary_types_allowed": True}
    _vp: MultilingualVoiceProcessor = PrivateAttr()
    def __init__(self, config=None):
        super().__init__()
        self._vp = MultilingualVoiceProcessor()
    def _run(self, audio_data: List[float], language: Optional[str] = None):
        # Tool arguments arrive as plain Python types; rebuild the NumPy buffer
        # before handing it to the voice processor.
        audio_np = np.array(audio_data, dtype=np.float32)
        text, detected_lang = asyncio.run(self._vp.transcribe(audio_np, language))
        return {"text": text, "language": detected_lang}

class DetectEmotionTool(BaseTool):
    name: str = "detect_emotion"
    description: str = "Detect the emotional state from text."
    model_config = {"arbitrary_types_allowed": True}
    def __init__(self, config=None):
        super().__init__()
    def _run(self, text: str):
        model = TinyGPT2Model()
        prompt = f'Analyse emotions in: "{text}". Format: JSON with primary_emotion, intensity, feelings, concerns.'
        response = model.generate(prompt)  # model output is not yet parsed
        # Placeholder structure returned until `response` is parsed into JSON.
        return {"primary_emotion": "detected_emotion",
                "intensity": "medium",
                "feelings": ["feeling1"],
                "concerns": ["concern1"]}

class GenerateReflectiveQuestionsTool(BaseTool):
    name: str = "generate_reflective_questions"
    description: str = "Generate reflective questions."
    model_config = {"arbitrary_types_allowed": True}
    def __init__(self, config=None):
        super().__init__()
    def _run(self, context: dict):
        emotion = context.get("primary_emotion", "neutral")
        questions_map = {
            "anxiety": ["What triggers your anxiety?", "How do you cope?"],
            "sadness": ["What helps when you feel sad?", "Who can you talk to?"]
        }
        return questions_map.get(emotion, [
            "How are you feeling?",
            "What feels important now?"
        ])

class VoiceTools:
    """Convenience bundle that instantiates the voice-related tools together."""
    def __init__(self, config=None):
        self.transcribe_audio = TranscribeAudioTool(config)
        self.detect_emotion = DetectEmotionTool(config)
        self.generate_reflective_questions = GenerateReflectiveQuestionsTool(config)
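

# Minimal manual smoke test (a sketch, not part of the tool API). It exercises
# the text-only tools; it assumes models.tinygpt2_model and its weights are
# available locally, and it calls the tools' _run methods directly.
if __name__ == "__main__":
    tools = VoiceTools()
    emotion = tools.detect_emotion._run("I have been feeling anxious about work lately.")
    print(emotion)
    print(tools.generate_reflective_questions._run(emotion))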