import asyncio
import tempfile
from typing import List, Optional

import numpy as np
import soundfile as sf
import torch
from crewai.tools import BaseTool
from pydantic import PrivateAttr
from transformers import pipeline

from models.tinygpt2_model import TinyGPT2Model

class MultilingualVoiceProcessor:
    """Thin wrapper around a Whisper automatic-speech-recognition pipeline."""
    def __init__(self, model_name="openai/whisper-base", device=None):
        if device is None:
            device = 0 if torch.cuda.is_available() else -1
        # transformers resolves its cache location from the environment
        # (TRANSFORMERS_CACHE / HF_HOME), so no cache_dir is passed here.
        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device,
            generate_kwargs={"task": "transcribe", "return_timestamps": False},
        )

    async def transcribe(self, audio_data: np.ndarray, language: Optional[str] = None):
        # Round-trip through a temporary 16 kHz WAV file so the pipeline can read it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp_wav:
            sf.write(tmp_wav.name, audio_data, samplerate=16000)
            extra = {"language": language} if language else {}
            result = self.pipe(tmp_wav.name, **extra)
        text = result["text"]
        # No language detection is performed here; the caller-supplied language
        # is echoed back, or "unknown" when none was given.
        return text, language or "unknown"

    async def synthesize(self, text, language: str = "en", voice_type: str = "normal"):
        raise NotImplementedError("Use gTTS or edge-tts as before.")
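
# Usage sketch (illustrative only, not executed on import): transcribe one
# second of 16 kHz mono float32 audio directly with the processor. Assumes the
# Whisper checkpoint can be downloaded, or is already cached, on first use.
#
#   vp = MultilingualVoiceProcessor()
#   audio = np.zeros(16000, dtype=np.float32)   # placeholder: one second of silence
#   text, lang = asyncio.run(vp.transcribe(audio, language="en"))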

class TranscribeAudioTool(BaseTool):
    name: str = "transcribe_audio"
    description: str = "Transcribe audio to text and detect language."
    model_config = {"arbitrary_types_allowed": True}
    _vp: MultilingualVoiceProcessor = PrivateAttr()
    def __init__(self, config=None):
        super().__init__()
        self._vp = MultilingualVoiceProcessor()
    def _run(self, audio_data: List[float], language: Optional[str] = None):
        # Tool arguments arrive as plain Python types; rebuild the NumPy buffer
        # before handing it to the voice processor.
        audio_np = np.array(audio_data, dtype=np.float32)
        text, detected_lang = asyncio.run(self._vp.transcribe(audio_np, language))
        return {"text": text, "language": detected_lang}

class DetectEmotionTool(BaseTool):
    name: str = "detect_emotion"
    description: str = "Detect the emotional state from text."
    model_config = {"arbitrary_types_allowed": True}
    def __init__(self, config=None):
        super().__init__()
    def _run(self, text: str):
        model = TinyGPT2Model()
        prompt = f'Analyse emotions in: "{text}". Format: JSON with primary_emotion, intensity, feelings, concerns.'
        response = model.generate(prompt)  # model output is not yet parsed
        # Placeholder structure returned until `response` is parsed into JSON.
        return {"primary_emotion": "detected_emotion",
                "intensity": "medium",
                "feelings": ["feeling1"],
                "concerns": ["concern1"]}

class GenerateReflectiveQuestionsTool(BaseTool):
    name: str = "generate_reflective_questions"
    description: str = "Generate reflective questions."
    model_config = {"arbitrary_types_allowed": True}
    def __init__(self, config=None):
        super().__init__()
    def _run(self, context: dict):
        emotion = context.get("primary_emotion", "neutral")
        questions_map = {
            "anxiety": ["What triggers your anxiety?", "How do you cope?"],
            "sadness": ["What helps when you feel sad?", "Who can you talk to?"]
        }
        return questions_map.get(emotion, [
            "How are you feeling?",
            "What feels important now?"
        ])

class VoiceTools:
    """Convenience bundle that instantiates the voice-related tools together."""
    def __init__(self, config=None):
        self.transcribe_audio = TranscribeAudioTool(config)
        self.detect_emotion = DetectEmotionTool(config)
        self.generate_reflective_questions = GenerateReflectiveQuestionsTool(config)
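

# Minimal manual smoke test (a sketch, not part of the tool API). It exercises
# the text-only tools; it assumes models.tinygpt2_model and its weights are
# available locally, and it calls the tools' _run methods directly.
if __name__ == "__main__":
    tools = VoiceTools()
    emotion = tools.detect_emotion._run("I have been feeling anxious about work lately.")
    print(emotion)
    print(tools.generate_reflective_questions._run(emotion))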