Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -128,8 +128,19 @@ class BasicAgent:
|
|
128 |
elif file_info.endswith(".wav") or file_info.endswith(".mp3"):
|
129 |
print("coso Audio file detected, processing with Whisper")
|
130 |
audio_bytes = self._load_bytes(file_info)
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
elif file_info.endswith(".txt"):
|
135 |
print("coso Text file detected")
|
@@ -168,6 +179,26 @@ class BasicAgent:
|
|
168 |
)
|
169 |
return response.choices[0].message.content.strip()
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
def _transcribe_audio(self, audio_bytes: bytes) -> str:
|
172 |
audio_file = BytesIO(audio_bytes)
|
173 |
transcription = self.client.audio.transcriptions.create(model="whisper-1", file=audio_file)
|
@@ -183,8 +214,14 @@ class BasicAgent:
|
|
183 |
print_coso(f"_load_image error: {e}")
|
184 |
return None
|
185 |
|
186 |
-
def _load_bytes(self,
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
def _load_text(self, data: str) -> str:
|
190 |
return base64.b64decode(data).decode("utf-8")
|
@@ -269,6 +306,7 @@ def create_mock_questions():
|
|
269 |
#Tools
|
270 |
|
271 |
def transcribe_audio(file_name: str) -> str:
|
|
|
272 |
file_path = os.path.join("/data", file_name)
|
273 |
if not os.path.isfile(file_path):
|
274 |
return f"File not found: {file_path}"
|
@@ -276,7 +314,7 @@ def transcribe_audio(file_name: str) -> str:
|
|
276 |
model = whisper.load_model("base")
|
277 |
result = model.transcribe(file_path)
|
278 |
|
279 |
-
print_coso(f"
|
280 |
return result["text"]
|
281 |
|
282 |
|
|
|
128 |
elif file_info.endswith(".wav") or file_info.endswith(".mp3"):
|
129 |
print("coso Audio file detected, processing with Whisper")
|
130 |
audio_bytes = self._load_bytes(file_info)
|
131 |
+
if audio_bytes is not None:
|
132 |
+
transcription = self._transcribe_audio(audio_bytes)
|
133 |
+
prompt_con_audio = (
|
134 |
+
f"The following is the transcription of an audio file related to the question.\n"
|
135 |
+
f"---\n"
|
136 |
+
f"{transcription}\n"
|
137 |
+
f"---\n"
|
138 |
+
f"Now, based on this transcription, answer the following question:\n"
|
139 |
+
f"{question}"
|
140 |
+
)
|
141 |
+
risposta = self._ask_gpt4o(prompt_con_audio)
|
142 |
+
else:
|
143 |
+
risposta = "Error loading audio file"
|
144 |
|
145 |
elif file_info.endswith(".txt"):
|
146 |
print("coso Text file detected")
|
|
|
179 |
)
|
180 |
return response.choices[0].message.content.strip()
|
181 |
|
182 |
+
|
183 |
+
|
184 |
+
def _ask_gpt4o_with_mp3(self, audio: Image.Image, question: str) -> str:
    """Send an image plus a question to GPT-4o and return the text answer.

    NOTE(review): despite the "_mp3" in the name, this method PNG-encodes
    its argument and sends it as an ``image_url`` content part -- it
    handles images, not audio. Consider renaming once callers are
    confirmed.

    Args:
        audio: PIL image to attach to the prompt (misleadingly named).
        question: user question sent alongside the image.

    Returns:
        The stripped text content of the first chat-completion choice.
    """
    # Bug fix: the original body referenced an undefined name `image`;
    # the parameter is called `audio`.
    buffered = BytesIO()
    audio.save(buffered, format="PNG")
    buffered.seek(0)
    image_bytes = buffered.read()

    response = self.client.chat.completions.create(
        model="gpt-4o",  # NOTE: full gpt-4o model here, not the mini variant
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url",
                 "image_url": {"url": "data:image/png;base64," + base64.b64encode(image_bytes).decode()}}
            ]
        }]
    )
    return response.choices[0].message.content.strip()
|
201 |
+
|
202 |
def _transcribe_audio(self, audio_bytes: bytes) -> str:
|
203 |
audio_file = BytesIO(audio_bytes)
|
204 |
transcription = self.client.audio.transcriptions.create(model="whisper-1", file=audio_file)
|
|
|
214 |
print_coso(f"_load_image error: {e}")
|
215 |
return None
|
216 |
|
217 |
+
def _load_bytes(self, file_name: str) -> bytes | None:
    """Read a file from the /data directory and return its raw bytes.

    Args:
        file_name: bare file name, resolved against the /data directory.

    Returns:
        The file contents as ``bytes``, or ``None`` when the file cannot
        be read. (Return annotation widened to ``bytes | None`` -- the
        original claimed ``bytes`` but returns ``None`` on failure.)
    """
    file_path = os.path.join("/data", file_name)
    try:
        with open(file_path, "rb") as f:
            return f.read()
    except Exception as e:
        # Best-effort load: callers are expected to check for None
        # rather than catch exceptions.
        print_coso(f"Error loading file {file_path}: {e}")
        return None
|
225 |
|
226 |
def _load_text(self, data: str) -> str:
|
227 |
return base64.b64decode(data).decode("utf-8")
|
|
|
306 |
#Tools
|
307 |
|
308 |
def transcribe_audio(file_name: str) -> str:
    """Tool: transcribe an audio file under /data with local Whisper.

    Args:
        file_name: bare file name, resolved against the /data directory.

    Returns:
        The transcription text, or a "File not found: ..." message when
        the file does not exist.
    """
    # Bug fix: the original logged `result['text']` before `result` was
    # assigned, raising NameError on every call; the premature log line
    # is dropped (the post-transcription log below already covers it).
    file_path = os.path.join("/data", file_name)
    if not os.path.isfile(file_path):
        return f"File not found: {file_path}"

    model = whisper.load_model("base")
    result = model.transcribe(file_path)

    print_coso(f"transcribe_audio tool result: {result['text']}")
    return result["text"]
|
319 |
|
320 |
|