GattoNero committed
Commit 8749396 · verified · 1 Parent(s): a98c158

Update app.py

Files changed (1)
  1. app.py +43 -5
app.py CHANGED
@@ -128,8 +128,19 @@ class BasicAgent:
         elif file_info.endswith(".wav") or file_info.endswith(".mp3"):
             print("coso Audio file detected, processing with Whisper")
             audio_bytes = self._load_bytes(file_info)
-            transcription = self._transcribe_audio(audio_bytes)
-            risposta = self._ask_gpt4o(transcription)
+            if audio_bytes is not None:
+                transcription = self._transcribe_audio(audio_bytes)
+                prompt_con_audio = (
+                    f"The following is the transcription of an audio file related to the question.\n"
+                    f"---\n"
+                    f"{transcription}\n"
+                    f"---\n"
+                    f"Now, based on this transcription, answer the following question:\n"
+                    f"{question}"
+                )
+                risposta = self._ask_gpt4o(prompt_con_audio)
+            else:
+                risposta = "Error loading audio file"
 
         elif file_info.endswith(".txt"):
             print("coso Text file detected")
@@ -168,6 +179,26 @@ class BasicAgent:
         )
         return response.choices[0].message.content.strip()
 
+
+
+    def _ask_gpt4o_with_mp3(self, image: Image.Image, question: str) -> str:
+        buffered = BytesIO()
+        image.save(buffered, format="PNG")
+        buffered.seek(0)
+        image_bytes = buffered.read()
+
+        response = self.client.chat.completions.create(
+            model="gpt-4o",  # NOTE: full gpt-4o model here, not the mini variant
+            messages=[{
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": question},
+                    {"type": "image_url", "image_url": {"url": "data:image/png;base64," + base64.b64encode(image_bytes).decode()}}
+                ]
+            }]
+        )
+        return response.choices[0].message.content.strip()
+
     def _transcribe_audio(self, audio_bytes: bytes) -> str:
         audio_file = BytesIO(audio_bytes)
         transcription = self.client.audio.transcriptions.create(model="whisper-1", file=audio_file)
@@ -183,8 +214,14 @@ class BasicAgent:
             print_coso(f"_load_image error: {e}")
             return None
 
-    def _load_bytes(self, data: str) -> bytes:
-        return base64.b64decode(data)
+    def _load_bytes(self, file_name: str) -> bytes:
+        file_path = os.path.join("/data", file_name)
+        try:
+            with open(file_path, "rb") as f:
+                return f.read()
+        except Exception as e:
+            print_coso(f"Error loading file {file_path}: {e}")
+            return None
 
     def _load_text(self, data: str) -> str:
         return base64.b64decode(data).decode("utf-8")
@@ -269,6 +306,7 @@ def create_mock_questions():
 #Tools
 
 def transcribe_audio(file_name: str) -> str:
+    print_coso(f"transcribe_audio tool called with file: {file_name}")
     file_path = os.path.join("/data", file_name)
     if not os.path.isfile(file_path):
         return f"File not found: {file_path}"
@@ -276,7 +314,7 @@ def transcribe_audio(file_name: str) -> str:
     model = whisper.load_model("base")
    result = model.transcribe(file_path)
 
-    print_coso(f"usato transcribe_audio tool: {result['text']}")
+    print_coso(f"transcribe_audio tool result: {result['text']}")
     return result["text"]
 
 
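For reference, the new audio path in this commit amounts to: read the raw bytes from /data (the reworked _load_bytes), wrap them in a BytesIO, transcribe them with whisper-1 via the OpenAI client (_transcribe_audio), and feed the transcription into a chat prompt. The snippet below is a minimal standalone sketch of that flow, not code from app.py: the file name "question.mp3", the question text, the gpt-4o-mini chat model, and plain print() in place of print_coso() are all placeholders. The buffer is also given a .name hint, which is typically needed so the transcription endpoint can infer the audio format; the in-class _transcribe_audio does not currently set one.

```python
# Standalone sketch of the commit's new audio flow: load from /data, transcribe, then ask the chat model.
import os
from io import BytesIO

from openai import OpenAI

client = OpenAI()  # expects OPENAI_API_KEY in the environment


def load_bytes(file_name: str) -> bytes | None:
    """Mirrors the reworked _load_bytes: read raw bytes from /data instead of base64-decoding."""
    file_path = os.path.join("/data", file_name)
    try:
        with open(file_path, "rb") as f:
            return f.read()
    except Exception as e:
        print(f"Error loading file {file_path}: {e}")
        return None


question = "What is said in the recording?"  # placeholder question
audio_bytes = load_bytes("question.mp3")     # placeholder file name

if audio_bytes is not None:
    audio_file = BytesIO(audio_bytes)
    audio_file.name = "question.mp3"  # lets the endpoint infer the audio format from the file name
    transcription = client.audio.transcriptions.create(model="whisper-1", file=audio_file)

    prompt = (
        "The following is the transcription of an audio file related to the question.\n"
        f"---\n{transcription.text}\n---\n"
        "Now, based on this transcription, answer the following question:\n"
        f"{question}"
    )
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # assumed; the model used by _ask_gpt4o is not visible in this diff
        messages=[{"role": "user", "content": prompt}],
    )
    print(response.choices[0].message.content.strip())
else:
    print("Error loading audio file")
```

Because _load_bytes now reads a file from /data instead of base64-decoding the string it is given, it can fail on a missing or unreadable file, which is why the audio branch now guards on audio_bytes is not None before transcribing.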