Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -128,8 +128,19 @@ class BasicAgent:
|
|
128 |
elif file_info.endswith(".wav") or file_info.endswith(".mp3"):
|
129 |
print("coso Audio file detected, processing with Whisper")
|
130 |
audio_bytes = self._load_bytes(file_info)
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
elif file_info.endswith(".txt"):
|
135 |
print("coso Text file detected")
|
@@ -168,6 +179,26 @@ class BasicAgent:
|
|
168 |
)
|
169 |
return response.choices[0].message.content.strip()
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
def _transcribe_audio(self, audio_bytes: bytes) -> str:
|
172 |
audio_file = BytesIO(audio_bytes)
|
173 |
transcription = self.client.audio.transcriptions.create(model="whisper-1", file=audio_file)
|
@@ -183,8 +214,14 @@ class BasicAgent:
|
|
183 |
print_coso(f"_load_image error: {e}")
|
184 |
return None
|
185 |
|
186 |
-
def _load_bytes(self,
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
def _load_text(self, data: str) -> str:
|
190 |
return base64.b64decode(data).decode("utf-8")
|
@@ -269,6 +306,7 @@ def create_mock_questions():
|
|
269 |
#Tools
|
270 |
|
271 |
def transcribe_audio(file_name: str) -> str:
|
|
|
272 |
file_path = os.path.join("/data", file_name)
|
273 |
if not os.path.isfile(file_path):
|
274 |
return f"File not found: {file_path}"
|
@@ -276,7 +314,7 @@ def transcribe_audio(file_name: str) -> str:
|
|
276 |
model = whisper.load_model("base")
|
277 |
result = model.transcribe(file_path)
|
278 |
|
279 |
-
print_coso(f"
|
280 |
return result["text"]
|
281 |
|
282 |
|
|
|
128 |
elif file_info.endswith(".wav") or file_info.endswith(".mp3"):
|
129 |
print("coso Audio file detected, processing with Whisper")
|
130 |
audio_bytes = self._load_bytes(file_info)
|
131 |
+
if audio_bytes is not None:
|
132 |
+
transcription = self._transcribe_audio(audio_bytes)
|
133 |
+
prompt_con_audio = (
|
134 |
+
f"The following is the transcription of an audio file related to the question.\n"
|
135 |
+
f"---\n"
|
136 |
+
f"{transcription}\n"
|
137 |
+
f"---\n"
|
138 |
+
f"Now, based on this transcription, answer the following question:\n"
|
139 |
+
f"{question}"
|
140 |
+
)
|
141 |
+
risposta = self._ask_gpt4o(prompt_con_audio)
|
142 |
+
else:
|
143 |
+
risposta = "Error loading audio file"
|
144 |
|
145 |
elif file_info.endswith(".txt"):
|
146 |
print("coso Text file detected")
|
|
|
179 |
)
|
180 |
return response.choices[0].message.content.strip()
|
181 |
|
182 |
+
|
183 |
+
|
184 |
+
def _ask_gpt4o_with_mp3(self, audio: Image.Image, question: str) -> str:
    """Send an image plus a question to GPT-4o and return the text answer.

    NOTE(review): despite the "_mp3" in the name, this method PNG-encodes
    its argument and sends it as an ``image_url`` content part -- it
    handles images, not audio. Consider renaming once callers are
    confirmed.

    Args:
        audio: PIL image to attach to the prompt (misleadingly named).
        question: user question sent alongside the image.

    Returns:
        The stripped text content of the first chat-completion choice.
    """
    # Bug fix: the original body referenced an undefined name `image`;
    # the parameter is called `audio`.
    buffered = BytesIO()
    audio.save(buffered, format="PNG")
    buffered.seek(0)
    image_bytes = buffered.read()

    response = self.client.chat.completions.create(
        model="gpt-4o",  # NOTE: full gpt-4o model here, not the mini variant
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url",
                 "image_url": {"url": "data:image/png;base64," + base64.b64encode(image_bytes).decode()}}
            ]
        }]
    )
    return response.choices[0].message.content.strip()
|
201 |
+
|
202 |
def _transcribe_audio(self, audio_bytes: bytes) -> str:
|
203 |
audio_file = BytesIO(audio_bytes)
|
204 |
transcription = self.client.audio.transcriptions.create(model="whisper-1", file=audio_file)
|
|
|
214 |
print_coso(f"_load_image error: {e}")
|
215 |
return None
|
216 |
|
217 |
+
def _load_bytes(self, file_name: str) -> bytes | None:
    """Read a file from the /data directory and return its raw bytes.

    Args:
        file_name: bare file name, resolved against the /data directory.

    Returns:
        The file contents as ``bytes``, or ``None`` when the file cannot
        be read. (Return annotation widened to ``bytes | None`` -- the
        original claimed ``bytes`` but returns ``None`` on failure.)
    """
    file_path = os.path.join("/data", file_name)
    try:
        with open(file_path, "rb") as f:
            return f.read()
    except Exception as e:
        # Best-effort load: callers are expected to check for None
        # rather than catch exceptions.
        print_coso(f"Error loading file {file_path}: {e}")
        return None
|
225 |
|
226 |
def _load_text(self, data: str) -> str:
|
227 |
return base64.b64decode(data).decode("utf-8")
|
|
|
306 |
#Tools
|
307 |
|
308 |
def transcribe_audio(file_name: str) -> str:
    """Tool: transcribe an audio file under /data with local Whisper.

    Args:
        file_name: bare file name, resolved against the /data directory.

    Returns:
        The transcription text, or a "File not found: ..." message when
        the file does not exist.
    """
    # Bug fix: the original logged `result['text']` before `result` was
    # assigned, raising NameError on every call; the premature log line
    # is dropped (the post-transcription log below already covers it).
    file_path = os.path.join("/data", file_name)
    if not os.path.isfile(file_path):
        return f"File not found: {file_path}"

    model = whisper.load_model("base")
    result = model.transcribe(file_path)

    print_coso(f"transcribe_audio tool result: {result['text']}")
    return result["text"]
|
319 |
|
320 |
|