Spaces:
Running
on
Zero
feat: Enable MCP
#1
by
multimodalart
HF Staff
- opened
app.py
CHANGED
@@ -113,7 +113,26 @@ def audio_to_base64(data, rate=16000):
|
|
113 |
return encoded_string
|
114 |
|
115 |
def process_audio_rag(audio_file_path, query, chunk_length=30, use_openai=False, openai_key=None):
|
116 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
if not audio_file_path:
|
118 |
return "Please upload an audio file", None, None
|
119 |
|
@@ -210,7 +229,7 @@ with gr.Blocks(title="AudioRAG Demo") as demo:
|
|
210 |
|
211 |
gr.Examples(
|
212 |
examples=[
|
213 |
-
["test.m4a", "Who   [line truncated in capture — full original not recoverable from this diff view]
|
214 |
],
|
215 |
inputs=[audio_input, query_input, chunk_length]
|
216 |
)
|
@@ -224,4 +243,4 @@ with gr.Blocks(title="AudioRAG Demo") as demo:
|
|
224 |
if __name__ == "__main__":
|
225 |
# Load model on startup
|
226 |
load_model()
|
227 |
-
demo.launch()
|
|
|
113 |
return encoded_string
|
114 |
|
115 |
def process_audio_rag(audio_file_path, query, chunk_length=30, use_openai=False, openai_key=None):
|
116 |
+
"""
|
117 |
+
Main processing function for audio RAG (Retrieval-Augmented Generation).
|
118 |
+
|
119 |
+
This function takes an audio file, splits it into chunks, embeds each chunk,
|
120 |
+
searches for the most relevant chunks based on a query, and optionally generates
|
121 |
+
a textual answer using OpenAI's API.
|
122 |
+
|
123 |
+
Args:
|
124 |
+
audio_file_path (str): Path to the uploaded audio file.
|
125 |
+
query (str): Search query to find relevant audio chunks.
|
126 |
+
chunk_length (int): Length of each audio chunk in seconds. Defaults to 30.
|
127 |
+
use_openai (bool): Whether to use OpenAI API for answer generation. Defaults to False.
|
128 |
+
openai_key (str): OpenAI API key for generating textual answers. Defaults to None.
|
129 |
+
|
130 |
+
Returns:
|
131 |
+
tuple: A tuple containing:
|
132 |
+
- result_text (str): Text describing the search results and optional AI-generated answer.
|
133 |
+
- first_chunk_path (str): Path to the saved audio file of the top matching chunk.
|
134 |
+
- fig (matplotlib.figure.Figure): Matplotlib figure showing the waveform of the top chunk.
|
135 |
+
"""
|
136 |
if not audio_file_path:
|
137 |
return "Please upload an audio file", None, None
|
138 |
|
|
|
229 |
|
230 |
gr.Examples(
|
231 |
examples=[
|
232 |
+
["test.m4a", "Who's the podcast host?", 30],
|
233 |
],
|
234 |
inputs=[audio_input, query_input, chunk_length]
|
235 |
)
|
|
|
243 |
if __name__ == "__main__":
|
244 |
# Load model on startup
|
245 |
load_model()
|
246 |
+
demo.launch(mcp_server=True)
|