"""Gradio UI and MCP server that forwards audio files to a speech-to-text API.

The module defines the HTTP client helper (`transcribe_audio`), two thin
string-returning wrappers that are wired to the Gradio buttons
(`transcribe_to_text`, `transcribe_to_srt`), and the `demo` Blocks app.
"""

import asyncio
import os
from typing import Any, Dict

import gradio as gr
import httpx

from config import CONFIG

# API endpoint configuration from config
API_BASE_URL = CONFIG["api"]["base_url"]
API_TIMEOUT = CONFIG["api"]["timeout"]

if API_BASE_URL is None:
    raise ValueError("API_BASE_URL is not set")


async def transcribe_audio(audio_file: str, output_format: str = "text") -> Dict[str, Any]:
    """
    Transcribe the audio file to text or SRT subtitles.

    The file is uploaded as the multipart field ``file`` to the endpoint
    configured under ``CONFIG['api']['endpoints']`` (``transcribe_srt`` when
    ``output_format == "srt"``, ``transcribe`` otherwise).

    Args:
        audio_file (str): Path to the audio file to transcribe
        output_format (str): Output format - "text" for plain text, "srt" for SRT subtitles

    Returns:
        Dict describing the outcome. On success: ``success`` (True),
        ``transcription`` and ``format`` (plus ``metadata`` with the parsed
        JSON reply for the text format). On failure: ``success`` (False),
        ``error`` and ``format``. Exceptions raised while uploading or
        parsing are caught and reported through the failure form.
    """
    try:
        # Determine the endpoint based on output format
        endpoint_key = "transcribe_srt" if output_format == "srt" else "transcribe"
        endpoint = f"{API_BASE_URL}{CONFIG['api']['endpoints'][endpoint_key]}"

        # Upload the file; the handle must stay open until the request has been sent.
        async with httpx.AsyncClient(timeout=API_TIMEOUT) as client:
            with open(audio_file, "rb") as audio_handle:
                # NOTE(review): the part is always labelled audio/wav, whatever
                # the real file type is - confirm the API ignores this header.
                files = {"file": (os.path.basename(audio_file), audio_handle, "audio/wav")}
                response = await client.post(endpoint, files=files)
        response.raise_for_status()

        if output_format == "srt":
            # For SRT format, return the raw text content
            return {
                "success": True,
                "transcription": response.text,
                "format": "srt",
            }

        # For JSON format, parse the response
        result = response.json()
        payload = result if isinstance(result, dict) else {}

        if payload.get("success") is False:
            # The API explicitly reported a failure: surface it instead of
            # disguising it as a successful (but empty) transcription.
            # NOTE(review): the name of the error field is not visible from
            # this file; "error"/"message" are tried on a best-effort basis.
            detail = payload.get("error") or payload.get("message") or "no details provided"
            return {
                "success": False,
                "error": f"Transcription API reported a failure: {detail}",
                "format": output_format,
                "metadata": result,
            }

        segments = payload.get("segments")
        if payload.get("success") is True and segments:
            # One line of output per segment; only the segment text is kept.
            transcription_text = "\n".join(
                str(segment.get("text", "")) for segment in segments
            )
        else:
            # Only the segments-array reply is understood; anything else
            # yields this placeholder.
            transcription_text = "No transcription text found in response"

        return {
            "success": True,
            "transcription": transcription_text,
            "format": "text",
            "metadata": result,
        }

    except httpx.TimeoutException:
        return {
            "success": False,
            "error": "Request timed out. The audio file might be too long or the server is busy.",
            "format": output_format,
        }
    except httpx.HTTPStatusError as e:
        return {
            "success": False,
            "error": f"HTTP error {e.response.status_code}: {e.response.text}",
            "format": output_format,
        }
    except Exception as e:
        # Boundary handler: callers expect a result dict, never an exception.
        return {
            "success": False,
            "error": f"Unexpected error: {str(e)}",
            "format": output_format,
        }


def transcribe_audio_sync(audio_file: str, output_format: str = "text") -> Dict[str, Any]:
    """
    Synchronous wrapper for the async transcribe function.

    Runs `transcribe_audio` to completion with `asyncio.run`, so it must not
    be called from a thread that already has a running event loop.
    """
    return asyncio.run(transcribe_audio(audio_file, output_format))


def _transcribe_to_string(audio_file: str, output_format: str) -> str:
    """Run a transcription and flatten the result dict into a display string."""
    if not audio_file:
        return "Please provide an audio file."

    result = transcribe_audio_sync(audio_file, output_format)
    if result["success"]:
        return result["transcription"]
    return f"Error: {result['error']}"


# NOTE(review): with mcp_server=True, Gradio presumably publishes the
# docstrings of the two functions below as the MCP tool descriptions, so their
# wording is kept stable on purpose - confirm before rewording them.
def transcribe_to_text(audio_file: str) -> str:
    """
    Transcribe the audio file to plain text.

    Args:
        audio_file (str): The URL to the audio file.

    Returns:
        str: Transcribed text
    """
    return _transcribe_to_string(audio_file, "text")


def transcribe_to_srt(audio_file: str) -> str:
    """
    Transcribe the audio file to SRT subtitle format.

    Args:
        audio_file (str): The URL to the audio file.

    Returns:
        str: SRT formatted subtitles
    """
    return _transcribe_to_string(audio_file, "srt")


# Create the Gradio interface
with gr.Blocks(title="Parakeet ASR MCP Server") as demo:
    gr.Markdown("""
    # 🎙️ Parakeet ASR MCP Server

    A Model Context Protocol (MCP) server built with Gradio interfaces with a speech-to-text API, serving the model [Parakeet TDT 0.6B V2](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) open-sourced by NVIDIA and hosted on Novita AI (https://novita.ai/templates-library/105929).

    This server is free to use and provides high-quality English transcription capabilities, supporting both plain text and SRT subtitle formats.

    ## MCP Server URL
    ```
    https://viktor-hu-parakeet-asr-mcp-server.hf.space/gradio_api/mcp/sse
    ```

    ## Available MCP Tools
    - `transcribe_to_text`: Transcribe the audio file to plain text.
    - `transcribe_to_srt`: Transcribe the audio file to SRT subtitle format.

    ## Integration
    To add this MCP to clients that support SSE (e.g. Cursor, Windsurf, Cline), simply add the following configuration to your MCP config:
    ```
    {
        "mcpServers": {
            "parakeet-asr": {
                "url": "https://viktor-hu-parakeet-asr-mcp-server.hf.space/gradio_api/mcp/sse"
            }
        }
    }
    ```
    """)

    with gr.Tab("Transcribe to text"):
        with gr.Row():
            with gr.Column():
                audio_input_text = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    sources=["upload", "microphone"],
                )
                transcribe_text_btn = gr.Button("Transcribe to Text", variant="primary")

            with gr.Column():
                text_output = gr.Textbox(
                    label="Transcription Result",
                    lines=10,
                    placeholder="Transcribed text will appear here...",
                )

        transcribe_text_btn.click(
            fn=transcribe_to_text,
            inputs=[audio_input_text],
            outputs=[text_output],
        )

    with gr.Tab("Transcribe to SRT Subtitles"):
        with gr.Row():
            with gr.Column():
                audio_input_srt = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    sources=["upload", "microphone"],
                )
                transcribe_srt_btn = gr.Button("Transcribe to SRT", variant="primary")

            with gr.Column():
                srt_output = gr.Textbox(
                    label="SRT Subtitles",
                    lines=15,
                    placeholder="SRT formatted subtitles will appear here...",
                )

        transcribe_srt_btn.click(
            fn=transcribe_to_srt,
            inputs=[audio_input_srt],
            outputs=[srt_output],
        )


if __name__ == "__main__":
    # Launch with MCP server enabled
    try:
        demo.launch(
            mcp_server=True,
            share=False,
            server_name=CONFIG["server"]["host"],
            server_port=CONFIG["server"]["port"],
        )
    except Exception as e:
        print(f"Error launching server: {e}")
        # Exit non-zero so a supervisor can tell the launch failed.
        raise SystemExit(1) from e