|
import gradio as gr |
|
import httpx |
|
import asyncio |
|
from typing import Dict, Any |
|
import os |
|
from config import CONFIG |
|
|
|
|
|
# Backend connection settings, read once at import time from the project config.
_api_settings = CONFIG["api"]
API_BASE_URL = _api_settings["base_url"]
API_TIMEOUT = _api_settings["timeout"]

# Fail fast at import: every endpoint URL below is built by prefixing
# API_BASE_URL, so a missing value would only surface later as a bad request.
if API_BASE_URL is None:
    raise ValueError("API_BASE_URL is not set")
|
|
|
def _failure(message: str, output_format: str) -> Dict[str, Any]:
    """Build the error-shaped result shared by every failure path."""
    return {"success": False, "error": message, "format": output_format}


def _segments_to_text(payload: Any) -> str:
    """Join the ``text`` of every segment in a JSON transcription payload.

    Returns a fixed placeholder sentence when the payload is not a dict, does
    not report ``"success": true``, or carries no segments.
    """
    placeholder = "No transcription text found in response"
    if not isinstance(payload, dict):
        return placeholder
    if payload.get("success") is not True or not payload.get("segments"):
        return placeholder

    texts = (segment.get("text") for segment in payload["segments"])
    # A null ``text`` becomes an empty line rather than the literal "None".
    return "\n".join("" if text is None else str(text) for text in texts)


async def transcribe_audio(audio_file: str, output_format: str = "text") -> Dict[str, Any]:
    """
    Transcribe the audio file to text or SRT subtitles.

    The file is uploaded as multipart form data to the endpoint configured for
    the requested format. Failures are reported in the returned dict; this
    coroutine does not raise for ``Exception`` subclasses.

    Args:
        audio_file (str): Path to the audio file to transcribe
        output_format (str): Output format - "srt" for SRT subtitles; any other
            value is treated as plain "text"

    Returns:
        Dict containing the transcription result. On success:
        ``{"success": True, "transcription": str, "format": "srt" | "text"}``,
        plus ``"metadata"`` (the decoded JSON response) in text mode.
        On failure: ``{"success": False, "error": str, "format": output_format}``.
    """
    wants_srt = output_format == "srt"

    try:
        endpoints = CONFIG["api"]["endpoints"]
        endpoint_key = "transcribe_srt" if wants_srt else "transcribe"
        endpoint = f"{API_BASE_URL}{endpoints[endpoint_key]}"

        async with httpx.AsyncClient(timeout=API_TIMEOUT) as client:
            # The handle must stay open while the request is sent, because the
            # upload reads from it; it is released as soon as the POST returns.
            with open(audio_file, "rb") as f:
                # NOTE(review): the MIME type is always sent as audio/wav, even
                # for other formats — confirm the backend ignores or sniffs it.
                files = {"file": (os.path.basename(audio_file), f, "audio/wav")}
                response = await client.post(endpoint, files=files)

        response.raise_for_status()

        if wants_srt:
            # The SRT endpoint's body is returned verbatim as the subtitles.
            return {
                "success": True,
                "transcription": response.text,
                "format": "srt",
            }

        result = response.json()
        return {
            "success": True,
            "transcription": _segments_to_text(result),
            "format": "text",
            "metadata": result,
        }

    except httpx.TimeoutException:
        return _failure(
            "Request timed out. The audio file might be too long or the server is busy.",
            output_format,
        )
    except httpx.HTTPStatusError as e:
        return _failure(
            f"HTTP error {e.response.status_code}: {e.response.text}",
            output_format,
        )
    except Exception as e:
        # Boundary handler: unreadable file, connection error, bad JSON, missing
        # config key, etc. are all reported to the caller as text.
        return _failure(f"Unexpected error: {str(e)}", output_format)
|
|
|
def transcribe_audio_sync(audio_file: str, output_format: str = "text") -> Dict[str, Any]:
    """
    Blocking entry point for ``transcribe_audio``.

    Creates the coroutine and drives it to completion with ``asyncio.run``,
    returning whatever result dict the coroutine produces. The arguments are
    forwarded unchanged.
    """
    pending = transcribe_audio(audio_file, output_format)
    return asyncio.run(pending)
|
|
|
def transcribe_to_text(audio_file: str) -> str:
    """
    Transcribe the audio file to plain text.

    Args:
        audio_file (str): The URL to the audio file.

    Returns:
        str: Transcribed text, a prompt to provide a file when the input is
        empty, or a message starting with "Error: " on failure
    """
    # NOTE(review): this docstring is presumably what MCP clients see as the
    # tool description — keep the summary and Args section client-facing.
    if not audio_file:
        return "Please provide an audio file."

    outcome = transcribe_audio_sync(audio_file, "text")
    if not outcome["success"]:
        return f"Error: {outcome['error']}"
    return outcome["transcription"]
|
|
|
def transcribe_to_srt(audio_file: str) -> str:
    """
    Transcribe the audio file to SRT subtitle format.

    Args:
        audio_file (str): The URL to the audio file.

    Returns:
        str: SRT formatted subtitles, a prompt to provide a file when the
        input is empty, or a message starting with "Error: " on failure
    """
    # NOTE(review): this docstring is presumably what MCP clients see as the
    # tool description — keep the summary and Args section client-facing.
    if not audio_file:
        return "Please provide an audio file."

    outcome = transcribe_audio_sync(audio_file, "srt")
    if not outcome["success"]:
        return f"Error: {outcome['error']}"
    return outcome["transcription"]
|
|
|
|
|
# Declarative Gradio UI: an introductory Markdown panel followed by two tabs,
# one per output format. Each tab pairs an audio input and a button (left
# column) with a textbox for the result (right column).
with gr.Blocks(title="Parakeet ASR MCP Server") as demo:
    # Landing text: what the server is, its MCP endpoint URL, the exposed
    # tools, and a ready-to-paste client configuration snippet.
    gr.Markdown("""
    # 🎙️ Parakeet ASR MCP Server

    A Model Context Protocol (MCP) server built with Gradio interfaces with a speech-to-text API, serving the model [Parakeet TDT 0.6B V2](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) open-sourced by NVIDIA and hosted on Novita AI (https://novita.ai/templates-library/105929).

    This server is free to use and provides high-quality English transcription capabilities, supporting both plain text and SRT subtitle formats.

    ## MCP Server URL
    ```
    https://viktor-hu-parakeet-asr-mcp-server.hf.space/gradio_api/mcp/sse
    ```

    ## Available MCP Tools
    - `transcribe_to_text`: Transcribe the audio file to plain text.
    - `transcribe_to_srt`: Transcribe the audio file to SRT subtitle format.

    ## Integration

    To add this MCP to clients that support SSE (e.g. Cursor, Windsurf, Cline), simply add the following configuration to your MCP config:

    ```
    {
      "mcpServers": {
        "parakeet-asr": {
          "url": "https://viktor-hu-parakeet-asr-mcp-server.hf.space/gradio_api/mcp/sse"
        }
      }
    }
    ```
    """)

    # --- Tab 1: plain-text transcription ---------------------------------
    with gr.Tab("Transcribe to text"):
        with gr.Row():
            with gr.Column():
                # type="filepath": the value is handed to the click handler as
                # a string, which the transcription code later opens as a file.
                audio_input_text = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    sources=["upload", "microphone"]
                )
                transcribe_text_btn = gr.Button("Transcribe to Text", variant="primary")

            with gr.Column():
                text_output = gr.Textbox(
                    label="Transcription Result",
                    lines=10,
                    placeholder="Transcribed text will appear here..."
                )

        # On click, run transcribe_to_text on the selected audio and show the
        # returned string (transcript or error message) in the textbox.
        transcribe_text_btn.click(
            fn=transcribe_to_text,
            inputs=[audio_input_text],
            outputs=[text_output]
        )

    # --- Tab 2: SRT subtitle transcription --------------------------------
    with gr.Tab("Transcribe to SRT Subtitles"):
        with gr.Row():
            with gr.Column():
                audio_input_srt = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    sources=["upload", "microphone"]
                )
                transcribe_srt_btn = gr.Button("Transcribe to SRT", variant="primary")

            with gr.Column():
                srt_output = gr.Textbox(
                    label="SRT Subtitles",
                    lines=15,
                    placeholder="SRT formatted subtitles will appear here..."
                )

        # Same wiring as the text tab, using the SRT handler and output box.
        transcribe_srt_btn.click(
            fn=transcribe_to_srt,
            inputs=[audio_input_srt],
            outputs=[srt_output]
        )
|
|
|
if __name__ == "__main__":
    # Script entry point: serve the UI defined above on the configured
    # host/port, with the MCP server enabled and no public share link.
    try:
        demo.launch(
            mcp_server=True,
            share=False,
            server_name=CONFIG["server"]["host"],
            server_port=CONFIG["server"]["port"],
        )
    except Exception as e:
        # Previously the error was printed to stdout and the process then
        # ended with status 0, so a supervisor could not tell that the launch
        # had failed. SystemExit with a string argument writes the same
        # message to stderr and exits with status 1.
        raise SystemExit(f"Error launching server: {e}") from e
|
|