# MIAO HU
# feat. update readme
# 4980f4b
import gradio as gr
import httpx
import asyncio
from typing import Dict, Any
import os
from config import CONFIG
# Remote ASR API settings, read once from the shared config at import time.
API_BASE_URL, API_TIMEOUT = CONFIG["api"]["base_url"], CONFIG["api"]["timeout"]

# Fail fast at import: every endpoint URL below is built by prefixing this value.
if API_BASE_URL is None:
    raise ValueError("API_BASE_URL is not set")
def _guess_audio_mime_type(audio_path: str) -> str:
    """Return the audio MIME type implied by the file extension, or "audio/wav"."""
    import mimetypes  # local import keeps this block self-contained

    guessed_type, _ = mimetypes.guess_type(audio_path)
    # Only trust the guess when it is an audio type; anything else keeps the
    # previous behaviour of labelling the upload as WAV.
    if guessed_type and guessed_type.startswith("audio/"):
        return guessed_type
    return "audio/wav"


def _segments_to_text(result: Any) -> str:
    """Join the "text" of each segment of a successful JSON response, one per line."""
    segments = None
    if isinstance(result, dict) and result.get("success") is True:
        segments = result.get("segments")
    if not segments:
        return "No transcription text found in response"
    # Timestamps carried by the segments are deliberately not rendered here.
    return "\n".join(str(segment.get("text", "")) for segment in segments)


async def transcribe_audio(audio_file: str, output_format: str = "text") -> Dict[str, Any]:
    """
    Upload an audio file to the ASR API and return the transcription.

    Args:
        audio_file (str): Path to the audio file to transcribe
        output_format (str): Output format - "text" for plain text, "srt" for SRT subtitles

    Returns:
        Dict with "success" and "format" keys. On success it also holds
        "transcription" (and, for text output, "metadata" with the parsed JSON
        response); on failure it holds "error". Failures are reported through
        the returned dict rather than raised.
    """
    try:
        # Any output_format other than "srt" is served by the plain endpoint.
        endpoint_key = "transcribe_srt" if output_format == "srt" else "transcribe"
        endpoint = f"{API_BASE_URL}{CONFIG['api']['endpoints'][endpoint_key]}"

        async with httpx.AsyncClient(timeout=API_TIMEOUT) as client:
            # The file must stay open until the multipart upload has completed.
            with open(audio_file, "rb") as audio_stream:
                files = {
                    "file": (
                        os.path.basename(audio_file),
                        audio_stream,
                        _guess_audio_mime_type(audio_file),
                    )
                }
                response = await client.post(endpoint, files=files)
            response.raise_for_status()

            if output_format == "srt":
                # The SRT endpoint answers with the subtitle text itself.
                return {
                    "success": True,
                    "transcription": response.text,
                    "format": "srt"
                }

            # The text endpoint answers with JSON carrying a "segments" array.
            result = response.json()
            return {
                "success": True,
                "transcription": _segments_to_text(result),
                "format": "text",
                "metadata": result
            }
    except httpx.TimeoutException:
        return {
            "success": False,
            "error": "Request timed out. The audio file might be too long or the server is busy.",
            "format": output_format
        }
    except httpx.HTTPStatusError as e:
        return {
            "success": False,
            "error": f"HTTP error {e.response.status_code}: {e.response.text}",
            "format": output_format
        }
    except Exception as e:
        # Boundary handler: callers display the message instead of crashing.
        return {
            "success": False,
            "error": f"Unexpected error: {str(e)}",
            "format": output_format
        }
def transcribe_audio_sync(audio_file: str, output_format: str = "text") -> Dict[str, Any]:
    """
    Run ``transcribe_audio`` to completion from synchronous code.

    Args:
        audio_file (str): Path to the audio file to transcribe
        output_format (str): Output format, forwarded unchanged

    Returns:
        The result dict produced by ``transcribe_audio``
    """
    pending_call = transcribe_audio(audio_file, output_format)
    # asyncio.run drives the coroutine on an event loop created for this call.
    return asyncio.run(pending_call)
def transcribe_to_text(audio_file: str) -> str:
    """
    Transcribe the audio file to plain text.
    Args:
    audio_file (str): The URL to the audio file.
    Returns:
    str: Transcribed text
    """
    # NOTE(review): this handler is wired to a Gradio event and the app is
    # launched with mcp_server=True; the docstring above presumably doubles as
    # the MCP tool description, so its wording is left as-is - confirm before
    # rewording.
    if audio_file:
        outcome = transcribe_audio_sync(audio_file, "text")
        # Failures come back as a dict with an "error" message, not as exceptions.
        return outcome["transcription"] if outcome["success"] else f"Error: {outcome['error']}"
    return "Please provide an audio file."
def transcribe_to_srt(audio_file: str) -> str:
    """
    Transcribe the audio file to SRT subtitle format.
    Args:
    audio_file (str): The URL to the audio file.
    Returns:
    str: SRT formatted subtitles
    """
    # NOTE(review): this handler is wired to a Gradio event and the app is
    # launched with mcp_server=True; the docstring above presumably doubles as
    # the MCP tool description, so its wording is left as-is - confirm before
    # rewording.
    if audio_file:
        outcome = transcribe_audio_sync(audio_file, "srt")
        # Failures come back as a dict with an "error" message, not as exceptions.
        return outcome["transcription"] if outcome["success"] else f"Error: {outcome['error']}"
    return "Please provide an audio file."
# Landing-page copy shown at the top of the UI. Kept at column 0 so the
# Markdown (including its fenced code blocks) is passed through unchanged.
_INTRO_MARKDOWN = """
# 🎙️ Parakeet ASR MCP Server
A Model Context Protocol (MCP) server built with Gradio interfaces with a speech-to-text API, serving the model [Parakeet TDT 0.6B V2](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) open-sourced by NVIDIA and hosted on Novita AI (https://novita.ai/templates-library/105929).
This server is free to use and provides high-quality English transcription capabilities, supporting both plain text and SRT subtitle formats.
## MCP Server URL
```
https://viktor-hu-parakeet-asr-mcp-server.hf.space/gradio_api/mcp/sse
```
## Available MCP Tools
- `transcribe_to_text`: Transcribe the audio file to plain text.
- `transcribe_to_srt`: Transcribe the audio file to SRT subtitle format.
## Integration
To add this MCP to clients that support SSE (e.g. Cursor, Windsurf, Cline), simply add the following configuration to your MCP config:
```
{
"mcpServers": {
"parakeet-asr": {
"url": "https://viktor-hu-parakeet-asr-mcp-server.hf.space/gradio_api/mcp/sse"
}
}
}
```
"""

# Build the Gradio app: an intro panel followed by one tab per output format.
with gr.Blocks(title="Parakeet ASR MCP Server") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Tab 1: audio in, plain-text transcription out.
    with gr.Tab("Transcribe to text"):
        with gr.Row():
            with gr.Column():
                audio_input_text = gr.Audio(
                    sources=["upload", "microphone"],
                    type="filepath",
                    label="Upload Audio File",
                )
                transcribe_text_btn = gr.Button("Transcribe to Text", variant="primary")
            with gr.Column():
                text_output = gr.Textbox(
                    placeholder="Transcribed text will appear here...",
                    lines=10,
                    label="Transcription Result",
                )
        # Clicking the button sends the audio file path to the handler.
        transcribe_text_btn.click(
            fn=transcribe_to_text,
            inputs=[audio_input_text],
            outputs=[text_output],
        )

    # Tab 2: audio in, SRT subtitles out.
    with gr.Tab("Transcribe to SRT Subtitles"):
        with gr.Row():
            with gr.Column():
                audio_input_srt = gr.Audio(
                    sources=["upload", "microphone"],
                    type="filepath",
                    label="Upload Audio File",
                )
                transcribe_srt_btn = gr.Button("Transcribe to SRT", variant="primary")
            with gr.Column():
                srt_output = gr.Textbox(
                    placeholder="SRT formatted subtitles will appear here...",
                    lines=15,
                    label="SRT Subtitles",
                )
        # Clicking the button sends the audio file path to the handler.
        transcribe_srt_btn.click(
            fn=transcribe_to_srt,
            inputs=[audio_input_srt],
            outputs=[srt_output],
        )
if __name__ == "__main__":
    # Start the web UI with the MCP server enabled; host and port come from
    # the shared config, and no public share link is requested.
    try:
        demo.launch(
            server_name=CONFIG["server"]["host"],
            server_port=CONFIG["server"]["port"],
            share=False,
            mcp_server=True,
        )
    except Exception as launch_error:
        # Startup failures are reported on stdout rather than re-raised.
        print(f"Error launching server: {launch_error}")