Spaces:

viktor-hu
/

parakeet-asr-mcp-server

Running

App Files Files Community

MIAO HU committed on May 26

Commit

5dcd53e

1 Parent(s): 077f094

init version

Browse files

Files changed (5) hide show

.gitignore +184 -0
README.md +10 -3
app.py +204 -0
config.py +27 -0
requirements.txt +2 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,184 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+.DS_Store
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore

README.md CHANGED Viewed

@@ -1,12 +1,19 @@
 ---
-title: Mcp Parakeet Tdt 0.6b V2
-emoji: 🐢
 colorFrom: indigo
 colorTo: green
 sdk: gradio
 sdk_version: 5.31.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Parakeet ASR MCP Server
+emoji: 🎧
 colorFrom: indigo
 colorTo: green
 sdk: gradio
 sdk_version: 5.31.0
 app_file: app.py
 pinned: false
+license: mit
+models:
+  - nvidia/parakeet-tdt-0.6b-v2
 ---
+# 🎙️ Parakeet ASR MCP Server
+A Model Context Protocol (MCP) server, built with Gradio, that interfaces with a speech-to-text API serving the [Parakeet TDT 0.6B V2](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) model, open-sourced by NVIDIA and hosted on Novita AI (https://novita.ai/templates-library/105929).
+This server is free to use and provides high-quality English transcription capabilities, supporting both plain text and SRT subtitle formats.

app.py ADDED Viewed

	@@ -0,0 +1,204 @@

+import gradio as gr
+import httpx
+import asyncio
+from typing import Dict, Any
+import os
+from config import CONFIG
+# API endpoint configuration from config
+API_BASE_URL = CONFIG["api"]["base_url"]
+API_TIMEOUT = CONFIG["api"]["timeout"]
+if API_BASE_URL is None:
+    raise ValueError("API_BASE_URL is not set")
+async def transcribe_audio(audio_file: str, output_format: str = "text") -> Dict[str, Any]:
+    """
+    Transcribe the audio file to text or SRT subtitles.
+    Args:
+        audio_file (str): Path to the audio file to transcribe
+        output_format (str): Output format - "text" for plain text, "srt" for SRT subtitles
+    Returns:
+        Dict containing the transcription result
+    """
+    try:
+        # Determine the endpoint based on output format
+        if output_format == "srt":
+            endpoint = f"{API_BASE_URL}{CONFIG['api']['endpoints']['transcribe_srt']}"
+        else:
+            endpoint = f"{API_BASE_URL}{CONFIG['api']['endpoints']['transcribe']}"
+        # Prepare the file for upload
+        async with httpx.AsyncClient(timeout=API_TIMEOUT) as client:
+            with open(audio_file, "rb") as f:
+                files = {"file": (os.path.basename(audio_file), f, "audio/wav")}
+                response = await client.post(endpoint, files=files)
+                response.raise_for_status()
+                if output_format == "srt":
+                    # For SRT format, return the raw text content
+                    return {
+                        "success": True,
+                        "transcription": response.text,
+                        "format": "srt"
+                    }
+                else:
+                    # For JSON format, parse the response
+                    result = response.json()
+                    # Handle both old format (direct text) and new format (segments array)
+                    transcription_text = ""
+                    if "success" in result and result["success"] is True and "segments" in result and result["segments"]:
+                        # New format with segments array - include timestamps
+                        formatted_segments = []
+                        for segment in result["segments"]:
+                            text = segment.get("text", "")
+                            formatted_segments.append(f"{text}")
+                        transcription_text = "\n".join(formatted_segments)
+                    else:
+                        transcription_text = "No transcription text found in response"
+                    return {
+                        "success": True,
+                        "transcription": transcription_text,
+                        "format": "text",
+                        "metadata": result
+                    }
+    except httpx.TimeoutException:
+        return {
+            "success": False,
+            "error": "Request timed out. The audio file might be too long or the server is busy.",
+            "format": output_format
+        }
+    except httpx.HTTPStatusError as e:
+        return {
+            "success": False,
+            "error": f"HTTP error {e.response.status_code}: {e.response.text}",
+            "format": output_format
+        }
+    except Exception as e:
+        return {
+            "success": False,
+            "error": f"Unexpected error: {str(e)}",
+            "format": output_format
+        }
+def transcribe_audio_sync(audio_file: str, output_format: str = "text") -> Dict[str, Any]:
+    """
+    Synchronous wrapper for the async transcribe function.
+    """
+    return asyncio.run(transcribe_audio(audio_file, output_format))
+def transcribe_to_text(audio_file: str) -> str:
+    """
+    Transcribe the audio file to plain text.
+    Args:
+        audio_file (str): The URL to the audio file.
+    Returns:
+        str: Transcribed text
+    """
+    if not audio_file:
+        return "Please provide an audio file."
+    result = transcribe_audio_sync(audio_file, "text")
+    if result["success"]:
+        return result["transcription"]
+    else:
+        return f"Error: {result['error']}"
+def transcribe_to_srt(audio_file: str) -> str:
+    """
+    Transcribe the audio file to SRT subtitle format.
+    Args:
+        audio_file (str): The URL to the audio file.
+    Returns:
+        str: SRT formatted subtitles
+    """
+    if not audio_file:
+        return "Please provide an audio file."
+    result = transcribe_audio_sync(audio_file, "srt")
+    if result["success"]:
+        return result["transcription"]
+    else:
+        return f"Error: {result['error']}"
+# Build the Gradio UI: a header plus one tab per output format (plain text, SRT)
+with gr.Blocks(title="Parakeet ASR MCP Server") as demo:
+    gr.Markdown("""
+    # 🎙️ Parakeet ASR MCP Server
+    A Model Context Protocol (MCP) server built with Gradio interfaces with a speech-to-text API, serving the model [Parakeet TDT 0.6B V2](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2) open-sourced by NVIDIA and hosted on Novita AI (https://novita.ai/templates-library/105929).
+    This server is free to use and provides high-quality English transcription capabilities, supporting both plain text and SRT subtitle formats.
+    """)  # header text shown above the tabs
+    with gr.Tab("Transcribe to text"):  # tab 1: plain-text transcription
+        with gr.Row():
+            with gr.Column():  # left column: audio input and trigger button
+                audio_input_text = gr.Audio(
+                    label="Upload Audio File",
+                    type="filepath",  # the handler receives a file path string
+                    sources=["upload", "microphone"]
+                )
+                transcribe_text_btn = gr.Button("Transcribe to Text", variant="primary")
+            with gr.Column():  # right column: result display
+                text_output = gr.Textbox(
+                    label="Transcription Result",
+                    lines=10,
+                    placeholder="Transcribed text will appear here..."
+                )
+        transcribe_text_btn.click(  # wire the button to transcribe_to_text
+            fn=transcribe_to_text,
+            inputs=[audio_input_text],
+            outputs=[text_output]
+        )
+    with gr.Tab("Transcribe to SRT Subtitles"):  # tab 2: SRT subtitle output
+        with gr.Row():
+            with gr.Column():  # left column: audio input and trigger button
+                audio_input_srt = gr.Audio(
+                    label="Upload Audio File",
+                    type="filepath",  # the handler receives a file path string
+                    sources=["upload", "microphone"]
+                )
+                transcribe_srt_btn = gr.Button("Transcribe to SRT", variant="primary")
+            with gr.Column():  # right column: result display
+                srt_output = gr.Textbox(
+                    label="SRT Subtitles",
+                    lines=15,
+                    placeholder="SRT formatted subtitles will appear here..."
+                )
+        transcribe_srt_btn.click(  # wire the button to transcribe_to_srt
+            fn=transcribe_to_srt,
+            inputs=[audio_input_srt],
+            outputs=[srt_output]
+        )
+if __name__ == "__main__":
+    # Launch with MCP server enabled
+    try:
+        demo.launch(
+            mcp_server=True,
+            share=False,
+            server_name=CONFIG["server"]["host"],
+            server_port=CONFIG["server"]["port"],
+        )
+    except Exception as e:
+        print(f"Error launching server: {e}")

config.py ADDED Viewed

	@@ -0,0 +1,27 @@

+"""
+Configuration settings for the ASR (Automatic Speech Recognition) MCP Server.
+"""
+import os
+# Settings for the remote transcription API that app.py calls
+API_CONFIG = {
+    "base_url": os.getenv("API_BASE_URL", ""),  # "" (not None) when the env var is unset
+    "endpoints": {  # paths appended to base_url
+        "transcribe": "/transcribe",
+        "transcribe_srt": "/transcribe/srt"
+    },
+    "timeout": 300.0,  # passed as the httpx client timeout; presumably seconds
+    "max_retries": None  # NOTE(review): not read by the visible app.py - confirm it is still needed
+}
+# Host and port handed to demo.launch() in app.py
+SERVER_CONFIG = {
+    "host": "0.0.0.0",
+    "port": 7860,
+}
+# Single dict imported by app.py, grouping the two sections above
+CONFIG = {
+    "api": API_CONFIG,
+    "server": SERVER_CONFIG
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ gradio[mcp]>=5.31.0
2	+ httpx>=0.25.0