Whisper-Malasar-ASR

Runtime error

File size: 2,141 Bytes

8332e59
c32de1c
bbcc137
8332e59
 
 
 
 
 
c32de1c
8332e59
 
 
 
 
c32de1c
8332e59
 
 
 
 
 
 
c32de1c
8332e59
 
 
 
 
c32de1c
8332e59
c32de1c
8332e59
 
 
 
c32de1c
8332e59
 
c32de1c
8332e59
c32de1c
8332e59
 
 
 
 
c32de1c
8332e59
 
60f2f1a
8332e59
 
 
 
 
023b708
8332e59

import gradio as gr

# Reference passed to gr.load; the "models/" prefix names a model repo.
model_ref = "models/vrclc/Malasar_medium_MTF"

# Single example row: a local audio path followed by the string "transcribe".
sample_rows = [["./luke1.1.wav", "transcribe"]]

# Text shown on the demo page.
page_title = "VRCLC Malasar Speech Recognition Demo"
page_blurb = "Transcribe microphone or audio inputs with the click of a button!"

# Build the demo from the model reference; example caching is switched off.
demo = gr.load(
    model_ref,
    examples=sample_rows,
    title=page_title,
    description=page_blurb,
    cache_examples=False,
)

# Start serving as soon as the script is executed.
demo.launch()
# import gradio as gr
# import torch
# import soundfile as sf
# from transformers import pipeline

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# pipe = pipeline(
#     "automatic-speech-recognition",
#     model="vrclc/Malasar_medium_MTF",
#     chunk_length_s=10,
#     device=device,
# )

# def transcribe(audio):
#     """Transcribes Malasar speech from an audio file."""
#     try:
#         if audio is None:
#             return "Please record or upload an audio file."

#         print(f"[DEBUG] Received audio: {audio}")

#         # Handle filepath case from Gradio
#         audio_path = audio if isinstance(audio, str) else audio.get("name", None)
#         if audio_path is None:
#             return "Could not read audio file."

#         print(f"[DEBUG] Reading audio file: {audio_path}")
#         audio_data, sample_rate = sf.read(audio_path)

#         print(f"[DEBUG] Audio sample rate: {sample_rate}, shape: {audio_data.shape}")

#         transcription = pipe(
#             {"array": audio_data, "sampling_rate": sample_rate},
#             chunk_length_s=10,
#             batch_size=8,
#         )["text"]

#         print(f"[DEBUG] Transcription: {transcription}")
#         return transcription

#     except Exception as e:
#         import traceback
#         print("[ERROR] Exception during transcription:")
#         traceback.print_exc()
#         return f"Error: {str(e)}"

# iface = gr.Interface(
#     fn=transcribe,
#     inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
#     outputs="text",
#     title="Malasar Speech Recognition",
#     description="Record or upload Malasar speech and submit to get the transcribed text.",
#     examples=[["luke1.1.wav"]],
# )
# iface.launch()