# NOTE: the lines "Spaces: / Running / Running" here were Hugging Face
# Space page chrome captured by the scrape, not part of the program.
# NOTE(review): earlier prototype kept for reference — it loaded the hosted
# model UI directly:
#   gr.load("models/vrclc/Whisper-medium-Malayalam",
#           examples=[[os.path.join(os.path.abspath(''), "./sample1.wav")]]).launch()
import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline

# Prefer the first GPU when available; otherwise run on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# ASR pipeline for the Malayalam fine-tuned Whisper-small model.
# Long recordings are processed in 10-second chunks to bound memory use.
pipe = pipeline(
    "automatic-speech-recognition",
    model="vrclc/Whisper-small-Malayalam",
    chunk_length_s=10,
    device=device,
)
def transcribe(audio):
    """Transcribe Malayalam speech from a recorded or uploaded audio file.

    Args:
        audio: Filepath string from the Gradio ``Audio`` component
            (``type="filepath"``), a dict carrying a ``"name"`` key
            (older Gradio payload shape), or ``None`` when nothing
            was recorded or uploaded.

    Returns:
        The transcribed text on success, otherwise a human-readable
        error message string (the UI displays whatever is returned).
    """
    try:
        if audio is None:
            return "Please record or upload an audio file."
        print(f"[DEBUG] Received audio: {audio}")
        # Handle filepath case from Gradio; fall back to the dict shape.
        audio_path = audio if isinstance(audio, str) else audio.get("name", None)
        if audio_path is None:
            return "Could not read audio file."
        print(f"[DEBUG] Reading audio file: {audio_path}")
        # soundfile returns the raw sample array plus its native rate; the
        # pipeline resamples as needed when given this dict form.
        audio_data, sample_rate = sf.read(audio_path)
        print(f"[DEBUG] Audio sample rate: {sample_rate}, shape: {audio_data.shape}")
        transcription = pipe(
            {"array": audio_data, "sampling_rate": sample_rate},
            chunk_length_s=10,
            batch_size=8,
        )["text"]
        print(f"[DEBUG] Transcription: {transcription}")
        return transcription
    except Exception as e:
        # Broad catch is deliberate: this is the top-level UI boundary, so
        # any failure should surface as text in the interface, not a crash.
        import traceback
        print("[ERROR] Exception during transcription:")
        traceback.print_exc()
        return f"Error: {str(e)}"
# Build and launch the Gradio UI. ``type="filepath"`` makes the Audio
# component hand transcribe() a path string rather than raw samples.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="Malayalam Speech Recognition",
    description="Record or upload Malayalam speech and submit to get the transcribed text.",
    examples=[["sample1.wav"]],
)
iface.launch()