import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline

# Run on GPU when available; the pipeline accepts a torch device string.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Build the ASR pipeline once at startup; chunk_length_s enables chunked
# long-form decoding so inputs longer than 10 s are handled.
pipe = pipeline(
    "automatic-speech-recognition",
    model="vrclc/Whisper-small-Malayalam",
    chunk_length_s=10,
    device=device,
)


def transcribe(audio):
    """Transcribe Malayalam speech from a Gradio audio input.

    Args:
        audio: Filepath string from the Gradio ``Audio`` component
            (``type="filepath"``), a dict payload carrying a ``"path"``
            (current Gradio) or ``"name"`` (legacy) key, or ``None`` when
            the user submitted without recording/uploading.

    Returns:
        The transcribed text, or a human-readable error message string.
    """
    try:
        if audio is None:
            return "Please record or upload an audio file."

        print(f"[DEBUG] Received audio: {audio}")

        # Gradio normally hands us a plain filepath string; dict payloads
        # key the path under "path" (current) or "name" (legacy).
        if isinstance(audio, str):
            audio_path = audio
        else:
            audio_path = audio.get("path") or audio.get("name")
        if audio_path is None:
            return "Could not read audio file."

        print(f"[DEBUG] Reading audio file: {audio_path}")
        audio_data, sample_rate = sf.read(audio_path)
        print(f"[DEBUG] Audio sample rate: {sample_rate}, shape: {audio_data.shape}")

        # sf.read returns a (frames, channels) 2-D array for stereo files,
        # but Whisper expects a mono 1-D waveform — downmix by averaging.
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        # chunk_length_s is already configured on the pipeline; only the
        # per-call batch size needs to be supplied here.
        transcription = pipe(
            {"array": audio_data, "sampling_rate": sample_rate},
            batch_size=8,
        )["text"]
        print(f"[DEBUG] Transcription: {transcription}")
        return transcription
    except Exception as e:
        # Top-level UI boundary: log the full traceback server-side and
        # surface a readable message in the interface instead of crashing.
        import traceback
        print("[ERROR] Exception during transcription:")
        traceback.print_exc()
        return f"Error: {str(e)}"


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="Malayalam Speech Recognition",
    description="Record or upload Malayalam speech and submit to get the transcribed text.",
    examples=[["sample1.wav"]],
)

iface.launch()