# pidgin-demo / app.py
# Mardiyyah's picture
# Update app.py
# 8ed33a3 verified
import os
import shutil
# Remove Hugging Face cache at startup.
# NOTE(review): presumably done to start from a clean download each run —
# confirm the intent before changing it.
hf_cache = os.path.expanduser("~/.cache/huggingface")
try:
    # Attempt the delete directly instead of checking for existence first:
    # the directory may disappear between a check and the removal.
    shutil.rmtree(hf_cache)
except FileNotFoundError:
    # Nothing cached yet; any other failure still propagates.
    pass
import gradio as gr
import numpy as np
import torchaudio
from transformers import pipeline
# Build the speech-recognition pipeline once, at import time, so every
# transcription request reuses the same loaded Nigerian Pidgin checkpoint.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="asr-nigerian-pidgin/pidgin-wav2vec2-xlsr53",
)
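# Transcription function
# Earlier version, kept for reference only: it received the audio as an
# (sr, y) pair instead of a file path. Superseded by transcribe(audio_filepath).
# def transcribe(audio):
#     if audio is None:
#         return "No audio provided."
#     sr, y = audio
#     # Convert to mono if stereo
#     if y.ndim > 1:
#         y = y.mean(axis=1)
#     y = y.astype(np.float32)
#     y /= np.max(np.abs(y))
#     return transcriber({"sampling_rate": sr, "raw": y})["text"]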
def transcribe(audio_filepath):
    """Transcribe one audio file to text with the module-level ``transcriber``.

    The file is loaded with torchaudio, averaged down to a single channel,
    peak-normalised, and handed to the pipeline together with the sample
    rate read from the file.

    Args:
        audio_filepath: Path to the audio file to transcribe, or ``None`` /
            an empty string when no audio was supplied.

    Returns:
        The ``"text"`` field of the pipeline result, or ``""`` when there is
        no file or the file contains no samples.
    """
    if not audio_filepath:
        return ""

    # load & preprocess
    waveform, sr = torchaudio.load(audio_filepath)
    # More than one channel: average them (first axis) into a single one.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    # Drop only the channel axis; a bare squeeze() would also collapse a
    # one-sample clip into a 0-d array. astype() returns a copy, so the
    # in-place scaling below never writes into the tensor's memory.
    audio = waveform.squeeze(0).numpy().astype(np.float32)
    if audio.size == 0:
        # np.max() raises ValueError on an empty array; nothing to transcribe.
        return ""
    # Peak-normalise; the epsilon avoids a division by zero on pure silence.
    audio /= np.max(np.abs(audio)) + 1e-9
    return transcriber({"sampling_rate": sr, "raw": audio})["text"]
# Define the Gradio UI components
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Nigerian Pidgin ASR Demo")
    # Adjacent literals keep the Markdown text free of source indentation.
    gr.Markdown(
        "Upload or record audio in Nigerian Pidgin to get transcription. This Demo uses the\n"
        "Nigerian pidgin ASR checkpoint -[Pidgin-Wav2Vec2-XLSR53](https://huggingface.co/asr-nigerian-pidgin/pidgin-wav2vec2-xlsr53)\n"
        "and 🤗 Transformers to transcribe audio files of max 30s length.\n"
    )

    # Input side: the audio widget hands transcribe() a file path.
    with gr.Column():
        audio_in = gr.Audio(
            label="🎤 Record or upload your audio",
            type="filepath",
            sources=["upload", "microphone"],
            interactive=True,
            min_length=1,
            max_length=31,
        )
        with gr.Row():
            submit_btn = gr.Button("Submit")
            #clear_btn = gr.Button("Clear")

    # Output side: read-only transcription box plus feedback buttons.
    with gr.Column():
        transcription_txt = gr.Textbox(
            label="📝 Transcription",
            interactive=False,
            show_label=True,
            show_copy_button=True,
        )
        with gr.Row():
            flag_btn = gr.Button("🚩 Flag this output as incorrect", size="sm")
            # NOTE(review): no click handler is wired for share_btn below, so
            # pressing it currently does nothing.
            share_btn = gr.Button("🔗 Share", size="sm")

    # Button wiring:
    submit_btn.click(fn=transcribe, inputs=audio_in, outputs=transcription_txt)
    # NOTE(review): the thank-you message is written into the transcription
    # box, replacing whatever transcription was shown.
    flag_btn.click(fn=lambda: "Thank you for your feedback.", inputs=None, outputs=transcription_txt)
    #clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[audio_in, transcription_txt])

demo.launch(share=True)