# Spaces: Running
import os | |
import shutil | |
# Wipe any existing Hugging Face cache directory before the app starts.
hf_cache = os.path.expanduser("~/.cache/huggingface")
if os.path.exists(hf_cache):
    # Recursively delete the cache directory and everything below it.
    shutil.rmtree(hf_cache)
import gradio as gr | |
import numpy as np | |
import torchaudio | |
from transformers import pipeline | |
# Build the speech-recognition pipeline once, at module load, from the
# Nigerian Pidgin wav2vec2 checkpoint; transcribe() reuses this object.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="asr-nigerian-pidgin/pidgin-wav2vec2-xlsr53",
)
# Earlier version of transcribe(), kept commented out for reference; it takes
# a (sampling_rate, samples) tuple instead of a file path.
# def transcribe(audio):
#     if audio is None:
#         return "No audio provided."
#     sr, y = audio
#     # Convert to mono if stereo
#     if y.ndim > 1:
#         y = y.mean(axis=1)
#     y = y.astype(np.float32)
#     y /= np.max(np.abs(y))
#     return transcriber({"sampling_rate": sr, "raw": y})["text"]
def transcribe(audio_filepath):
    """Transcribe an audio file with the module-level ``transcriber`` pipeline.

    The file is loaded with torchaudio, averaged down to one channel when it
    has more than one, peak-normalized, and handed to the pipeline together
    with its sampling rate.

    Args:
        audio_filepath: Path of the audio file to transcribe, or ``None`` /
            an empty string when no audio was provided.

    Returns:
        The ``"text"`` field of the pipeline result, or ``""`` when there is
        no file or the file holds no samples.
    """
    # No recording/upload (None) or an empty path: nothing to transcribe.
    if not audio_filepath:
        return ""

    # load & preprocess
    waveform, sr = torchaudio.load(audio_filepath)

    # A zero-sample file would make np.max() below raise ValueError on an
    # empty array, so treat it like "no audio".
    if waveform.numel() == 0:
        return ""

    # Average the channels (axis 0) down to a single one.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Drop only the channel axis: a bare squeeze() would also collapse a
    # one-sample clip into a 0-d array.
    audio = waveform.squeeze(0).numpy().astype(np.float32)

    # Peak-normalize; the epsilon avoids dividing by zero on all-silent audio.
    audio /= np.max(np.abs(audio)) + 1e-9

    return transcriber({"sampling_rate": sr, "raw": audio})["text"]
# Define the Gradio UI components.
# Layout: an intro header, an audio input with a Submit button, and a
# read-only transcription box with Flag / Share buttons underneath.
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Nigerian Pidgin ASR Demo")
    gr.Markdown("""Upload or record audio in Nigerian Pidgin to get transcription. This Demo uses the
    Nigerian pidgin ASR checkpoint -[Pidgin-Wav2Vec2-XLSR53](https://huggingface.co/asr-nigerian-pidgin/pidgin-wav2vec2-xlsr53)
    and 🤗 Transformers to transcribe audio files of max 30s length.
    """)

    with gr.Column():
        # type="filepath" makes Gradio hand transcribe() a path on disk.
        audio_in = gr.Audio(
            label="🎤 Record or upload your audio",
            type="filepath",
            sources=["upload", "microphone"],
            interactive=True,
            min_length=1,
            max_length=31,
        )
        with gr.Row():
            submit_btn = gr.Button("Submit")
            #clear_btn = gr.Button("Clear")

    with gr.Column():
        # NOTE(review): the emoji in this label was unrecoverable from the
        # garbled source; the memo glyph is a best guess - confirm.
        transcription_txt = gr.Textbox(
            label="📝 Transcription",
            interactive=False,
            show_label=True,
            show_copy_button=True,
        )
        with gr.Row():
            flag_btn = gr.Button("🚩 Flag this output as incorrect", size="sm")
            # NOTE(review): the emoji here was unrecoverable as well; the link
            # glyph is a best guess - confirm.
            share_btn = gr.Button("🔗 Share", size="sm")

    # Button wiring:
    submit_btn.click(fn=transcribe, inputs=audio_in, outputs=transcription_txt)
    # Flagging replaces the textbox content with an acknowledgement message.
    flag_btn.click(fn=lambda: "Thank you for your feedback.", inputs=None, outputs=transcription_txt)
    # NOTE(review): share_btn has no click handler attached in this block.
    #clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[audio_in, transcription_txt])

demo.launch(share=True)