import gradio as gr
import numpy as np
import tempfile
import os
from pathlib import Path

# Mock model for testing when real model can't load
USE_MOCK_MODEL = True


def initialize_model():
    """Initialize model - using mock for testing"""
    global USE_MOCK_MODEL
    if USE_MOCK_MODEL:
        print("🧪 Using mock model for testing (real model has PyTorch compatibility issues)")
        return "mock_model", None
    return None, None
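
# A minimal sketch (not wired in here) of what the real loading path might
# look like once the PyTorch compatibility issue is resolved, assuming the
# checkpoint loads through transformers' automatic-speech-recognition
# pipeline:
#
#     from transformers import pipeline
#     asr = pipeline(
#         "automatic-speech-recognition",
#         model="WakandaAI/wakanda-whisper-small-rw-v1",
#     )
#     text = asr("sample_1.wav")["text"]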


def transcribe_audio(audio_file):
    """
    Transcribe audio using mock model for testing.
    """
    if audio_file is None:
        return "Please upload an audio file."
    try:
        # Initialize model if needed
        model, processor = initialize_model()
        if model is None:
            return "❌ Error: Could not load the model. Please try again later."
        filename = Path(audio_file).name
        print(f"🎵 Processing audio file: {filename}")
        # Mock transcription based on sample files
        if "sample_1" in filename:
            return "Muraho, witwa gute?"
        elif "sample_2" in filename:
            return "Ndashaka kwiga Ikinyarwanda."
        elif "sample_3" in filename:
            return "Urakoze cyane kubafasha."
        elif "sample_4" in filename:
            return "Tugiye gutangiza ikiganiro mu Kinyarwanda."
        else:
            return f"Mock transcription for {filename}: [This would be the actual Kinyarwanda transcription]"
    except Exception as e:
        print(f"❌ Transcription error: {e}")
        return f"❌ Error during transcription: {str(e)}"


def transcribe_microphone(audio_data):
    """
    Transcribe audio from microphone input.
    """
    if audio_data is None:
        return "Please record some audio first."
    try:
        sample_rate, audio_array = audio_data
        duration = len(audio_array) / sample_rate
        print(f"🎙️ Processing microphone input: {duration:.1f} seconds at {sample_rate}Hz")
        return f"Mock transcription for {duration:.1f}s audio: [This would be the actual Kinyarwanda transcription]"
    except Exception as e:
        print(f"❌ Microphone processing error: {e}")
        return f"❌ Error processing microphone input: {str(e)}"


# Create a simple Gradio interface
def create_interface():
    """Create a clean, simple Gradio interface."""
    with gr.Blocks(title="Wakanda Whisper - Kinyarwanda ASR") as interface:
        gr.Markdown("# 🎤 Wakanda Whisper")
        gr.Markdown("### Kinyarwanda Automatic Speech Recognition")
        gr.Markdown("Upload an audio file or record your voice to get a Kinyarwanda transcription")

        with gr.Tabs():
            # File Upload Tab
            with gr.TabItem("📁 Upload Audio File"):
                with gr.Row():
                    with gr.Column():
                        audio_input = gr.Audio(
                            label="Choose Audio File",
                            type="filepath"
                        )
                        # Sample audio files
                        gr.Markdown("**Try these sample Kinyarwanda audio files:**")
                        with gr.Row():
                            sample_1 = gr.Button("Sample 1", size="sm")
                            sample_2 = gr.Button("Sample 2", size="sm")
                            sample_3 = gr.Button("Sample 3", size="sm")
                            sample_4 = gr.Button("Sample 4", size="sm")
                        upload_btn = gr.Button("🎯 Transcribe Audio", variant="primary")
                    with gr.Column():
                        upload_output = gr.Textbox(
                            label="Transcription Result",
                            placeholder="Your Kinyarwanda transcription will appear here...",
                            lines=6,
                            show_copy_button=True
                        )

            # Microphone Tab
            with gr.TabItem("🎙️ Record Audio"):
                with gr.Row():
                    with gr.Column():
                        mic_input = gr.Audio(
                            label="Record Your Voice",
                            type="numpy"
                        )
                        mic_btn = gr.Button("🎯 Transcribe Recording", variant="primary")
                    with gr.Column():
                        mic_output = gr.Textbox(
                            label="Transcription Result",
                            placeholder="Your Kinyarwanda transcription will appear here...",
                            lines=6,
                            show_copy_button=True
                        )

        # Set up event handlers
        upload_btn.click(
            fn=transcribe_audio,
            inputs=audio_input,
            outputs=upload_output,
            show_progress=True
        )

        # Sample audio button handlers
        sample_1.click(
            fn=lambda: "sample_1.wav",
            outputs=audio_input
        )
        sample_2.click(
            fn=lambda: "sample_2.wav",
            outputs=audio_input
        )
        sample_3.click(
            fn=lambda: "sample_3.wav",
            outputs=audio_input
        )
        sample_4.click(
            fn=lambda: "sample_4.wav",
            outputs=audio_input
        )

        mic_btn.click(
            fn=transcribe_microphone,
            inputs=mic_input,
            outputs=mic_output,
            show_progress=True
        )

        gr.Markdown("---")
        gr.Markdown("**Powered by WakandaAI** | Model: [wakanda-whisper-small-rw-v1](https://huggingface.co/WakandaAI/wakanda-whisper-small-rw-v1)")

    return interface


# Launch the app
if __name__ == "__main__":
    print("🚀 Starting Wakanda Whisper ASR (Mock Mode for Testing)...")
    # Create and launch the interface
    demo = create_interface()

    # Launch configuration - let Gradio find an available port
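    # Note (assumption, not part of the original script): binding to
    # 127.0.0.1 only suits local testing. If this file is deployed as a
    # Hugging Face Space, Gradio has to listen on 0.0.0.0 (Spaces supplies
    # GRADIO_SERVER_NAME/GRADIO_SERVER_PORT), so the server_name override
    # below would likely need to be dropped there.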
    demo.launch(
        server_name="127.0.0.1",
        share=False,
        show_error=True
    )