import gradio as gr
import os
import traceback
# Load Hugging Face API token from environment variables
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
if not HUGGINGFACE_TOKEN:
    raise ValueError("Missing Hugging Face token. Please set HF_TOKEN in Space settings.")

# Function to transcribe audio
def transcribe_audio(audio):
    if audio is None:
        return "Error: Please upload or record an audio file."

    # Check file size
    file_size_mb = os.path.getsize(audio) / (1024 * 1024)
    if file_size_mb > 100:
        return f"Error: File size ({file_size_mb:.2f}MB) exceeds 100MB limit."

    try:
        global model
        if model is None:
            return "Error: Model failed to load."

        # Transcribe
        result = model(audio)
        print("Model output:", result)  # Debugging output

        # Handle output format
        if isinstance(result, dict) and 'text' in result:
            return result['text']
        elif isinstance(result, str):
            return result
        else:
            return f"Unexpected output from model: {result}"
    except Exception as e:
        error_str = str(e)
        # Whisper-style encoders take a fixed 30-second (3000 mel-frame) input window,
        # which is why clips longer than 30 seconds fail with this specific message.
        if "3000 mel input features" in error_str:
            return "Error: Audio exceeds 30 seconds. Long-form transcription not supported in this configuration."
        return f"Error during transcription: {error_str}"

# Load model securely using HF_TOKEN
model = None
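# Note: for a "models/..." source, gr.load() returns a Gradio interface object that
# can also be called like a function; transcribe_audio() uses this by calling
# model(audio), which forwards the file to the hosted inference endpoint instead of
# running PhoWhisper locally.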
try:
    model = gr.load(
        "models/vinai/PhoWhisper-large",
        provider="hf-inference",
        api_key=HUGGINGFACE_TOKEN  # Pass token securely
    )
    print("Model loaded successfully!")
except Exception as e:
    print("Error loading model:", str(e))
    traceback.print_exc()
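# If loading fails, model stays None and the app still launches; transcribe_audio()
# then returns the "Model failed to load." message instead of crashing at startup.
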
# Build Gradio UI
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# Inference Provider")
        gr.Markdown("This Space showcases the vinai/PhoWhisper-large model, served by the hf-inference API.")
        button = gr.LoginButton("Sign in")

    with gr.Column():
        gr.Markdown("# Audio Transcription with PhoWhisper-large")
        audio_input = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Upload an audio file or record (max 100MB)"
        )
        transcription_output = gr.Textbox(
            label="Transcription",
            lines=10,
            placeholder="Transcription or error message will appear here...",
            interactive=False
        )
        submit_btn = gr.Button("Transcribe")
        submit_btn.click(
            fn=transcribe_audio,
            inputs=audio_input,
            outputs=transcription_output,
            queue=True,
            concurrency_limit=2
        )

# Launch the app
demo.launch()
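
# Optional: when running this script locally rather than in a Space, a temporary
# public URL can be created with demo.launch(share=True).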