"""Gradio app that transcribes audio with the vinai/PhoWhisper-large model.

The model is loaded once at start-up through ``gr.load`` (hf-inference
provider) using the token found in the ``HF_TOKEN`` environment variable.
The UI offers an audio input (upload or microphone) and shows either the
transcription or an error message in a read-only textbox.
"""

import os
import traceback

import gradio as gr

# Largest audio file, in megabytes, that is forwarded to the model.
MAX_FILE_SIZE_MB = 100

# Load Hugging Face API token from environment variables
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
if not HUGGINGFACE_TOKEN:
    raise ValueError("Missing Hugging Face token. Please set HF_TOKEN in Space settings.")


def transcribe_audio(audio) -> str:
    """Transcribe an audio file and return the text or an error message.

    Every failure is reported as a returned string (never raised) so that
    it is displayed in the transcription textbox.

    Args:
        audio: Path of the audio file supplied by the ``gr.Audio`` input
            (configured with ``type="filepath"``), or ``None`` when
            nothing was uploaded or recorded.

    Returns:
        The transcription text, or a message starting with ``"Error"`` /
        ``"Unexpected output"`` describing what went wrong.
    """
    if audio is None:
        return "Error: Please upload or record an audio file."

    # Check file size. The path may be missing or unreadable, so report that
    # like every other failure instead of letting OSError escape.
    try:
        file_size_mb = os.path.getsize(audio) / (1024 * 1024)
    except OSError as e:
        return f"Error: Could not read audio file: {e}"
    if file_size_mb > MAX_FILE_SIZE_MB:
        return f"Error: File size ({file_size_mb:.2f}MB) exceeds {MAX_FILE_SIZE_MB}MB limit."

    # `model` is the module-level object assigned below; it stays None when
    # loading failed at start-up.
    if model is None:
        return "Error: Model failed to load."

    # Broad catch is deliberate: this is the UI boundary and any failure of
    # the remote call must be turned into a message for the user.
    try:
        # Transcribe
        result = model(audio)
        print("Model output:", result)  # Debugging output

        # Handle output format
        if isinstance(result, dict) and 'text' in result:
            return result['text']
        if isinstance(result, str):
            return result
        return f"Unexpected output from model: {result}"
    except Exception as e:
        error_str = str(e)
        # This substring in the error text is treated as the signal that the
        # clip is longer than the 30 seconds supported here.
        if "3000 mel input features" in error_str:
            return "Error: Audio exceeds 30 seconds. Long-form transcription not supported in this configuration."
        return f"Error during transcription: {error_str}"


# Load model securely using HF_TOKEN
model = None
try:
    model = gr.load(
        "models/vinai/PhoWhisper-large",
        provider="hf-inference",
        # gr.load receives the Hugging Face token through its `token`
        # parameter; `api_key` is not one of its documented parameters.
        token=HUGGINGFACE_TOKEN,
    )
    print("Model loaded successfully!")
except Exception as e:
    # Keep the app running; transcribe_audio reports the failure per request.
    print("Error loading model:", str(e))
    traceback.print_exc()

# Build Gradio UI
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# Inference Provider")
        gr.Markdown("This Space showcases the vinai/PhoWhisper-large model, served by the hf-inference API.")
        button = gr.LoginButton("Sign in")

    with gr.Column():
        gr.Markdown("# Audio Transcription with PhoWhisper-large")
        audio_input = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label=f"Upload an audio file or record (max {MAX_FILE_SIZE_MB}MB)",
        )
        transcription_output = gr.Textbox(
            label="Transcription",
            lines=10,
            placeholder="Transcription or error message will appear here...",
            interactive=False,
        )
        submit_btn = gr.Button("Transcribe")
        submit_btn.click(
            fn=transcribe_audio,
            inputs=audio_input,
            outputs=transcription_output,
            queue=True,
            concurrency_limit=2,
        )

# Launch the app
demo.launch()