Spaces:

nguyenductho89
/

vinai-PhoWhisper-large

Sleeping

App Files Files Community

vinai-PhoWhisper-large / app.py

nguyenductho89

Update app.py

82addfa verified 5 months ago

raw

history blame contribute delete

2.75 kB

	import gradio as gr
	import os
	import traceback

	# Read the Hugging Face access token from the HF_TOKEN environment variable
	# and stop at import time when it is absent or empty; the token is handed to
	# the model loader further down in this file.
	HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")

	if HUGGINGFACE_TOKEN is None or HUGGINGFACE_TOKEN == "":
	    raise ValueError("Missing Hugging Face token. Please set HF_TOKEN in Space settings.")

	# Largest audio file accepted for transcription, in megabytes.
	_MAX_FILE_SIZE_MB = 100


	def transcribe_audio(audio):
	    """Transcribe an audio file using the module-level ``model``.

	    Every outcome, success or failure, is returned as a string so that it
	    can be displayed directly in the output textbox.

	    Args:
	        audio: Filesystem path of the audio file, or ``None`` when nothing
	            was uploaded or recorded.

	    Returns:
	        The transcription text on success; otherwise a human-readable
	        message describing the problem (missing input, unreadable file,
	        oversized file, model not loaded, unexpected model output, or an
	        exception raised during transcription).
	    """
	    if audio is None:
	        return "Error: Please upload or record an audio file."

	    # Reading the size can fail (missing file, bad path, permissions).
	    # Report that as an error message like every other failure instead of
	    # letting the exception escape the handler.
	    try:
	        file_size_mb = os.path.getsize(audio) / (1024 * 1024)
	    except (OSError, TypeError, ValueError) as exc:
	        return f"Error: Could not read audio file: {exc}"

	    if file_size_mb > _MAX_FILE_SIZE_MB:
	        return (
	            f"Error: File size ({file_size_mb:.2f}MB) exceeds "
	            f"{_MAX_FILE_SIZE_MB}MB limit."
	        )

	    try:
	        # ``model`` is only read here, so no ``global`` declaration is needed.
	        if model is None:
	            return "Error: Model failed to load."

	        result = model(audio)
	        print("Model output:", result)  # Debugging output

	        # The model may answer with a dict carrying a 'text' key or with a
	        # plain string; anything else is surfaced verbatim for diagnosis.
	        if isinstance(result, dict) and 'text' in result:
	            return result['text']
	        if isinstance(result, str):
	            return result
	        return f"Unexpected output from model: {result}"

	    except Exception as e:
	        error_str = str(e)
	        # This fragment of the error text is treated as the sign of audio
	        # longer than 30 seconds and mapped to a friendlier message.
	        if "3000 mel input features" in error_str:
	            return "Error: Audio exceeds 30 seconds. Long-form transcription not supported in this configuration."
	        return f"Error during transcription: {error_str}"

	# Load the hosted model through the hf-inference provider, authenticating
	# with HUGGINGFACE_TOKEN. A failure is logged rather than re-raised, so
	# `model` stays None and transcribe_audio reports that the model failed to
	# load instead of the app crashing at startup.
	model = None
	try:
	    model = gr.load(
	        "models/vinai/PhoWhisper-large",
	        provider="hf-inference",
	        # gr.load takes the access token through its `token` parameter;
	        # `api_key` is not a documented parameter of gr.load.
	        token=HUGGINGFACE_TOKEN,
	    )
	    print("Model loaded successfully!")
	except Exception as e:
	    print("Error loading model:", str(e))
	    traceback.print_exc()

	# Assemble the interface: a sidebar with provider information and a sign-in
	# button, plus a main column with the audio input, the read-only result box
	# and the button that triggers transcription.
	with gr.Blocks(fill_height=True) as demo:
	    with gr.Sidebar():
	        gr.Markdown("# Inference Provider")
	        gr.Markdown("This Space showcases the vinai/PhoWhisper-large model, served by the hf-inference API.")
	        button = gr.LoginButton("Sign in")

	    with gr.Column():
	        gr.Markdown("# Audio Transcription with PhoWhisper-large")

	        # type="filepath" matches transcribe_audio, which reads the file size
	        # from a path on disk.
	        audio_input = gr.Audio(
	            label="Upload an audio file or record (max 100MB)",
	            sources=["upload", "microphone"],
	            type="filepath",
	        )

	        # Read-only box that shows either the transcription or an error message.
	        transcription_output = gr.Textbox(
	            label="Transcription",
	            placeholder="Transcription or error message will appear here...",
	            lines=10,
	            interactive=False,
	        )

	        submit_btn = gr.Button("Transcribe")

	        # Wire the button to the transcription handler.
	        submit_btn.click(
	            fn=transcribe_audio,
	            inputs=audio_input,
	            outputs=transcription_output,
	            queue=True,
	            concurrency_limit=2,
	        )

	# Start the web server for the app.
	demo.launch()