File size: 2,752 Bytes
f12e919
a08af5b
 
f12e919
82addfa
 
 
 
 
 
a08af5b
fd823ab
e1eeb39
9e2c699
a08af5b
 
e1eeb39
 
 
a08af5b
9bfa1a3
0955f87
a08af5b
 
 
 
0955f87
a08af5b
 
 
9bfa1a3
 
a08af5b
 
 
 
 
9bfa1a3
0955f87
 
 
 
fd823ab
82addfa
a08af5b
 
 
 
82addfa
 
a08af5b
 
 
 
 
 
 
f12e919
 
 
0955f87
f12e919
a08af5b
fd823ab
 
a08af5b
fd823ab
 
e1eeb39
 
fd823ab
a08af5b
fd823ab
 
 
9e2c699
9bfa1a3
fd823ab
a08af5b
fd823ab
a08af5b
fd823ab
 
 
 
 
 
 
 
a08af5b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import gradio as gr
import os
import traceback

# Read the Hugging Face API token from the environment; the Space cannot
# reach the inference API without it, so fail fast at startup.
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")

# Treat a missing OR empty token as absent.
if not HUGGINGFACE_TOKEN:
    raise ValueError("Missing Hugging Face token. Please set HF_TOKEN in Space settings.")

# Function to transcribe audio
def transcribe_audio(audio):
    """Transcribe an uploaded or recorded audio file with the loaded model.

    Args:
        audio: Filesystem path to the audio file (Gradio ``type="filepath"``),
            or None when nothing was uploaded/recorded.

    Returns:
        The transcription text on success, otherwise a human-readable
        "Error: ..." string — this UI reports failures through the output
        textbox instead of raising.
    """
    if audio is None:
        return "Error: Please upload or record an audio file."

    # Check file size before touching the model. getsize can raise OSError
    # if the temp upload vanished between upload and the button click.
    try:
        file_size_mb = os.path.getsize(audio) / (1024 * 1024)
    except OSError:
        return "Error: Could not read the uploaded audio file."
    if file_size_mb > 100:
        return f"Error: File size ({file_size_mb:.2f}MB) exceeds 100MB limit."

    try:
        # `model` is the module-level handle set at startup; it stays None
        # when gr.load failed. (No `global` statement needed for a read.)
        if model is None:
            return "Error: Model failed to load."

        # Transcribe
        result = model(audio)
        print("Model output:", result)  # Debugging output

        # A gr.load-backed model may return either a dict or a bare string.
        if isinstance(result, dict) and 'text' in result:
            return result['text']
        elif isinstance(result, str):
            return result
        else:
            return f"Unexpected output from model: {result}"

    except Exception as e:
        error_str = str(e)
        # The hosted Whisper endpoint rejects clips longer than ~30s with
        # this message; translate it into a friendlier explanation.
        if "3000 mel input features" in error_str:
            return "Error: Audio exceeds 30 seconds. Long-form transcription not supported in this configuration."
        return f"Error during transcription: {error_str}"

# Load model securely using HF_TOKEN. `model` stays None on failure so the
# transcription handler can report a load error instead of crashing.
model = None
try:
    model = gr.load(
        "models/vinai/PhoWhisper-large",
        provider="hf-inference",
        api_key=HUGGINGFACE_TOKEN,  # Pass token securely
    )
except Exception as load_err:
    print("Error loading model:", str(load_err))
    traceback.print_exc()
else:
    print("Model loaded successfully!")

# Build Gradio UI
with gr.Blocks(fill_height=True) as demo:
    # Sidebar: provider info plus a Hugging Face login button.
    with gr.Sidebar():
        gr.Markdown("# Inference Provider")
        gr.Markdown("This Space showcases the vinai/PhoWhisper-large model, served by the hf-inference API.")
        login_button = gr.LoginButton("Sign in")

    with gr.Column():
        gr.Markdown("# Audio Transcription with PhoWhisper-large")

        # Accept an uploaded file or a microphone recording; the handler
        # receives a filesystem path (type="filepath").
        audio_in = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Upload an audio file or record (max 100MB)",
        )

        # Read-only textbox showing either the transcription or an
        # "Error: ..." message produced by transcribe_audio.
        text_out = gr.Textbox(
            label="Transcription",
            lines=10,
            placeholder="Transcription or error message will appear here...",
            interactive=False,
        )

        run_button = gr.Button("Transcribe")

        # Queue the job with at most two concurrent transcriptions.
        run_button.click(
            fn=transcribe_audio,
            inputs=audio_in,
            outputs=text_out,
            queue=True,
            concurrency_limit=2,
        )

# Launch the app
demo.launch()