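# Page residue from the hosting site's file view, kept here as a comment:
# uploader "arkay22", commit message "New file creation", commit 1821112 (verified).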
import gradio as gr
import transformers
import librosa
import numpy as np
# Build the Ultravox pipeline once at import time so every request reuses it.
# trust_remote_code=True allows transformers to run the custom code that is
# published in the model repository.
pipe = transformers.pipeline(
    trust_remote_code=True,
    model="fixie-ai/ultravox-v0_5-llama-3_1-8b",
)
def _result_to_text(result):
    """Normalize a pipeline result into a plain string for display.

    Handles the shapes the caller may receive: a list whose first item is
    a message dict with a ``"content"`` key, a bare message dict, or any
    other object (e.g. an already-decoded string), which is passed through
    ``str``. An empty list yields an empty string instead of raising
    ``IndexError``.
    """
    if isinstance(result, list):
        if not result:
            return ""
        # Only the first candidate is shown to the user.
        result = result[0]
    if isinstance(result, dict) and "content" in result:
        return result["content"]
    return str(result)


def transcribe(audio):
    """Run the Ultravox pipeline on an audio file and return its reply.

    Args:
        audio: Path to the audio file to process, or ``None`` when the
            user submitted the form without providing audio.

    Returns:
        The text produced by the pipeline, or ``"No audio provided."``
        when ``audio`` is ``None``.
    """
    if audio is None:
        return "No audio provided."

    # Load audio using librosa, resampling to 16 kHz.
    audio_array, sr = librosa.load(audio, sr=16000)

    # Define initial system prompt
    turns = [
        {
            "role": "system",
            "content": "You are a friendly and helpful character. You love to answer questions for people."
        },
    ]

    # Run inference; the reply is capped at 30 newly generated tokens.
    result = pipe(
        {'audio': audio_array, 'turns': turns, 'sampling_rate': sr},
        max_new_tokens=30
    )

    # Return result content, tolerating the different result shapes.
    return _result_to_text(result)
# Build Gradio Interface
# Gradio 4+ replaced the Audio keyword `source="upload"` with
# `sources=["upload"]` and rejects the old spelling with a TypeError.
# Try the current keyword first and fall back to the legacy one so the app
# starts on either major version.
_audio_kwargs = {"type": "filepath", "label": "Upload an Audio File"}
try:
    audio_input = gr.Audio(sources=["upload"], **_audio_kwargs)
except TypeError:
    audio_input = gr.Audio(source="upload", **_audio_kwargs)

demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=gr.Textbox(label="Ultravox Response"),
    # The emoji was stored as mojibake (UTF-8 bytes decoded with a legacy
    # code page); restored to the intended microphone character.
    title="🎙️ Ultravox AI Voicebot",
    description="Upload an audio file and Ultravox will respond intelligently!"
)

# Launch app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()