Spaces:
Runtime error
Runtime error
File size: 2,700 Bytes
c336d2f 3e435ed 04cf931 3e435ed c336d2f 3e435ed 04cf931 c336d2f 3e435ed c336d2f 3e435ed c336d2f 3e435ed c336d2f 3e435ed c336d2f 3e435ed c336d2f 04cf931 c336d2f 04cf931 c336d2f 04cf931 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import gradio as gr
import torch
import transformers
import librosa
import numpy as np
import os
class UltravoxInterface:
    """Microphone-to-text app: a Whisper ASR pipeline behind a Gradio UI.

    Attributes:
        model_name: Hugging Face model id handed to the pipeline.
        pipe: The ``transformers`` automatic-speech-recognition pipeline
            used for every transcription request.
    """

    # Sampling rate (Hz) the audio is resampled to before transcription.
    SAMPLE_RATE = 16000
    # Longest slice of audio, in seconds, sent to the pipeline in one call.
    CHUNK_SECONDS = 30

    def __init__(self) -> None:
        """Load the speech-recognition pipeline on the CPU."""
        print("Initializing voice interface...")
        # The "small" Whisper checkpoint keeps the memory footprint modest.
        self.model_name = "openai/whisper-small"
        self.pipe = transformers.pipeline(
            "automatic-speech-recognition",
            model=self.model_name,
            # Full precision on purpose: float16 is not reliably supported
            # by PyTorch's CPU kernels (ops may be unimplemented or very
            # slow), and this pipeline is pinned to the CPU below.
            torch_dtype=torch.float32,
            device="cpu",
        )
        print("Model loaded successfully!")

    def process_audio(self, audio_path, custom_prompt=None) -> str:
        """Transcribe the audio file at ``audio_path``.

        Args:
            audio_path: Path to an audio file, or ``None`` when the user
                submitted nothing.
            custom_prompt: Currently unused; kept so existing callers that
                pass it keep working.

        Returns:
            The transcribed text. If no audio was supplied, or if loading
            or transcription raises, a human-readable message is returned
            instead, so the UI textbox always has something to show.
        """
        if audio_path is None:
            return "Please provide an audio input."
        try:
            # librosa decodes the whole file into memory, downmixed to mono
            # and resampled to SAMPLE_RATE; chunking happens afterwards.
            audio, sr = librosa.load(audio_path, sr=self.SAMPLE_RATE, mono=True)
            return self._transcribe_samples(audio, sr)
        except Exception as e:
            # UI boundary: surface the failure as text rather than raising.
            return f"Error processing audio: {e}"

    def _transcribe_samples(self, audio, sr) -> str:
        """Transcribe a 1-D sample sequence in windows of CHUNK_SECONDS.

        Args:
            audio: Sequence of audio samples (supports ``len`` and slicing).
            sr: Sampling rate of ``audio`` in Hz.

        Returns:
            The text of every window joined with single spaces, or the
            "no input" message when ``audio`` contains no samples.
        """
        # len() rather than truthiness: NumPy arrays have no unambiguous
        # boolean value.
        if len(audio) == 0:
            return "Please provide an audio input."
        window = self.CHUNK_SECONDS * sr
        # Audio no longer than one window yields exactly one slice, so the
        # short and long cases share this single path.
        texts = [
            self.pipe(audio[start:start + window], batch_size=1)["text"]
            for start in range(0, len(audio), window)
        ]
        return " ".join(texts)

    def create_interface(self):
        """Build the Gradio interface that feeds ``process_audio``.

        Returns:
            A ``gr.Interface`` with one microphone input (delivered to the
            callback as a file path) and one multi-line text output.
        """
        microphone = gr.Audio(
            label="Speak here",
            sources=["microphone"],
            type="filepath",
        )
        transcript_box = gr.Textbox(
            label="Transcription",
            lines=5,
            placeholder="Transcription will appear here...",
        )
        interface = gr.Interface(
            fn=self.process_audio,
            inputs=[microphone],
            outputs=[transcript_box],
            title="Voice Assistant",
            description="Speak into the microphone and get text transcription!",
            theme=gr.themes.Soft(primary_hue="orange"),
            # NOTE(review): a single all-None example row carries no sample
            # audio; kept as in the original -- confirm it is intentional.
            examples=[[None]],
        )
        return interface
# Build the app once at import time; constructing UltravoxInterface loads
# the speech-recognition model, so this line can take a while.
app = UltravoxInterface()
interface = app.create_interface()

# Start the Gradio server. The original author notes this call is required
# for the app to come up on Hugging Face Spaces.
interface.launch()