# test-do-call / app.py
# Mendoza33's picture
# Update app.py
# 95daf41 verified
# raw
# history blame
# 2.08 kB
import gradio as gr
from transformers import pipeline
import torch
import librosa
import os
# Custom imports for Kokoro-82M
from models import build_model
from kokoro import generate
from IPython.display import Audio
# Device setup: prefer CUDA when PyTorch reports a usable GPU, otherwise CPU.
# Note that `device` is not passed to the two pipelines created below.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Pre-trained Hugging Face pipelines: Whisper (tiny) for speech-to-text and a
# tiny GPT-2 checkpoint for text generation.
stt_model = pipeline(task="automatic-speech-recognition", model="openai/whisper-tiny")
nlp_model = pipeline(task="text-generation", model="sshleifer/tiny-gpt2")
# Load Kokoro-82M model and voicepack
def load_kokoro_model(model_path='kokoro-v0_19.pth', voice_name='af'):
    """Build the Kokoro-82M model and load one voicepack onto ``device``.

    Args:
        model_path: Checkpoint file handed to ``build_model``. Adjust to
            wherever the weights live.
        voice_name: Stem of the voicepack file under ``voices/``. The
            default ``'af'`` is a 50-50 mix of Bella & Sarah.

    Returns:
        A ``(model, voicepack)`` tuple, both placed on the module-level
        ``device``.
    """
    model = build_model(model_path, device)
    # map_location remaps storages at load time, so a voicepack that was
    # serialized from a CUDA tensor can still be loaded on a CPU-only host
    # (a plain torch.load followed by .to(device) fails before .to runs).
    voicepack = torch.load(
        f'voices/{voice_name}.pt',
        map_location=device,
        weights_only=True,
    )
    return model, voicepack


# Load the Kokoro model once when the app starts
kokoro_model, kokoro_voicepack = load_kokoro_model()
# Define the function to handle the full workflow
def conversation(audio):
    """Run one speech -> text -> reply -> speech round trip.

    Args:
        audio: Path to the recorded audio file (the interface input uses
            ``type="filepath"``), or ``None``/empty when nothing was
            recorded.

    Returns:
        A ``(transcription, response, speech)`` tuple, one item per interface
        output. ``speech`` is a ``(sample_rate, samples)`` pair for the
        ``gr.Audio`` output. When there is no input, the result is
        ``("", "", None)``.
    """
    # Submitting without a recording passes no file path; return empty
    # outputs instead of failing inside librosa.
    if not audio:
        return "", "", None

    # Step 1: Convert speech to text (audio is resampled to 16 kHz on load)
    audio_input, _ = librosa.load(audio, sr=16000)
    text = stt_model(audio_input)["text"]

    # Step 2: Generate a response using GPT-2.
    # max_new_tokens bounds only the generated continuation, so a
    # transcription longer than 50 tokens no longer conflicts with the limit
    # (max_length counts the prompt tokens as well).
    response = nlp_model(text, max_new_tokens=50)[0]["generated_text"]

    # Step 3: Convert response text to speech using Kokoro-82M.
    # Kokoro's usage example passes the first letter of the voice name as the
    # language code ('a' = American English, 'b' = British English); 'af' is a
    # voice name, not a language code.
    audio_response, _ = generate(kokoro_model, response, kokoro_voicepack, lang='a')

    # Return transcription, AI response, and generated audio. gr.Audio takes
    # a (sample_rate, samples) tuple rather than an IPython Audio widget;
    # 24000 is the sample rate this app uses for Kokoro output.
    return text, response, (24000, audio_response)
# Create Gradio Interface
interface = gr.Interface(
    fn=conversation,
    # Microphone input for live audio, delivered to `conversation` as a file
    # path. Gradio 4+ takes a list via `sources`; the older `source=` keyword
    # was removed.
    # NOTE(review): requires Gradio >= 4 -- confirm against the pinned version.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="AI Response"),
        gr.Audio(label="Generated Speech"),
    ],
)

# Launch the app
interface.launch(share=True)  # Set `share=True` if you want to share the app via a link