Spaces:

Mendoza33
/

test-do-call

Runtime error

App Files Files Community

test-do-call / app.py

Mendoza33

Update app.py

95daf41 verified 7 months ago

raw

history blame

2.08 kB

	import gradio as gr
	from transformers import pipeline
	import torch
	import librosa
	import os

	# Custom imports for Kokoro-82M
	from models import build_model
	from kokoro import generate
	from IPython.display import Audio

	# Build the two Hugging Face pipelines used by the app: Whisper (tiny) for
	# speech recognition and a tiny GPT-2 checkpoint for text generation.
	stt_model = pipeline(
	    task="automatic-speech-recognition",
	    model="openai/whisper-tiny",
	)
	nlp_model = pipeline(
	    task="text-generation",
	    model="sshleifer/tiny-gpt2",
	)

	# Pick the torch device: CUDA when a GPU is visible, otherwise the CPU.
	if torch.cuda.is_available():
	    device = 'cuda'
	else:
	    device = 'cpu'

	def load_kokoro_model(model_path='kokoro-v0_19.pth', voice_name='af'):
	    """Build the Kokoro-82M model and load one voicepack onto ``device``.

	    Args:
	        model_path: Checkpoint path passed to ``build_model``. Defaults to
	            the path this app previously hard-coded.
	        voice_name: Stem of the voicepack file under ``voices/``. Defaults
	            to ``'af'`` (the default voice, a 50-50 mix of Bella & Sarah).

	    Returns:
	        A ``(model, voicepack)`` tuple; the voicepack has been moved to the
	        module-level ``device``.
	    """
	    model = build_model(model_path, device)
	    # weights_only=True restricts unpickling to tensors and primitive
	    # containers, so a voicepack file cannot execute arbitrary code on load.
	    voicepack = torch.load(f'voices/{voice_name}.pt', weights_only=True).to(device)
	    return model, voicepack

	# Load the Kokoro model and voicepack once at import time; conversation()
	# reads these module-level names on every call instead of reloading them.
	kokoro_model, kokoro_voicepack = load_kokoro_model()

	def conversation(audio):
	    """Run one voice turn: transcribe the recording, generate a reply, speak it.

	    Args:
	        audio: Filesystem path to the recorded audio, or ``None`` when the
	            form was submitted without a recording.

	    Returns:
	        A ``(transcription, response, speech)`` tuple. ``speech`` is a
	        ``(sample_rate, samples)`` pair, the in-memory form a ``gr.Audio``
	        output component accepts. When ``audio`` is ``None`` the result is
	        ``("", "", None)``.
	    """
	    # Nothing was recorded: return empty outputs instead of crashing in
	    # librosa.load(None).
	    if audio is None:
	        return "", "", None

	    # Step 1: Convert speech to text. The pipeline does no sample-rate check
	    # on raw arrays, so resample to 16 kHz while loading.
	    audio_input, _ = librosa.load(audio, sr=16000)
	    text = stt_model(audio_input)["text"]

	    # Step 2: Generate a response using GPT-2. max_new_tokens bounds only the
	    # continuation; max_length would also count the prompt tokens and is
	    # exceeded as soon as the transcription itself is long.
	    response = nlp_model(text, max_new_tokens=50)[0]["generated_text"]

	    # Step 3: Convert response text to speech using Kokoro-82M.
	    # NOTE: `lang` is the single-letter language code (the first letter of
	    # the voice name, 'a' for the 'af' voicepack), not the voice name itself
	    # -- per the Kokoro-82M v0.19 usage example.
	    audio_response, _ = generate(kokoro_model, response, kokoro_voicepack, lang='a')

	    # gr.Audio cannot render an IPython display object; hand it the
	    # (sample_rate, samples) tuple it documents for in-memory audio.
	    return text, response, (24000, audio_response)

	# Create the Gradio interface: microphone audio in; transcription, AI reply
	# text and synthesized speech out.
	interface = gr.Interface(
	    fn=conversation,
	    # Gradio 4+ takes a list through `sources`; the singular `source`
	    # keyword was removed and raises a TypeError there.
	    inputs=gr.Audio(sources=["microphone"], type="filepath"),
	    outputs=[
	        gr.Textbox(label="Transcription"),
	        gr.Textbox(label="AI Response"),
	        gr.Audio(label="Generated Speech"),
	    ],
	)

	# Launch the app. `share=True` requests a public share link.
	interface.launch(share=True)