# Hugging Face Space: voice chatbot (Whisper STT + Groq LLaMA-3 + Coqui TTS).
import os

import gradio as gr
import whisper
from groq import Groq
from TTS.api import TTS
# β
Hardcoded Groq API key (for local use only)
GROQ_API_KEY = "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"
# β
Initialize Groq API client
client = Groq(api_key=GROQ_API_KEY)
# β
Load Whisper model (Speech-to-Text)
whisper_model = whisper.load_model("base")
# β
Load Coqui TTS model (Text-to-Speech)
tts = TTS("tts_models/en/ljspeech/speedy-speech")
# β
Chatbot logic
def chatbot(audio_file):
if not audio_file:
return "No audio provided.", None
# 1. Transcribe audio to text
result = whisper_model.transcribe(audio_file)
user_text = result["text"]
# 2. Get chatbot response from Groq (π οΈ Fix incorrect model name)
chat_completion = client.chat.completions.create(
messages=[{"role": "user", "content": user_text}],
model="llama-3-8b-instruct", # β
Valid model name
)
response_text = chat_completion.choices[0].message.content
# 3. Convert response to speech
audio_output_path = "output.wav"
tts.tts_to_file(text=response_text, file_path=audio_output_path)
return response_text, audio_output_path
# β
Gradio UI
iface = gr.Interface(
fn=chatbot,
inputs=gr.Audio(type="filepath"),
outputs=[gr.Textbox(label="Chatbot Response"), gr.Audio(label="Voice Output")],
title="ποΈ Voice Chatbot with Whisper + Groq LLaMA-3",
description="Upload a voice file, and the chatbot will generate a response in both text and voice.",
)
# β
Launch
iface.launch()