Spaces:

pradeepsengarr
/

vlogs

Sleeping

File size: 1,567 Bytes

d44a0d5
926a4ba
d44a0d5
 
 
4da6e73
731531e
d44a0d5
4da6e73
d44a0d5
 
4da6e73
d44a0d5
4da6e73
 
d44a0d5
 
4da6e73
 
 
 
d44a0d5
4da6e73
 
 
d44a0d5
4da6e73
 
 
 
d44a0d5
4da6e73

import os
import tempfile

import whisper
import gradio as gr
from TTS.api import TTS
from groq import Groq

# ✅ Groq API key is read from the environment so the secret never lives in
# source control (on a hosted deployment, configure it as a secret/env var).
# NOTE: any key that was previously committed to this file must be treated as
# compromised and rotated in the Groq console.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast with an actionable message instead of an opaque auth error
    # on the first chat request.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "configure it before starting the app."
    )

# ✅ Initialize Groq API client
client = Groq(api_key=GROQ_API_KEY)

# ✅ Load Whisper model (Speech-to-Text)
# Loaded once at import time so every request reuses the same model instance.
whisper_model = whisper.load_model("base")

# ✅ Load Coqui TTS model (Text-to-Speech)
tts = TTS("tts_models/en/ljspeech/speedy-speech")

# ✅ Chatbot logic
# Groq model ID used for chat completions. The previous value
# ("llama-3-8b-instruct") does not appear in Groq's published model list;
# NOTE(review): confirm this ID against Groq's current supported-models page.
GROQ_MODEL = "llama-3.1-8b-instant"


def chatbot(audio_file):
    """Answer a spoken question with text and synthesized speech.

    Pipeline: transcribe the audio with Whisper, send the transcript to the
    Groq chat-completions API, then synthesize the reply with Coqui TTS.

    Args:
        audio_file: Path to the recorded/uploaded audio file, or a falsy
            value when the user submitted nothing.

    Returns:
        A ``(response_text, audio_path)`` tuple. ``audio_path`` is the path
        of a WAV file containing the spoken reply, or ``None`` when no reply
        could be produced (in which case ``response_text`` explains why).
    """
    if not audio_file:
        return "No audio provided.", None

    # 1. Transcribe audio to text
    result = whisper_model.transcribe(audio_file)
    user_text = result["text"].strip()
    if not user_text:
        # Silence or unintelligible audio: don't send an empty prompt.
        return "Could not detect any speech in the audio.", None

    # 2. Get chatbot response from Groq
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_text}],
        model=GROQ_MODEL,
    )
    response_text = chat_completion.choices[0].message.content
    if not response_text:
        # Nothing to synthesize; avoid handing empty/None text to TTS.
        return "The model returned an empty response.", None

    # 3. Convert response to speech
    # Use a unique file per request: a fixed "output.wav" would be overwritten
    # when two requests are handled at the same time. The file is left on disk
    # (delete=False) because the caller needs the path after this returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_output_path = tmp.name
    tts.tts_to_file(text=response_text, file_path=audio_output_path)

    return response_text, audio_output_path

# ✅ Gradio UI
# Components are created up front (input first, then outputs) and then wired
# to the `chatbot` callback.
_voice_input = gr.Audio(type="filepath")
_reply_components = [
    gr.Textbox(label="Chatbot Response"),
    gr.Audio(label="Voice Output"),
]

iface = gr.Interface(
    fn=chatbot,
    inputs=_voice_input,
    outputs=_reply_components,
    title="🎙️ Voice Chatbot with Whisper + Groq LLaMA-3",
    description="Upload a voice file, and the chatbot will generate a response in both text and voice.",
)

# ✅ Launch
# Starts the web server for the interface defined above.
iface.launch()