File size: 1,567 Bytes
d44a0d5
926a4ba
d44a0d5
 
 
4da6e73
731531e
d44a0d5
4da6e73
d44a0d5
 
4da6e73
d44a0d5
4da6e73
 
d44a0d5
 
4da6e73
 
 
 
d44a0d5
4da6e73
 
 
d44a0d5
4da6e73
 
 
 
d44a0d5
4da6e73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os

import gradio as gr
import whisper
from groq import Groq
from TTS.api import TTS

# Groq API key, read from the environment so the secret never lives in the
# source file. Any key that was previously committed here must be treated as
# leaked and revoked in the Groq console.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast with an actionable message instead of a late authentication
    # error on the first chat request.
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before starting the app."
    )

# Groq API client used for chat completions.
client = Groq(api_key=GROQ_API_KEY)

# Whisper "base" model (speech-to-text), loaded once at module import.
whisper_model = whisper.load_model("base")

# Coqui TTS model (text-to-speech), loaded once at module import.
tts = TTS("tts_models/en/ljspeech/speedy-speech")

# ✅ Chatbot logic
def chatbot(audio_file):
    """Answer a spoken question with both text and synthesized speech.

    Pipeline: Whisper transcribes the recording, the transcript is sent to a
    Groq-hosted LLaMA chat model as a single user message, and the reply is
    rendered to ``output.wav`` with Coqui TTS.

    Args:
        audio_file: Path to the input audio file. Falsy (``None`` or ``""``)
            when no audio was provided.

    Returns:
        A ``(text, audio_path)`` tuple. ``audio_path`` is ``None`` whenever
        there is nothing to speak: no input audio, no recognizable speech,
        or an empty model reply.
    """
    if not audio_file:
        return "No audio provided.", None

    # 1. Transcribe audio to text.
    transcription = whisper_model.transcribe(audio_file)
    user_text = transcription["text"].strip()
    if not user_text:
        # Silence or pure noise: do not send an empty prompt to the LLM.
        return "No speech detected in the audio.", None

    # 2. Get the chatbot response from Groq.
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_text}],
        # "llama-3-8b-instruct" is not a Groq model ID; use the hosted
        # LLaMA 3.1 8B model (see Groq's supported-models list).
        model="llama-3.1-8b-instant",
    )
    # The message content may be None; normalize so the checks below are safe.
    response_text = chat_completion.choices[0].message.content or ""
    if not response_text.strip():
        # Nothing to synthesize; skip TTS rather than feed it empty text.
        return "The model returned an empty response.", None

    # 3. Convert the response to speech.
    audio_output_path = "output.wav"
    tts.tts_to_file(text=response_text, file_path=audio_output_path)

    return response_text, audio_output_path

# Gradio UI: one audio input (passed to `chatbot` as a file path) and two
# outputs, the reply text and the synthesized reply audio.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Chatbot Response"), gr.Audio(label="Voice Output")],
    # The emoji was mis-encoded (UTF-8 bytes decoded with a legacy code page);
    # restored to the intended studio-microphone character.
    title="🎙️ Voice Chatbot with Whisper + Groq LLaMA-3",
    description="Upload a voice file, and the chatbot will generate a response in both text and voice.",
)

# Start the Gradio web server.
iface.launch()