File size: 2,534 Bytes
19ab03c
 
 
 
 
 
 
23a063a
 
 
 
 
85ac7a5
19ab03c
23a063a
9ce69f4
 
 
 
 
19ab03c
 
9ce69f4
 
 
19ab03c
 
 
 
 
 
 
 
23a063a
19ab03c
 
 
 
 
 
 
 
 
 
 
 
 
 
e4852e2
19ab03c
e4852e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19ab03c
 
 
 
e4852e2
 
19ab03c
e4852e2
19ab03c
 
10654d8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import tempfile
import whisper
import gradio as gr
from gtts import gTTS
from groq import Groq

# The Groq client needs a credential, so refuse to start when the
# GROQ_API_KEY environment variable is missing or empty.
groq_api_key = os.environ.get('GROQ_API_KEY')
if not groq_api_key:
    raise ValueError("GROQ_API_KEY environment variable is not set.")

groq_client = Groq(api_key=groq_api_key)

# Start from "no model" and only replace it when loading succeeds. An
# AttributeError is treated as "the imported `whisper` module has no
# load_model"; process_audio checks for None and reports it to the user.
whisper_model = None
try:
    whisper_model = whisper.load_model("base")
except AttributeError:
    print("Error: The 'whisper' library does not support 'load_model'.")

def _synthesize_speech(text):
    """Render *text* as English speech into a new temporary MP3 file.

    Args:
        text: Non-empty text to speak.

    Returns:
        Path of the MP3 file that was written. The file is created with
        ``delete=False``, so it stays on disk after this function returns.

    Raises:
        Exception: Whatever gTTS raises; the temporary file is removed
            before the exception is re-raised.
    """
    tts = gTTS(text=text, lang='en')

    # Only the path is needed. Closing the handle right away avoids leaking
    # one file descriptor per request and lets gTTS reopen the path for
    # writing (an already-open temp file cannot be reopened on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp:
        audio_file_path = tmp.name

    try:
        tts.save(audio_file_path)
    except Exception:
        # Do not leave an orphaned empty file behind when synthesis fails.
        try:
            os.remove(audio_file_path)
        except OSError:
            pass
        raise
    return audio_file_path


def process_audio(audio_file):
    """Transcribe a recording, ask the LLM for a reply, and voice that reply.

    Args:
        audio_file: Path to the recorded audio file. May be ``None`` or empty
            when no recording is available.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of an MP3 file speaking it.
        When there is nothing to process or a step fails, ``text`` is a
        human-readable message and ``audio_path`` is ``None``.
    """
    # The interface runs in live mode, so this handler may be invoked with no
    # recording (e.g. after the input is cleared). There is nothing to
    # transcribe in that case, so answer without touching any model.
    if not audio_file:
        return "No audio input provided.", None

    if whisper_model is None:
        return "Whisper model could not be loaded.", None

    try:
        # Transcribe audio using Whisper
        result = whisper_model.transcribe(audio_file)
        user_text = result['text']
        if not user_text or not user_text.strip():
            # Silence/noise: skip the LLM call instead of sending a blank prompt.
            return "No speech was detected in the audio.", None

        # Generate response using Llama 8b model with Groq API
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "user", "content": user_text}
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content
        if not response_text or not response_text.strip():
            # gTTS refuses empty text; report that clearly instead of
            # surfacing its internal assertion message.
            return "The model returned an empty response.", None

        # Convert response text to speech using gTTS
        return response_text, _synthesize_speech(response_text)
    except Exception as e:
        # UI boundary: show the failure as text rather than crashing the request.
        return str(e), None

# Custom stylesheet handed to gr.Interface through its `css` argument.
# It sets a purple-to-teal gradient with rounded corners and padding on
# `.gradio-container`, rounded/shadowed input and output areas, bordered
# white textbox and audio widgets, and a purple button with a darker hover
# colour.
# NOTE(review): apart from `.gradio-container`, whether these class names
# match the elements rendered by the installed Gradio version is not visible
# from this file - confirm in the browser.
css = """
    .gradio-container {
        background: linear-gradient(135deg, #6e45e2, #88d3ce);
        border-radius: 15px;
        box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.1);
        padding: 20px;
    }
    .gradio-input, .gradio-output {
        border-radius: 10px;
        box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.15);
    }
    .gradio-textbox {
        border: 2px solid #6e45e2;
        background: #fff;
        color: #333;
    }
    .gradio-audio {
        border: 2px solid #88d3ce;
        background: #fff;
    }
    .gradio-button {
        background: #6e45e2;
        color: #fff;
        border: none;
        border-radius: 8px;
        padding: 10px 20px;
        font-size: 16px;
        cursor: pointer;
    }
    .gradio-button:hover {
        background: #5a3d9c;
    }
"""

# Build the components first, in the same order as they were created inline:
# the audio input (delivered to the handler as a file path), then the text
# and audio outputs.
_audio_input = gr.Audio(type="filepath")
_response_outputs = [
    gr.Textbox(label="Response"),
    gr.Audio(label="Response Audio"),
]

# Wire the handler to the components. `live=True` asks Gradio to rerun the
# handler when the input changes; `css` applies the custom stylesheet.
iface = gr.Interface(
    process_audio,
    _audio_input,
    _response_outputs,
    live=True,
    css=css,
)

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()