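"""Voice-powered AI assistant demo built with Gradio.

Speech is transcribed with Distil-Whisper English running on Groq, a reply is
generated with Llama 3 70B on Groq, and the reply is spoken back through the
ElevenLabs text-to-speech API.

Dependencies: gradio, groq, numpy, soundfile, requests.
"""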
import gradio as gr
import groq
import io
import tempfile
import numpy as np
import soundfile as sf
import requests

def transcribe_audio(audio, api_key):
    if audio is None:
        return ""
    
    client = groq.Client(api_key=api_key)
    
    # Gradio's numpy audio input is a (sample_rate, data) tuple; re-encode it
    # as an in-memory WAV file for the transcription API
    sample_rate, audio_data = audio
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, sample_rate, format="wav")
    buffer.seek(0)

    try:
        # Use Distil-Whisper English powered by Groq for transcription
        completion = client.audio.transcriptions.create(
            model="distil-whisper-large-v3-en",
            file=("audio.wav", buffer),
            response_format="text"
        )
        return completion
    except Exception as e:
        return f"Error in transcription: {str(e)}"

def generate_response(transcription, api_key):
    if not transcription:
        return "No transcription available. Please try speaking again."
    
    client = groq.Client(api_key=api_key)
    
    try:
        # Use Llama 3 70B powered by Groq for text generation
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": transcription}
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"

def text_to_speech(text, elevenlabs_api_key, voice_id="21m00Tcm4TlvDq8ikWAM"):
    # The ElevenLabs endpoint requires a voice ID in the path. The default here
    # is the public "Rachel" voice; replace it with any voice from your account.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "xi-api-key": elevenlabs_api_key,
        "Content-Type": "application/json"
    }
    data = {
        "text": text,
        "voice_settings": {
            "stability": 0.75,
            "similarity_boost": 0.75
        }
    }
    response = requests.post(url, headers=headers, json=data)
    
    if response.status_code == 200:
        # Save the returned audio to a temporary file so the Gradio Audio
        # output component can play it back from a filepath.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            f.write(response.content)
        return f.name
    else:
        return None

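# End-to-end handler: transcribe the audio with Groq, generate a reply with
# Llama 3, then synthesize the reply as speech with ElevenLabs.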
def process_audio(audio, api_key, elevenlabs_api_key):
    if not api_key:
        return "Please enter your Groq API key.", "API key is required.", None
    if not elevenlabs_api_key:
        return "Please enter your Eleven Labs API key.", "API key is required.", None
    
    transcription = transcribe_audio(audio, api_key)
    response = generate_response(transcription, api_key)
    
    # Convert the response text to speech
    speech_audio = text_to_speech(response, elevenlabs_api_key)
    
    return transcription, response, speech_audio

# Custom CSS for the Groq badge and color scheme
custom_css = """
.gradio-container {
    background-color: #f5f5f5;
}
.gr-button-primary {
    background-color: #f55036 !important;
    border-color: #f55036 !important;
}
.gr-button-secondary {
    color: #f55036 !important;
    border-color: #f55036 !important;
}
#groq-badge {
    position: fixed;
    bottom: 20px;
    right: 20px;
    z-index: 1000;
}
"""

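# Build the Gradio UI: API key fields, a microphone/file input, the transcription
# and response text boxes, and an audio player for the spoken reply.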
with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
    gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant")
    
    api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
    elevenlabs_api_key_input = gr.Textbox(type="password", label="Enter your Eleven Labs API Key")
    
    with gr.Row():
        audio_input = gr.Audio(label="Speak!", type="numpy")
    
    with gr.Row():
        transcription_output = gr.Textbox(label="Transcription")
        response_output = gr.Textbox(label="AI Assistant Response")
        speech_output = gr.Audio(label="AI Assistant Response (Speech)")

    submit_button = gr.Button("Process", variant="primary")
    
    # Add the Groq badge
    gr.HTML("""
    <div id="groq-badge">
        <div style="color: #f55036; font-weight: bold;">POWERED BY GROQ</div>
    </div>
    """)
    
    submit_button.click(
        process_audio,
        inputs=[audio_input, api_key_input, elevenlabs_api_key_input],
        outputs=[transcription_output, response_output, speech_output]
    )
    
    gr.Markdown("""
    ## How to use this app:
    1. Enter your [Groq API Key](https://console.groq.com/keys) in the provided field.
    2. Enter your [Eleven Labs API Key](https://elevenlabs.io/) in the provided field.
    3. Click on the microphone icon and speak your message! You can also provide a supported audio file.
    4. Click the "Process" button to transcribe your speech and generate a response.
    5. The transcription and AI assistant response will appear in the respective text boxes, and you can listen to the response as speech.
    """)

demo.launch()