# Voice-powered AI assistant: Gradio UI + Groq (Distil-Whisper / Llama 3) + ElevenLabs TTS.
import io
import tempfile

import gradio as gr
import groq
import numpy as np
import requests
import soundfile as sf
def transcribe_audio(audio, api_key):
    """Transcribe a recorded clip to text with Groq's Distil-Whisper model.

    Args:
        audio: Gradio numpy-audio tuple ``(sample_rate, samples)``, or None
            when nothing was recorded.
        api_key: Groq API key.

    Returns:
        The transcription text, ``""`` when no audio was provided, or an
        error message string when the API call fails.
    """
    if audio is None:
        return ""

    client = groq.Client(api_key=api_key)

    # Re-encode the raw samples as an in-memory WAV file for upload.
    sample_rate, samples = audio[0], audio[1]
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, sample_rate, format='wav')
    wav_buffer.seek(0)

    try:
        # Distil-Whisper English, served by Groq.
        return client.audio.transcriptions.create(
            model="distil-whisper-large-v3-en",
            file=("audio.wav", wav_buffer),
            response_format="text",
        )
    except Exception as e:
        return f"Error in transcription: {str(e)}"
def generate_response(transcription, api_key):
    """Generate an assistant reply for the transcribed text via Groq.

    Args:
        transcription: user message text; falsy values short-circuit with a
            retry prompt instead of calling the API.
        api_key: Groq API key.

    Returns:
        The model's reply text, a retry prompt when transcription is empty,
        or an error message string when the API call fails.
    """
    if not transcription:
        return "No transcription available. Please try speaking again."

    client = groq.Client(api_key=api_key)
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": transcription},
    ]

    try:
        # Llama 3 70B, served by Groq.
        chat = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=conversation,
        )
        return chat.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"
def text_to_speech(text, elevenlabs_api_key, voice_id="21m00Tcm4TlvDq8ikWAM"):
    """Convert text to speech using the ElevenLabs API.

    Args:
        text: the text to synthesize; empty/None returns None immediately.
        elevenlabs_api_key: ElevenLabs API key.
        voice_id: which ElevenLabs voice to synthesize with; defaults to the
            public "Rachel" pre-made voice.

    Returns:
        io.BytesIO holding the returned audio bytes (MP3 by default) on
        success, otherwise None.
    """
    # Nothing to synthesize — skip the network round-trip.
    if not text:
        return None

    # The TTS endpoint requires the voice ID as a path segment; POSTing to
    # the bare /v1/text-to-speech URL is not a valid endpoint and fails.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "xi-api-key": elevenlabs_api_key,
        "Content-Type": "application/json",
    }
    data = {
        "text": text,
        "voice_settings": {
            "stability": 0.75,
            "similarity_boost": 0.75,
        },
    }

    # Timeout so a hung request cannot stall the whole Gradio handler.
    response = requests.post(url, headers=headers, json=data, timeout=60)
    if response.status_code == 200:
        return io.BytesIO(response.content)
    return None
def process_audio(audio, api_key, elevenlabs_api_key):
    """Full pipeline for the Gradio handler: speech -> text -> reply -> speech.

    Args:
        audio: Gradio numpy-audio tuple ``(sample_rate, samples)`` or None.
        api_key: Groq API key.
        elevenlabs_api_key: ElevenLabs API key.

    Returns:
        Tuple of (transcription, response_text, speech) where speech is a
        filepath to the synthesized reply audio, or None when synthesis
        failed or a key was missing.
    """
    # Validate both keys up front so the user gets a clear message instead
    # of an API error.
    if not api_key:
        return "Please enter your Groq API key.", "API key is required.", None
    if not elevenlabs_api_key:
        return "Please enter your Eleven Labs API key.", "API key is required.", None

    transcription = transcribe_audio(audio, api_key)
    response = generate_response(transcription, api_key)

    # Convert the response text to speech.
    speech_audio = text_to_speech(response, elevenlabs_api_key)
    if speech_audio is None:
        # TTS failed; still show the text results.
        return transcription, response, None

    # gr.Audio output components accept a filepath but not a BytesIO, so
    # persist the bytes to a temp file (ElevenLabs returns MP3 by default —
    # TODO confirm the output format setting used by the account).
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(speech_audio.read())
        speech_path = tmp.name
    return transcription, response, speech_path
# Custom CSS for the Groq badge and color scheme
# Styles the overall container background, primary/secondary buttons in the
# Groq brand orange (#f55036), and pins the #groq-badge element to the
# bottom-right corner of the page.
# NOTE(review): this only takes effect if passed to gr.Blocks via
# css=custom_css — confirm the Blocks constructor below receives it.
custom_css = """
.gradio-container {
background-color: #f5f5f5;
}
.gr-button-primary {
background-color: #f55036 !important;
border-color: #f55036 !important;
}
.gr-button-secondary {
color: #f55036 !important;
border-color: #f55036 !important;
}
#groq-badge {
position: fixed;
bottom: 20px;
right: 20px;
z-index: 1000;
}
"""
# Build the Gradio UI. css=custom_css is required for the badge and
# color-scheme styles defined above to take effect — previously the
# custom_css string was defined but never passed in (dead code).
with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
    gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant")

    # API keys are supplied by the end user at runtime (masked inputs).
    api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
    elevenlabs_api_key_input = gr.Textbox(type="password", label="Enter your Eleven Labs API Key")

    with gr.Row():
        audio_input = gr.Audio(label="Speak!", type="numpy")

    with gr.Row():
        transcription_output = gr.Textbox(label="Transcription")
        response_output = gr.Textbox(label="AI Assistant Response")

    speech_output = gr.Audio(label="AI Assistant Response (Speech)")
    submit_button = gr.Button("Process", variant="primary")

    # Add the Groq badge
    gr.HTML("""
<div id="groq-badge">
<div style="color: #f55036; font-weight: bold;">POWERED BY GROQ</div>
</div>
""")

    # Wire the button to the full speech -> text -> reply -> speech pipeline.
    submit_button.click(
        process_audio,
        inputs=[audio_input, api_key_input, elevenlabs_api_key_input],
        outputs=[transcription_output, response_output, speech_output],
    )

    gr.Markdown("""
## How to use this app:
1. Enter your [Groq API Key](https://console.groq.com/keys) in the provided field.
2. Enter your [Eleven Labs API Key](https://elevenlabs.io/) in the provided field.
3. Click on the microphone icon and speak your message! You can also provide a supported audio file.
4. Click the "Process" button to transcribe your speech and generate a response.
5. The transcription and AI assistant response will appear in the respective text boxes, and you can listen to the response as speech.
""")

demo.launch()