import os
import tempfile
import whisper
import gradio as gr
from gtts import gTTS
from groq import Groq
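# Note: these imports assume the following packages are installed (versions not
# pinned here): gradio, gTTS, groq, and openai-whisper, which also requires a
# system ffmpeg install for audio decoding.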
# Load Groq API key from environment variable
groq_api_key = os.getenv('GROQ_API_KEY')
if not groq_api_key:
    raise ValueError("GROQ_API_KEY environment variable is not set.")

groq_client = Groq(api_key=groq_api_key)
# Attempt to load the Whisper "base" model once at startup
try:
    whisper_model = whisper.load_model("base")
except AttributeError:
    # load_model is missing when the unrelated 'whisper' PyPI package is
    # installed instead of 'openai-whisper'
    print("Error: The 'whisper' library does not support 'load_model'.")
    whisper_model = None
def process_audio(audio_file):
    if whisper_model is None:
        return "Whisper model could not be loaded.", None
    try:
        # Transcribe the input audio using Whisper
        result = whisper_model.transcribe(audio_file)
        user_text = result['text']

        # Generate a response using the Llama 3 8B model via the Groq API
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "user", "content": user_text}
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content

        # Convert the response text to speech using gTTS
        tts = gTTS(text=response_text, lang='en')
        # Create a temp file and close its handle before gTTS writes to it
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp:
            audio_file_path = tmp.name
        tts.save(audio_file_path)

        return response_text, audio_file_path
    except Exception as e:
        return str(e), None
# Create Gradio interface with custom CSS
css = """
.gradio-container {
    background: linear-gradient(135deg, #6e45e2, #88d3ce);
    border-radius: 15px;
    box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.1);
    padding: 20px;
}
.gradio-input, .gradio-output {
    border-radius: 10px;
    box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.15);
}
.gradio-textbox {
    border: 2px solid #6e45e2;
    background: #fff;
    color: #333;
}
.gradio-audio {
    border: 2px solid #88d3ce;
    background: #fff;
}
.gradio-button {
    background: #6e45e2;
    color: #fff;
    border: none;
    border-radius: 8px;
    padding: 10px 20px;
    font-size: 16px;
    cursor: pointer;
}
.gradio-button:hover {
    background: #5a3d9c;
}
"""
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Response"), gr.Audio(label="Response Audio")],
    live=True,
    css=css,
)

if __name__ == "__main__":
    iface.launch()