import os
import tempfile
import whisper
import gradio as gr
from gtts import gTTS
from groq import Groq
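# Assumed dependencies (install with pip): openai-whisper, gradio, gTTS, groq.
# Note that `import whisper` here expects the openai-whisper package; the unrelated
# `whisper` package on PyPI does not provide load_model(), hence the guard below.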
# Load Groq API key from environment variable
groq_api_key = os.getenv('GROQ_API_KEY')
if not groq_api_key:
    raise ValueError("GROQ_API_KEY environment variable is not set.")
groq_client = Groq(api_key=groq_api_key)
# Attempt to load Whisper model
try:
    whisper_model = whisper.load_model("base")
except AttributeError:
    print("Error: The 'whisper' library does not support 'load_model'.")
    whisper_model = None
def process_audio(audio_file):
    if whisper_model is None:
        return "Whisper model could not be loaded.", None
    try:
        # Transcribe the recorded audio to text with Whisper
        result = whisper_model.transcribe(audio_file)
        user_text = result['text']
        # Generate a reply with the Llama 3 8B model via the Groq API
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "user", "content": user_text}
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content
        # Convert the response text to speech with gTTS and save it to a temporary MP3 file
        tts = gTTS(text=response_text, lang='en')
        audio_file_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3').name
        tts.save(audio_file_path)
        return response_text, audio_file_path
    except Exception as e:
        return str(e), None
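# Hypothetical quick check without the Gradio UI (the file path is illustrative only):
#   reply, reply_audio = process_audio("sample.wav")
#   print(reply, reply_audio)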
# Create Gradio interface with custom CSS
css = """
.gradio-container {
background: linear-gradient(135deg, #6e45e2, #88d3ce);
border-radius: 15px;
box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.1);
padding: 20px;
}
.gradio-input, .gradio-output {
border-radius: 10px;
box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.15);
}
.gradio-textbox {
border: 2px solid #6e45e2;
background: #fff;
color: #333;
}
.gradio-audio {
border: 2px solid #88d3ce;
background: #fff;
}
.gradio-button {
background: #6e45e2;
color: #fff;
border: none;
border-radius: 8px;
padding: 10px 20px;
font-size: 16px;
cursor: pointer;
}
.gradio-button:hover {
background: #5a3d9c;
}
"""
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Response"), gr.Audio(label="Response Audio")],
    live=True,
    css=css
)
if __name__ == "__main__":
iface.launch()