# app.py -- "Update app.py" by Abbas0786 (commit e41a473, verified)
import streamlit as st
import requests
from pydub import AudioSegment
from pydub.playback import play
from io import BytesIO
import os
from gtts import gTTS
import speech_recognition as sr
from groq import Groq
import tempfile
# Groq client configuration: the API key is taken from the GROQ_API_KEY
# environment variable (None when the variable is unset).
groq_api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)
def process_text(text):
    """Ask the Groq chat model to answer *text*.

    Args:
        text: The user's message, sent as a single ``user`` turn.

    Returns:
        The content of the first completion choice. If anything goes wrong
        while calling the API, a string starting with
        ``"Error fetching Groq data: "`` is returned instead of raising.
    """
    conversation = [{"role": "user", "content": text}]
    try:
        completion = client.chat.completions.create(
            messages=conversation,
            model="llama3-8b-8192",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        # Failures are reported to the caller as text rather than propagated.
        return f"Error fetching Groq data: {exc}"
def text_to_speech(text, lang='ur'):
    """Synthesize *text* with gTTS into an in-memory audio stream.

    Args:
        text: The text to speak.
        lang: Language code handed to gTTS (defaults to ``'ur'``).

    Returns:
        A ``BytesIO`` holding the synthesized audio, rewound to the start so
        it can be read or played immediately.
    """
    speech_buffer = BytesIO()
    gTTS(text=text, lang=lang).write_to_fp(speech_buffer)
    # Rewind: write_to_fp leaves the cursor at the end of the data.
    speech_buffer.seek(0)
    return speech_buffer
def audio_to_text(audio_file):
    """Transcribe an audio stream to Urdu text via Google speech recognition.

    The stream is spooled to a temporary file so ``sr.AudioFile`` can open it
    by path. The temporary file is always removed before returning.

    Args:
        audio_file: Binary file-like object (must provide ``read()``) whose
            contents ``sr.AudioFile`` is able to open.

    Returns:
        The recognized text on success; ``"Could not understand audio"`` when
        the speech is unintelligible; or a string starting with
        ``"Could not request results; "`` when the recognition request fails.
    """
    recognizer = sr.Recognizer()

    # delete=False lets the file be reopened by name; it is closed (and thus
    # flushed) on leaving the `with`, before sr.AudioFile opens it.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(audio_file.read())
        temp_path = temp_file.name

    try:
        with sr.AudioFile(temp_path) as source:
            audio_data = recognizer.record(source)
    finally:
        # Clean up even if the audio could not be opened, so repeated
        # uploads do not accumulate files in the temp directory.
        os.remove(temp_path)

    try:
        return recognizer.recognize_google(audio_data, language='ur')
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"
# Streamlit UI
#
# Two input modes are offered: recording from a microphone, or uploading a
# WAV/MP3 file. In both cases the transcript is sent to Groq and the reply is
# shown as text and played back as synthesized speech.


def _respond(text):
    """Send *text* to Groq, display the reply, and play it back as audio."""
    response_text = process_text(text)
    st.write(f"Response: {response_text}")
    audio_file = text_to_speech(response_text)
    st.audio(audio_file, format='audio/mp3')


st.title("Urdu Voice Assistant")
mode = st.radio("Choose input method", ("Real-time Voice", "Upload Voice File"))

if mode == "Real-time Voice":
    st.write("Click the button and start speaking.")
    if st.button("Start Recording"):
        st.write("Recording... Please wait.")
        recognizer = sr.Recognizer()
        # NOTE(review): sr.Microphone presumably opens an input device on the
        # machine running this script, not in the visitor's browser -- confirm
        # this is what is wanted for hosted deployments.
        with sr.Microphone() as source:
            audio_data = recognizer.listen(source)
        st.write("Processing...")
        try:
            text = recognizer.recognize_google(audio_data, language='ur')
        except sr.UnknownValueError:
            st.write("Sorry, could not understand the audio.")
        except sr.RequestError as e:
            st.write(f"Sorry, there was an error with the request: {e}")
        else:
            st.write(f"You said: {text}")
            _respond(text)

elif mode == "Upload Voice File":
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if uploaded_file:
        st.write("Processing...")
        if uploaded_file.type == "audio/mpeg":
            # Convert MP3 to WAV entirely in memory; the previous on-disk
            # temporary .wav was created with delete=False and never removed.
            wav_buffer = BytesIO()
            AudioSegment.from_mp3(uploaded_file).export(wav_buffer, format="wav")
            wav_buffer.seek(0)
            text = audio_to_text(wav_buffer)
        else:
            text = audio_to_text(uploaded_file)
        st.write(f"Transcribed Text: {text}")
        # NOTE: audio_to_text reports recognition failures as ordinary
        # strings, so a failure message is forwarded to Groq like any
        # other transcript.
        _respond(text)