import os

import gradio as gr
import whisper
from groq import Groq
from TTS.api import TTS

# ✅ Read the Groq API key from the environment (never hardcode or commit a real key)
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# ✅ Initialize Groq API client
client = Groq(api_key=GROQ_API_KEY)

# ✅ Load Whisper model (Speech-to-Text)
whisper_model = whisper.load_model("base")

# ✅ Load Coqui TTS model (Text-to-Speech)
tts = TTS("tts_models/en/ljspeech/speedy-speech")

# ✅ Chatbot logic
def chatbot(audio_file):
    if not audio_file:
        return "No audio provided.", None

    # 1. Transcribe audio to text
    result = whisper_model.transcribe(audio_file)
    user_text = result["text"]

    # 2. Get chatbot response from Groq
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_text}],
        model="llama3-8b-8192",  # ✅ Valid Groq model ID (check Groq's current model list if this is deprecated)
    )
    response_text = chat_completion.choices[0].message.content

    # 3. Convert response to speech
    audio_output_path = "output.wav"
    tts.tts_to_file(text=response_text, file_path=audio_output_path)

    return response_text, audio_output_path

# ✅ Gradio UI
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Chatbot Response"), gr.Audio(label="Voice Output")],
    title="🎙️ Voice Chatbot with Whisper + Groq LLaMA-3",
    description="Upload a voice file, and the chatbot will generate a response in both text and voice.",
)

# ✅ Launch
iface.launch()
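
# ── How to run (a minimal setup note; the package names below assume the
#    PyPI distributions openai-whisper, gradio, TTS, and groq, and "app.py"
#    is a hypothetical filename — Whisper also needs ffmpeg on your PATH):
#
#   pip install openai-whisper gradio TTS groq
#   export GROQ_API_KEY="your-key-here"
#   python app.py
#
# Gradio serves the UI at http://127.0.0.1:7860 by default.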