"""Voice-driven English tutoring app ("Sam") built with Gradio.

Flow for each recording: the audio file is transcribed with Google speech
recognition, the text is sent to the DeepSeek chat-completions API (which is
asked to answer as a JSON object), the tutor's reply is synthesised through a
remote text-to-speech Space, and the chat, transcript and feedback panels are
updated.

Required environment variables: ``DEEPSEEK_API_KEY``, ``HUGGINGFACE_TOKEN``
and ``TTS_PASSWORD``. Importing this module raises ``ValueError`` if any of
them is missing.
"""

import json
import os
import random
import re

import gradio as gr
import requests
import speech_recognition as sr
from dotenv import load_dotenv
from gradio_client import Client
from pydub import AudioSegment

load_dotenv()

API_KEY = os.getenv("DEEPSEEK_API_KEY")
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
TTS_PASSWORD = os.getenv("TTS_PASSWORD")

if not all([API_KEY, HF_TOKEN, TTS_PASSWORD]):
    raise ValueError("Missing required environment variables!")

DEEPSEEK_CHAT_URL = "https://api.deepseek.com/v1/chat/completions"
# Upper bound (seconds) for one chat-completion request. Without a timeout,
# ``requests`` waits indefinitely and a stalled call would hang the UI handler.
REQUEST_TIMEOUT_SECONDS = 60

TTS_CLIENT = Client(
    "KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited",
    hf_token=HF_TOKEN,
)
recognizer = sr.Recognizer()

MAIN_SYSTEM_PROMPT = {
    "role": "system",
    "content": """You are Sam, an intelligent and proactive English tutor. You drive the conversation and actively engage students.
Your responses must be in JSON format with these keys:
'response': Your main response (keep it conversational and engaging),
'corrections': ALWAYS provide specific grammar or pronunciation corrections with examples (if none needed, say "Great grammar!"),
'vocabulary': ALWAYS suggest alternative words/phrases with explanations (if none needed, suggest related vocabulary),
'level_assessment': Current assessment (beginner/intermediate/advanced),
'encouragement': A motivating comment,
'context_memory': Important details about the user,
'next_question': A follow-up question to keep conversation flowing

IMPORTANT: You MUST always provide corrections and vocabulary suggestions in every response. Even if the student speaks perfectly, provide positive feedback and suggest advanced vocabulary or alternative expressions.

Your personality:
- Be the conversation driver - ask follow-up questions
- Show genuine interest in the student's life
- Provide corrections naturally without stopping the flow
- Use the student's name frequently
- Build on previous topics
- Be encouraging but provide constructive feedback
- Ask about their day, work, hobbies, culture, goals

Correction guidelines:
- ALWAYS provide corrections field - even if it's positive feedback
- ALWAYS provide vocabulary field - suggest alternatives or related words
- Use format: "Instead of 'X', try saying 'Y'"
- Give pronunciation tips when needed
- If no mistakes, say "Excellent grammar!" or "Perfect sentence structure!"

Vocabulary guidelines:
- ALWAYS suggest vocabulary - even if it's synonyms or advanced alternatives
- Provide explanations for suggested words
- Use format: "Instead of 'good', try 'excellent' or 'outstanding'"
- Suggest topic-related vocabulary

Conversation flow:
- Start with personal questions (name, country, job, hobbies)
- Build conversations around their interests
- Use profession-specific vocabulary
- Ask about their culture and experiences
- Keep the conversation natural and flowing
- Always end with a question to continue the dialogue

Response length: Keep responses conversational (2-3 sentences max for response field).""",
}

WELCOME_PROMPT = {
    "role": "system",
    "content": """Create a heartfelt welcome message that:
1. Introduces you as Sam, an enthusiastic and friendly English tutor who’s excited to guide them
2. Kindly asks for their name and where they’re from in a natural conversational way
3. Expresses genuine excitement about helping them grow

Return the message in JSON format with the key 'greeting'.
Make it feel personal, warm, and inviting — like a tutor who truly cares. Keep it within 2 sentences.
Example: {"greeting": "Hi there! I'm Sam, your friendly English tutor — so glad you're here! What's your name and where are you from?"}
""",
}

FALLBACK_GREETING = (
    "Hi! I'm Sam, your English tutor. What's your name and where are you from?"
)

# Patterns used to pull facts out of a free-text ``context_memory`` string.
# The optional "is" lets "name is Ahmed" yield "Ahmed" rather than "is".
_NAME_PATTERN = re.compile(r"name(?:\s+is\b)?[:\s]+([A-Za-z]+)", re.IGNORECASE)
_COUNTRY_PATTERN = re.compile(
    r"(?:from|country)(?:\s+is\b)?[:\s]+([A-Za-z\s]+)", re.IGNORECASE
)
# Emojis the app itself uses as decoration; they are stripped before TTS.
_TTS_EMOJI_PATTERN = re.compile(r"[🎯🌟✨💫🎤🤖]")


def _request_json_completion(messages, temperature):
    """Send ``messages`` to DeepSeek and return the reply parsed as JSON.

    Args:
        messages: List of chat messages (``{"role": ..., "content": ...}``).
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The JSON value decoded from the first choice's message content.

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            HTTP status.
        KeyError, IndexError, ValueError: If the response body does not have
            the expected shape or the content is not valid JSON.
    """
    response = requests.post(
        DEEPSEEK_CHAT_URL,
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={
            "model": "deepseek-chat",
            "messages": messages,
            "temperature": temperature,
            "response_format": {"type": "json_object"},
        },
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Surface HTTP errors explicitly instead of failing later on a missing
    # "choices" key.
    response.raise_for_status()
    return json.loads(response.json()["choices"][0]["message"]["content"])


class EnglishTutor:
    """Conversation state for one tutoring session with the model.

    Attributes:
        chat_history: Messages sent to the model on every turn; starts with
            ``MAIN_SYSTEM_PROMPT``.
        user_info: Facts collected about the learner (name, level, interests,
            country, profession, goals).
    """

    def __init__(self):
        self.chat_history = [MAIN_SYSTEM_PROMPT]
        self.user_info = {
            "name": None,
            "level": "beginner",
            "interests": [],
            "country": None,
            "profession": None,
            "goals": None,
        }

    def get_welcome_message(self):
        """Return a model-generated greeting, or a fixed one on any failure."""
        try:
            welcome_json = _request_json_completion(
                [WELCOME_PROMPT], random.uniform(0.5, 1.0)
            )
            return welcome_json["greeting"]
        except Exception as e:
            print(f"Error in welcome message: {str(e)}")
            return FALLBACK_GREETING

    def get_bot_response(self, user_message):
        """Send the learner's message to the model and return its JSON reply.

        The known user info is appended to the message as context. The turn
        is added to ``chat_history`` only after a successful reply, so a
        failed request does not leave an unanswered user message in the
        history that is sent with later requests.

        Args:
            user_message: Transcribed text of what the learner said.

        Returns:
            dict: The model's reply (see ``MAIN_SYSTEM_PROMPT`` for the
            requested keys), or a canned apology dict with the same keys if
            anything goes wrong.
        """
        context_info = f"User info: {self.user_info}"
        enhanced_message = f"{user_message}\n\n[Context: {context_info}]"
        user_turn = {"role": "user", "content": enhanced_message}

        try:
            bot_response = _request_json_completion(
                self.chat_history + [user_turn], random.uniform(0.8, 1.0)
            )
            if not isinstance(bot_response, dict):
                # Callers use ``.get`` on the result, so anything else is
                # unusable.
                raise ValueError("Model reply is not a JSON object")

            if "level_assessment" in bot_response:
                self.user_info["level"] = bot_response["level_assessment"]
            if "context_memory" in bot_response:
                self._update_user_info(bot_response["context_memory"])

            assistant_turn = {
                "role": "assistant",
                "content": json.dumps(bot_response),
            }
            # Commit both turns together so the history always alternates.
            self.chat_history.append(user_turn)
            self.chat_history.append(assistant_turn)
            return bot_response
        except Exception as e:
            print(f"Error getting bot response: {str(e)}")
            return {
                "response": "I apologize, but I couldn't process that properly. Could you try again?",
                "corrections": "",
                "vocabulary": "",
                "level_assessment": "beginner",
                "encouragement": "Don't worry, let's keep practicing!",
                "context_memory": "",
                "next_question": "What would you like to talk about?",
            }

    def _update_user_info(self, context_memory):
        """Merge facts from the model's ``context_memory`` into ``user_info``.

        Args:
            context_memory: Either free text (scanned for a name and a
                country) or a dict whose keys may match ``user_info`` keys.
                Any other type is ignored.
        """
        if isinstance(context_memory, str):
            name_match = _NAME_PATTERN.search(context_memory)
            if name_match:
                self.user_info["name"] = name_match.group(1)

            country_match = _COUNTRY_PATTERN.search(context_memory)
            if country_match:
                self.user_info["country"] = country_match.group(1).strip()
        elif isinstance(context_memory, dict):
            for key in self.user_info:
                value = context_memory.get(key)
                # Skip null/empty values so a sparse reply cannot erase
                # something that is already known.
                if value:
                    self.user_info[key] = value

    def clean_text_for_tts(self, text):
        """Normalise ``text`` before sending it to text-to-speech.

        Strips the decorative emojis, collapses all whitespace runs to single
        spaces, and drops the first word when it is immediately repeated
        (case-insensitively).
        """
        text = _TTS_EMOJI_PATTERN.sub("", text)
        text = re.sub(r"\s+", " ", text).strip()

        words = text.split()
        if len(words) > 1 and words[0].lower() == words[1].lower():
            text = " ".join(words[1:])
        return text


def convert_audio_to_text(audio_path):
    """Transcribe an audio file to English text.

    Non-WAV input is first converted to a temporary ``<audio_path>.wav`` file,
    which is removed again once recognition has finished.

    Args:
        audio_path: Path to the recorded audio file.

    Returns:
        The recognised text, or ``None`` if conversion or recognition fails.
    """
    converted_path = None
    try:
        if not audio_path.lower().endswith(".wav"):
            converted_path = audio_path + ".wav"
            AudioSegment.from_file(audio_path).export(converted_path, format="wav")
            audio_path = converted_path

        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio, language="en-US")
    except Exception as e:
        print(f"Error in speech recognition: {str(e)}")
        return None
    finally:
        if converted_path is not None:
            try:
                os.remove(converted_path)
            except OSError:
                # Best-effort cleanup: the file may never have been created.
                pass


def text_to_speech(text):
    """Synthesise ``text`` through the remote TTS client.

    Returns:
        The first element of the client's result when it is a list or tuple,
        otherwise the result itself; ``None`` for empty text or on any error.
    """
    if not text:
        return None
    try:
        result = TTS_CLIENT.predict(
            password=TTS_PASSWORD,
            prompt=text,
            voice="coral",
            emotion="Warm and friendly",
            use_random_seed=True,
            specific_seed=12345,
            api_name="/text_to_speech_app",
        )
        return result[0] if isinstance(result, (list, tuple)) else result
    except Exception as e:
        print(f"Error in text to speech: {str(e)}")
        return None


# NOTE(review): a single module-level tutor means conversation state is shared
# by every browser session served by this process - confirm that is intended.
tutor = EnglishTutor()


def initialize_chat():
    """Produce the initial UI state with Sam's greeting.

    Returns:
        tuple: ``(history, welcome_audio, transcript, corrections)`` matching
        the chatbot, audio output, transcript and corrections components.
    """
    try:
        welcome = tutor.get_welcome_message()
        clean_welcome = tutor.clean_text_for_tts(welcome)
        welcome_audio = text_to_speech(clean_welcome)
        history = [{"role": "assistant", "content": welcome}]
        return history, welcome_audio, f"🤖 Sam: {welcome}", ""
    except Exception as e:
        print(f"Error initializing chat: {str(e)}")
        history = [{"role": "assistant", "content": FALLBACK_GREETING}]
        return history, None, f"🤖 Sam: {FALLBACK_GREETING}", ""


def _has_content(value):
    """Return True when ``value`` is truthy and not just whitespace."""
    return bool(value) and bool(str(value).strip())


def _format_feedback_value(value):
    """Render a feedback field that may be a dict, a list or plain text."""
    if isinstance(value, dict):
        return "\n".join(f"• '{k}' → '{v}'" for k, v in value.items())
    if isinstance(value, (list, tuple)):
        return "\n".join(f"• {item}" for item in value)
    return str(value)


def _build_feedback(bot_response, user_info):
    """Build the "Learning Corner" text for one model reply."""
    sections = []

    level = bot_response.get("level_assessment")
    if level:
        # ``str`` guards against a non-string level in the model's JSON.
        sections.append(f"📊 **Current Level:** {str(level).title()}")

    corrections = bot_response.get("corrections")
    if _has_content(corrections):
        sections.append(
            f"✍️ **Grammar Corrections:**\n{_format_feedback_value(corrections)}"
        )

    vocabulary = bot_response.get("vocabulary")
    if _has_content(vocabulary):
        sections.append(
            f"📚 **Vocabulary Suggestions:**\n{_format_feedback_value(vocabulary)}"
        )

    encouragement = bot_response.get("encouragement")
    if encouragement:
        sections.append(f"💡 **Encouragement:**\n{encouragement}")

    # The profile line is shown only once the learner's name is known.
    if user_info.get("name"):
        profile = [f"Name: {user_info['name']}"]
        if user_info.get("country"):
            profile.append(f"Country: {user_info['country']}")
        if user_info.get("level"):
            profile.append(f"Level: {user_info['level']}")
        sections.append(f"👤 **Your Profile:**\n{' | '.join(profile)}")

    if not sections:
        sections.append(
            "🎯 **Feedback:** Keep practicing! Sam is analyzing your English and will provide feedback soon."
        )
    return "\n\n".join(sections)


def process_audio(audio, history, transcript, corrections):
    """Handle one recording: transcribe, get Sam's reply, update every panel.

    Args:
        audio: File path of the recording, or ``None`` when there is none.
        history: Current chatbot messages (list of role/content dicts).
        transcript: Current transcript text.
        corrections: Current "Learning Corner" text.

    Returns:
        tuple: ``(history, audio_response, transcript, corrections)``. When
        there is no audio, nothing was recognised, or an error occurs, the
        incoming values are returned unchanged with ``None`` for the audio.
    """
    try:
        if audio is None:
            return history, None, transcript, corrections

        user_message = convert_audio_to_text(audio)
        if not user_message:
            return history, None, transcript, corrections

        bot_response = tutor.get_bot_response(user_message)

        # Spoken reply = response + follow-up question + encouragement.
        main_response = bot_response.get("response", "")
        if bot_response.get("next_question"):
            main_response += f" {bot_response['next_question']}"
        if bot_response.get("encouragement"):
            main_response += f" {bot_response['encouragement']}"

        audio_response = text_to_speech(tutor.clean_text_for_tts(main_response))

        # Work on a copy so the caller's list is untouched if something fails.
        updated_history = list(history or [])
        updated_history.append({"role": "user", "content": user_message})
        updated_history.append({"role": "assistant", "content": main_response})

        exchange = f"🎤 You: {user_message}\n🤖 Sam: {main_response}"
        new_transcript = f"{transcript}\n\n{exchange}" if transcript else exchange

        feedback = _build_feedback(bot_response, tutor.user_info)
        # Label each feedback entry with a short preview of what was said.
        if len(user_message) > 30:
            label = f"[{user_message[:30]}...]"
        else:
            label = f"[{user_message}]"
        entry = f"--- Latest Response {label} ---\n{feedback}"
        new_corrections = f"{corrections}\n\n{entry}" if corrections else entry

        return updated_history, audio_response, new_transcript, new_corrections
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        return history, None, transcript, corrections


def submit_recording(audio, history, transcript, corrections):
    """Alias for ``process_audio`` with the same arguments and return value."""
    return process_audio(audio, history, transcript, corrections)


def clear_chat():
    """Replace the shared tutor with a fresh one and return a new greeting."""
    global tutor
    tutor = EnglishTutor()
    return initialize_chat()


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎓 English Learning Assistant with Sam")
    gr.Markdown(
        "🎤 **Record your voice** - Sam will automatically respond when you finish recording and help improve your English!"
    )

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                type='messages',
                avatar_images=("🎤", "🤖"),
            )
            with gr.Row():
                with gr.Column(scale=1):
                    audio_input = gr.Audio(
                        label="🎙️ Record your voice (auto-submits when finished)",
                        type="filepath",
                        show_label=True,
                    )
                with gr.Column(scale=1):
                    audio_output = gr.Audio(
                        label="🔊 Sam's response",
                        type="filepath",
                        show_label=True,
                        autoplay=True,
                    )

        with gr.Column(scale=2):
            gr.Markdown("### 📝 Live Transcript")
            transcript_display = gr.Textbox(
                lines=10,
                max_lines=10,
                show_label=False,
                interactive=False,
                placeholder="Your conversation will appear here...",
                container=True,
            )
            gr.Markdown("### 📚 Learning Corner")
            corrections_display = gr.Textbox(
                lines=8,
                max_lines=8,
                show_label=False,
                interactive=False,
                placeholder="Grammar corrections, vocabulary suggestions, and level assessment will appear here...",
                container=True,
            )

    with gr.Row():
        clear_btn = gr.Button("🔄 Start New Conversation", variant="secondary", size="lg")

    gr.Markdown("💡 **Tip**: Sam will actively guide the conversation and provide personalized feedback!")

    # Auto-submit when audio is recorded
    audio_input.change(
        process_audio,
        inputs=[audio_input, chatbot, transcript_display, corrections_display],
        outputs=[chatbot, audio_output, transcript_display, corrections_display],
    )

    clear_btn.click(
        clear_chat,
        outputs=[chatbot, audio_output, transcript_display, corrections_display],
    )

    demo.load(
        initialize_chat,
        outputs=[chatbot, audio_output, transcript_display, corrections_display],
    )

if __name__ == "__main__":
    demo.launch()