Spaces:

YoussefA7med
/

ChatBot_English_Helper

Sleeping

File size: 17,381 Bytes

8f1aebf
fa95df1
 
 
bd50dab
 
 
ccc23c4
8f1aebf
27ccdcb
fa95df1
bd50dab
 
 
 
 
 
8f1aebf
 
bd50dab
8f1aebf
 
9512144
1575111
 
27ccdcb
 
489e668
 
27ccdcb
1575111
27ccdcb
 
1575111
489e668
 
27ccdcb
 
 
 
 
 
 
 
 
 
489e668
 
27ccdcb
489e668
 
 
 
 
 
 
 
27ccdcb
 
 
 
 
 
 
 
 
 
1575111
 
553e56f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1575111
 
 
 
 
27ccdcb
1575111
 
 
 
 
 
 
553e56f
 
 
 
 
 
 
625bc4b
553e56f
 
 
 
 
 
 
 
1575111
 
 
27ccdcb
 
 
 
 
1575111
 
 
 
 
 
 
27ccdcb
1575111
 
 
 
 
 
27ccdcb
1575111
 
 
 
 
 
 
27ccdcb
1575111
 
27ccdcb
 
 
 
 
 
 
 
 
1575111
 
27ccdcb
 
 
 
 
 
 
 
 
 
 
 
 
 
1575111
 
 
 
27ccdcb
 
 
1575111
27ccdcb
 
1575111
27ccdcb
 
 
 
 
 
1575111
8f1aebf
9512144
8f1aebf
 
 
 
 
 
bd50dab
 
8f1aebf
 
bd50dab
8f1aebf
bd50dab
fa95df1
8f1aebf
 
 
bd50dab
8f1aebf
 
 
bd50dab
8f1aebf
bd50dab
 
8f1aebf
 
 
 
bd50dab
1575111
 
d4e37c6
8f1aebf
d4e37c6
27ccdcb
 
9254af3
27ccdcb
d4e37c6
 
27ccdcb
9254af3
27ccdcb
1575111
45e169e
d4e37c6
a994885
45e169e
bd50dab
a994885
8f1aebf
45e169e
bd50dab
27ccdcb
45e169e
27ccdcb
 
 
 
45e169e
27ccdcb
 
 
45e169e
27ccdcb
 
 
45e169e
27ccdcb
8f1aebf
9254af3
27ccdcb
45e169e
27ccdcb
 
8f1aebf
489e668
45e169e
27ccdcb
 
489e668
 
 
 
 
 
 
 
 
 
 
 
27ccdcb
 
489e668
 
27ccdcb
 
 
 
 
 
 
489e668
 
 
 
 
 
 
 
 
 
 
 
27ccdcb
489e668
 
 
 
 
 
 
 
 
 
 
 
d4e37c6
45e169e
8f1aebf
d4e37c6
45e169e
 
 
 
d4e37c6
 
 
 
 
bd50dab
1575111
27ccdcb
489e668
a994885
d4e37c6
45e169e
d4e37c6
45e169e
 
9254af3
45e169e
d4e37c6
 
 
489e668
45e169e
489e668
45e169e
 
 
 
 
 
 
 
 
 
 
 
9254af3
 
 
 
 
 
 
 
 
45e169e
9254af3
 
 
 
 
 
27ccdcb
9254af3
 
bd50dab
 
45e169e
27ccdcb
d4e37c6
489e668
 
 
45e169e
 
d4e37c6
 
 
 
45e169e
d4e37c6
 
 
 
45e169e
bd50dab
e1aa210
 
5c16342

import gradio as gr
import requests
import json
import random
from gradio_client import Client
from dotenv import load_dotenv
import os
import speech_recognition as sr
from pydub import AudioSegment
import re

# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()

# Credentials: DeepSeek chat API key, Hugging Face token for the TTS Space,
# and the password forwarded to the TTS endpoint on every request.
API_KEY = os.environ.get("DEEPSEEK_API_KEY")
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
TTS_PASSWORD = os.environ.get("TTS_PASSWORD")

# Fail at import time when any credential is unset or empty.
if not (API_KEY and HF_TOKEN and TTS_PASSWORD):
    raise ValueError("Missing required environment variables!")

# Shared clients, created once and reused by the helper functions below.
TTS_CLIENT = Client(
    "KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited",
    hf_token=HF_TOKEN,
)
recognizer = sr.Recognizer()

# System message that seeds every EnglishTutor conversation. It asks the model
# to answer as "Sam" with a JSON object whose keys ('response', 'corrections',
# 'vocabulary', 'level_assessment', 'encouragement', 'context_memory',
# 'next_question') are the ones read back by get_bot_response/process_audio.
MAIN_SYSTEM_PROMPT = {
    "role": "system",
    "content": """You are Sam, an intelligent and proactive English tutor. You drive the conversation and actively engage students. Your responses must be in JSON format with these keys:
        'response': Your main response (keep it conversational and engaging),
        'corrections': ALWAYS provide specific grammar or pronunciation corrections with examples (if none needed, say "Great grammar!"),
        'vocabulary': ALWAYS suggest alternative words/phrases with explanations (if none needed, suggest related vocabulary),
        'level_assessment': Current assessment (beginner/intermediate/advanced),
        'encouragement': A motivating comment,
        'context_memory': Important details about the user,
        'next_question': A follow-up question to keep conversation flowing
        
        IMPORTANT: You MUST always provide corrections and vocabulary suggestions in every response. Even if the student speaks perfectly, provide positive feedback and suggest advanced vocabulary or alternative expressions.
        
        Your personality:
        - Be the conversation driver - ask follow-up questions
        - Show genuine interest in the student's life
        - Provide corrections naturally without stopping the flow
        - Use the student's name frequently
        - Build on previous topics
        - Be encouraging but provide constructive feedback
        - Ask about their day, work, hobbies, culture, goals
        
        Correction guidelines:
        - ALWAYS provide corrections field - even if it's positive feedback
        - ALWAYS provide vocabulary field - suggest alternatives or related words
        - Use format: "Instead of 'X', try saying 'Y'"
        - Give pronunciation tips when needed
        - If no mistakes, say "Excellent grammar!" or "Perfect sentence structure!"
        
        Vocabulary guidelines:
        - ALWAYS suggest vocabulary - even if it's synonyms or advanced alternatives
        - Provide explanations for suggested words
        - Use format: "Instead of 'good', try 'excellent' or 'outstanding'"
        - Suggest topic-related vocabulary
        
        Conversation flow:
        - Start with personal questions (name, country, job, hobbies)
        - Build conversations around their interests
        - Use profession-specific vocabulary
        - Ask about their culture and experiences
        - Keep the conversation natural and flowing
        - Always end with a question to continue the dialogue
        
        Response length: Keep responses conversational (2-3 sentences max for response field)."""
}

# Stand-alone system message used only by EnglishTutor.get_welcome_message.
# It asks for a JSON object with a single 'greeting' key, which that method
# reads back; it is sent on its own rather than appended to the chat history.
WELCOME_PROMPT = {
    "role": "system",
    "content": """Create a heartfelt welcome message that:
        1. Introduces you as Sam, an enthusiastic and friendly English tutor who’s excited to guide them
        2. Kindly asks for their name and where they’re from in a natural conversational way
        3. Expresses genuine excitement about helping them grow
        
        Return the message in JSON format with the key 'greeting'.
        Make it feel personal, warm, and inviting — like a tutor who truly cares. Keep it within 2 sentences.
        
        Example: 
        {"greeting": "Hi there! I'm Sam, your friendly English tutor — so glad you're here! What's your name and where are you from?"}
        """
}


class EnglishTutor:
    """Conversation state plus DeepSeek chat calls for the "Sam" tutor.

    Attributes:
        chat_history: Messages sent to the chat API on every turn; starts
            with ``MAIN_SYSTEM_PROMPT`` and grows by one user and one
            assistant message per successful turn.
        user_info: Facts remembered about the learner. The whole dict is
            embedded in each user message as context for the model.
    """

    # Chat-completions endpoint used for both the welcome message and turns.
    API_URL = "https://api.deepseek.com/v1/chat/completions"
    # Seconds to wait for the API; without a timeout a stalled connection
    # would block the UI handler indefinitely.
    REQUEST_TIMEOUT = 30
    # Greeting used when the welcome request fails for any reason.
    _FALLBACK_GREETING = (
        "Hi! I'm Sam, your English tutor. What's your name and where are you from?"
    )
    # Heuristics for free-text 'context_memory'. They are case-insensitive and
    # skip an optional "is", so "Name: Ahmed" and "name is Ahmed" both yield
    # "Ahmed". The country capture is greedy (letters and whitespace), so it
    # may include trailing words; it is a best-effort hint only.
    _NAME_PATTERN = re.compile(
        r"\bname(?:\s+is)?[:\s]+([A-Za-z]+)", re.IGNORECASE
    )
    _COUNTRY_PATTERN = re.compile(
        r"\b(?:from|country(?:\s+is)?)[:\s]+([A-Za-z\s]+)", re.IGNORECASE
    )

    def __init__(self):
        self.chat_history = [MAIN_SYSTEM_PROMPT]
        self.user_info = {
            "name": None,
            "level": "beginner",
            "interests": [],
            "country": None,
            "profession": None,
            "goals": None
        }

    def _request_json_completion(self, messages, temperature):
        """POST ``messages`` to the chat API and return the decoded JSON reply.

        Raises whatever ``requests``/``json`` raise on network errors, HTTP
        error statuses, or a malformed payload; callers translate those into
        their own fallbacks.
        """
        response = requests.post(
            self.API_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            json={
                "model": "deepseek-chat",
                "messages": messages,
                "temperature": temperature,
                "response_format": {"type": "json_object"}
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Surface 4xx/5xx explicitly instead of failing later on a missing
        # "choices" key with an unhelpful KeyError.
        response.raise_for_status()
        return json.loads(response.json()["choices"][0]["message"]["content"])

    def get_welcome_message(self):
        """Return a generated greeting, or a fixed fallback greeting on error."""
        try:
            welcome_json = self._request_json_completion(
                [WELCOME_PROMPT], random.uniform(0.5, 1.0)
            )
            return welcome_json["greeting"]
        except Exception as e:
            print(f"Error in welcome message: {str(e)}")
            return self._FALLBACK_GREETING

    def get_bot_response(self, user_message):
        """Send one learner utterance to the model and return its reply dict.

        The utterance is sent together with the current ``user_info`` as
        context. On success the user and assistant messages are appended to
        ``chat_history`` and ``user_info`` is refreshed from the reply. On any
        failure a fixed apology dict with the same keys is returned and
        ``chat_history`` is left untouched, so a failed turn does not leave an
        unanswered user message in later requests.
        """
        try:
            # Add user context to the message
            context_info = f"User info: {self.user_info}"
            enhanced_message = f"{user_message}\n\n[Context: {context_info}]"
            user_turn = {"role": "user", "content": enhanced_message}

            bot_response = self._request_json_completion(
                self.chat_history + [user_turn], random.uniform(0.8, 1.0)
            )
            if not isinstance(bot_response, dict):
                raise ValueError("Model reply is not a JSON object")

            # Update user info
            if "level_assessment" in bot_response:
                self.user_info["level"] = bot_response["level_assessment"]
            if "context_memory" in bot_response:
                self._update_user_info(bot_response["context_memory"])

            # Commit both sides of the turn only once everything succeeded.
            self.chat_history.append(user_turn)
            self.chat_history.append(
                {"role": "assistant", "content": json.dumps(bot_response)}
            )

            return bot_response
        except Exception as e:
            print(f"Error getting bot response: {str(e)}")
            return {
                "response": "I apologize, but I couldn't process that properly. Could you try again?",
                "corrections": "",
                "vocabulary": "",
                "level_assessment": "beginner",
                "encouragement": "Don't worry, let's keep practicing!",
                "context_memory": "",
                "next_question": "What would you like to talk about?"
            }

    def _update_user_info(self, context_memory):
        """Merge the model's ``context_memory`` field into ``user_info``.

        A string is scanned for a name and a country with the class-level
        patterns. A dict updates the known ``user_info`` keys, skipping empty
        values so previously learned facts are not erased. Any other type is
        ignored.
        """
        if isinstance(context_memory, str):
            name_match = self._NAME_PATTERN.search(context_memory)
            if name_match:
                self.user_info["name"] = name_match.group(1)

            country_match = self._COUNTRY_PATTERN.search(context_memory)
            if country_match:
                self.user_info["country"] = country_match.group(1).strip()

        elif isinstance(context_memory, dict):
            for key in self.user_info:
                value = context_memory.get(key)
                if value:
                    self.user_info[key] = value

    def clean_text_for_tts(self, text):
        """Return ``text`` with decorative emojis removed and spacing tidied."""
        # Remove emojis and special characters that might cause TTS issues
        text = re.sub(r'[🎯🌟✨💫🎤🤖]', '', text)

        # Remove extra spaces and newlines
        text = re.sub(r'\s+', ' ', text).strip()

        # Remove duplicate words at the beginning
        words = text.split()
        if len(words) > 1 and words[0].lower() == words[1].lower():
            text = ' '.join(words[1:])

        return text

def convert_audio_to_text(audio_path):
    """Transcribe an audio file to English text.

    Files that are not already ``.wav`` (checked case-insensitively) are
    first converted with pydub into ``<audio_path>.wav``; that temporary file
    is removed again before returning so repeated recordings do not pile up
    on disk.

    Args:
        audio_path: Path of the recorded or uploaded audio file.

    Returns:
        The recognized text, or ``None`` when conversion or recognition
        fails for any reason (the error is printed).
    """
    temp_wav_path = None
    try:
        if not audio_path.lower().endswith('.wav'):
            segment = AudioSegment.from_file(audio_path)
            temp_wav_path = audio_path + '.wav'
            segment.export(temp_wav_path, format='wav')
            audio_path = temp_wav_path

        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
            text = recognizer.recognize_google(audio, language='en-US')
            return text
    except Exception as e:
        print(f"Error in speech recognition: {str(e)}")
        return None
    finally:
        # Best-effort cleanup of the converted copy; a failure to delete it
        # must not mask the transcription result.
        if temp_wav_path and os.path.exists(temp_wav_path):
            try:
                os.remove(temp_wav_path)
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {temp_wav_path}: {cleanup_error}")

def text_to_speech(text):
    """Synthesize ``text`` through the remote TTS client.

    Returns the first element when the client answers with a list or tuple,
    otherwise the raw result. Any failure is printed and yields ``None``.
    """
    try:
        synthesis = TTS_CLIENT.predict(
            api_name="/text_to_speech_app",
            password=TTS_PASSWORD,
            prompt=text,
            voice="coral",
            emotion="Warm and friendly",
            use_random_seed=True,
            specific_seed=12345,
        )
        if isinstance(synthesis, (list, tuple)):
            return synthesis[0]
        return synthesis
    except Exception as e:
        print(f"Error in text to speech: {str(e)}")
        return None

# Module-level tutor used by initialize_chat() and process_audio();
# clear_chat() rebinds it to a fresh instance.
# NOTE(review): as a single global, this conversation state is presumably
# shared by all concurrent browser sessions — confirm whether per-session
# state is required.
tutor = EnglishTutor()

def initialize_chat():
    """Build the initial UI state: greeting history, audio, transcript, feedback.

    Returns a 4-tuple ``(history, audio, transcript, corrections)``. If the
    greeting or its audio cannot be produced, a fixed greeting is used and
    the audio slot is ``None``.
    """
    try:
        greeting = tutor.get_welcome_message()
        greeting_audio = text_to_speech(tutor.clean_text_for_tts(greeting))
    except Exception as e:
        print(f"Error initializing chat: {str(e)}")
        greeting = "Hi! I'm Sam, your English tutor. What's your name and where are you from?"
        greeting_audio = None

    opening_history = [{"role": "assistant", "content": greeting}]
    return opening_history, greeting_audio, f"🤖 Sam: {greeting}", ""

def _has_content(value):
    """Return True when a model-provided feedback field is non-empty."""
    return bool(value) and bool(str(value).strip())


def _format_feedback_value(value):
    """Render a feedback field (str, list or dict) as display text."""
    if isinstance(value, dict):
        return "\n".join([f"• '{k}' → '{v}'" for k, v in value.items()])
    if isinstance(value, (list, tuple)):
        return "\n".join([f"• {item}" for item in value])
    return str(value)


def _compose_spoken_reply(bot_response):
    """Join response, follow-up question and encouragement into one reply."""
    pieces = [
        bot_response.get("response", ""),
        bot_response.get("next_question"),
        bot_response.get("encouragement"),
    ]
    # str() guards against the model returning a non-string field.
    return " ".join(str(piece) for piece in pieces if piece)


def _build_feedback_sections(bot_response, user_info):
    """Return the "Learning Corner" sections for one tutor reply."""
    sections = []

    # Always show current level
    level = bot_response.get("level_assessment")
    if level:
        # str() first: .title() on a non-string level would abort the turn.
        sections.append(f"📊 **Current Level:** {str(level).title()}")

    # Show corrections if available
    corrections = bot_response.get("corrections")
    if _has_content(corrections):
        sections.append(f"✍️ **Grammar Corrections:**\n{_format_feedback_value(corrections)}")

    # Show vocabulary if available
    vocabulary = bot_response.get("vocabulary")
    if _has_content(vocabulary):
        sections.append(f"📚 **Vocabulary Suggestions:**\n{_format_feedback_value(vocabulary)}")

    # Show encouragement
    encouragement = bot_response.get("encouragement")
    if encouragement:
        sections.append(f"💡 **Encouragement:**\n{encouragement}")

    # Show the learner profile once a name is known
    if user_info.get("name"):
        info_parts = [f"Name: {user_info['name']}"]
        if user_info.get("country"):
            info_parts.append(f"Country: {user_info['country']}")
        if user_info.get("level"):
            info_parts.append(f"Level: {user_info['level']}")
        sections.append(f"👤 **Your Profile:**\n{' | '.join(info_parts)}")

    # If still nothing to show, fall back to a default message
    if not sections:
        sections.append("🎯 **Feedback:** Keep practicing! Sam is analyzing your English and will provide feedback soon.")

    return sections


def process_audio(audio, history, transcript, corrections):
    """Handle one recorded utterance and return the updated UI state.

    Transcribes ``audio``, asks the tutor for a reply, synthesizes that reply
    and extends the chat history, transcript and feedback text.

    Args:
        audio: File path of the recording, or ``None`` when it was cleared.
        history: Current chatbot messages (list of role/content dicts).
        transcript: Current transcript text.
        corrections: Current feedback ("Learning Corner") text.

    Returns:
        ``(history, audio_response, transcript, corrections)``. When there is
        no audio, transcription fails, or any step raises, the inputs are
        returned unchanged with ``None`` as the audio. The caller's
        ``history`` list is never mutated, so a failure cannot leave the chat
        and the transcript out of sync.
    """
    try:
        if audio is None:
            return history, None, transcript, corrections

        user_message = convert_audio_to_text(audio)
        if not user_message:
            return history, None, transcript, corrections

        bot_response = tutor.get_bot_response(user_message)

        # Create the main response with follow-up question and encouragement
        main_response = _compose_spoken_reply(bot_response)

        # Clean text for TTS
        clean_response = tutor.clean_text_for_tts(main_response)
        audio_response = text_to_speech(clean_response)

        # Update chat history on a copy so the input list stays intact
        updated_history = list(history or [])
        updated_history.append({"role": "user", "content": user_message})
        updated_history.append({"role": "assistant", "content": main_response})

        # Update transcript (an empty textbox may arrive as None)
        new_transcript = (transcript or "") + f"\n\n🎤 You: {user_message}\n🤖 Sam: {main_response}"

        # Debug: Print the bot response to see what we're getting
        print(f"DEBUG - Bot response keys: {bot_response.keys()}")
        print(f"DEBUG - Corrections: '{bot_response.get('corrections', 'NOT FOUND')}'")
        print(f"DEBUG - Vocabulary: '{bot_response.get('vocabulary', 'NOT FOUND')}'")
        print(f"DEBUG - Level: '{bot_response.get('level_assessment', 'NOT FOUND')}'")

        # Create the new corrections text
        new_correction_text = "\n\n".join(
            _build_feedback_sections(bot_response, tutor.user_info)
        )
        # Label each feedback entry with the (truncated) utterance it refers to
        timestamp = f"[{user_message[:30]}...]" if len(user_message) > 30 else f"[{user_message}]"
        latest_entry = f"--- Latest Response {timestamp} ---\n{new_correction_text}"

        if corrections:
            new_corrections = corrections + f"\n\n{latest_entry}"
        else:
            new_corrections = latest_entry

        return updated_history, audio_response, new_transcript, new_corrections
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        return history, None, transcript, corrections

def submit_recording(audio, history, transcript, corrections):
    """Forward a recording to ``process_audio`` and return its result unchanged."""
    updated_state = process_audio(audio, history, transcript, corrections)
    return updated_state

def clear_chat():
    """Replace the module-level tutor with a fresh one and restart the chat.

    Returns the same 4-tuple as ``initialize_chat``.
    """
    global tutor
    fresh_tutor = EnglishTutor()
    tutor = fresh_tutor
    restarted_state = initialize_chat()
    return restarted_state

# Gradio interface: chat and audio controls on the left, transcript and
# feedback panels on the right, and a reset button underneath.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎓 English Learning Assistant with Sam")
    gr.Markdown("🎤 **Record your voice** - Sam will automatically respond when you finish recording and help improve your English!")

    with gr.Row():
        # Left column: conversation plus microphone input and spoken reply.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                type='messages',
                height=500,
                show_label=False,
                avatar_images=("🎤", "🤖"),
            )

            with gr.Row():
                with gr.Column(scale=1):
                    audio_input = gr.Audio(
                        type="filepath",
                        label="🎙️ Record your voice (auto-submits when finished)",
                        show_label=True,
                    )
                with gr.Column(scale=1):
                    audio_output = gr.Audio(
                        type="filepath",
                        label="🔊 Sam's response",
                        show_label=True,
                        autoplay=True,
                    )

        # Right column: read-only transcript and feedback panels.
        with gr.Column(scale=2):
            gr.Markdown("### 📝 Live Transcript")
            transcript_display = gr.Textbox(
                placeholder="Your conversation will appear here...",
                lines=10,
                max_lines=10,
                show_label=False,
                interactive=False,
                container=True,
            )

            gr.Markdown("### 📚 Learning Corner")
            corrections_display = gr.Textbox(
                placeholder="Grammar corrections, vocabulary suggestions, and level assessment will appear here...",
                lines=8,
                max_lines=8,
                show_label=False,
                interactive=False,
                container=True,
            )

    with gr.Row():
        clear_btn = gr.Button("🔄 Start New Conversation", variant="secondary", size="lg")
        gr.Markdown("💡 **Tip**: Sam will actively guide the conversation and provide personalized feedback!")

    # Every handler refreshes the same four components, in this order.
    ui_state_outputs = [chatbot, audio_output, transcript_display, corrections_display]

    # Run a tutor turn whenever the recorded/uploaded audio value changes.
    audio_input.change(
        process_audio,
        inputs=[audio_input, chatbot, transcript_display, corrections_display],
        outputs=ui_state_outputs,
    )

    # Reset the tutor and show a fresh greeting.
    clear_btn.click(clear_chat, outputs=ui_state_outputs)

    # Greet the learner as soon as the page loads.
    demo.load(initialize_chat, outputs=ui_state_outputs)

if __name__ == "__main__":
    demo.launch()