# Source of a Hugging Face Space app (the file viewer's status banner, commit-hash gutter and line-number gutter were page residue, not code).
import gradio as gr
import requests
import json
import random
from gradio_client import Client
from dotenv import load_dotenv
import os
import speech_recognition as sr
from pydub import AudioSegment
import re
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Secrets for the chat API, the Hugging Face Space client and the TTS endpoint.
API_KEY = os.environ.get("DEEPSEEK_API_KEY")
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
TTS_PASSWORD = os.environ.get("TTS_PASSWORD")

# Fail fast at import time: every feature below needs all three values.
if not (API_KEY and HF_TOKEN and TTS_PASSWORD):
    raise ValueError("Missing required environment variables!")

# Client for the remote text-to-speech Space, authenticated with HF_TOKEN.
TTS_CLIENT = Client(
    "KindSynapse/Youssef-Ahmed-Private-Text-To-Speech-Unlimited",
    hf_token=HF_TOKEN,
)

# Shared speech recognizer used for every transcription request.
recognizer = sr.Recognizer()
# Persona and output contract for the tutor model. The text asks the model to
# reply with a JSON object and lists the keys that object must contain.
_MAIN_PROMPT_TEXT = """You are Sam, an intelligent and proactive English tutor. You drive the conversation and actively engage students. Your responses must be in JSON format with these keys:
'response': Your main response (keep it conversational and engaging),
'corrections': ALWAYS provide specific grammar or pronunciation corrections with examples (if none needed, say "Great grammar!"),
'vocabulary': ALWAYS suggest alternative words/phrases with explanations (if none needed, suggest related vocabulary),
'level_assessment': Current assessment (beginner/intermediate/advanced),
'encouragement': A motivating comment,
'context_memory': Important details about the user,
'next_question': A follow-up question to keep conversation flowing
IMPORTANT: You MUST always provide corrections and vocabulary suggestions in every response. Even if the student speaks perfectly, provide positive feedback and suggest advanced vocabulary or alternative expressions.
Your personality:
- Be the conversation driver - ask follow-up questions
- Show genuine interest in the student's life
- Provide corrections naturally without stopping the flow
- Use the student's name frequently
- Build on previous topics
- Be encouraging but provide constructive feedback
- Ask about their day, work, hobbies, culture, goals
Correction guidelines:
- ALWAYS provide corrections field - even if it's positive feedback
- ALWAYS provide vocabulary field - suggest alternatives or related words
- Use format: "Instead of 'X', try saying 'Y'"
- Give pronunciation tips when needed
- If no mistakes, say "Excellent grammar!" or "Perfect sentence structure!"
Vocabulary guidelines:
- ALWAYS suggest vocabulary - even if it's synonyms or advanced alternatives
- Provide explanations for suggested words
- Use format: "Instead of 'good', try 'excellent' or 'outstanding'"
- Suggest topic-related vocabulary
Conversation flow:
- Start with personal questions (name, country, job, hobbies)
- Build conversations around their interests
- Use profession-specific vocabulary
- Ask about their culture and experiences
- Keep the conversation natural and flowing
- Always end with a question to continue the dialogue
Response length: Keep responses conversational (2-3 sentences max for response field)."""

# Chat message in the role/content shape used by the chat-completions payload.
MAIN_SYSTEM_PROMPT = {"role": "system", "content": _MAIN_PROMPT_TEXT}
# One-shot system prompt used to generate the opening greeting. The model is
# asked to answer with a JSON object holding a single 'greeting' key.
# The typographic apostrophes (U+2019) and em dashes (U+2014) in the text are
# intentional; they replace bytes that had been mis-decoded into Greek letters.
WELCOME_PROMPT = {
    "role": "system",
    "content": """Create a heartfelt welcome message that:
1. Introduces you as Sam, an enthusiastic and friendly English tutor who’s excited to guide them
2. Kindly asks for their name and where they’re from in a natural conversational way
3. Expresses genuine excitement about helping them grow
Return the message in JSON format with the key 'greeting'.
Make it feel personal, warm, and inviting — like a tutor who truly cares. Keep it within 2 sentences.
Example:
{"greeting": "Hi there! I'm Sam, your friendly English tutor — so glad you're here! What's your name and where are you from?"}
""",
}
class EnglishTutor:
    """Conversation state and chat-API client for the tutor persona "Sam".

    Attributes:
        chat_history: Messages sent to the chat-completions endpoint; the
            first entry is ``MAIN_SYSTEM_PROMPT``.
        user_info: Facts remembered about the learner (name, level,
            interests, country, profession, goals).
    """

    API_URL = "https://api.deepseek.com/v1/chat/completions"
    MODEL = "deepseek-chat"
    # Seconds to wait for the chat API. requests applies no timeout by
    # default, so a stalled connection would otherwise block the UI handler.
    REQUEST_TIMEOUT = 30
    DEFAULT_GREETING = "Hi! I'm Sam, your English tutor. What's your name and where are you from?"

    # "name: Ahmed", "Name Ahmed" or "name is Ahmed" -> "Ahmed".
    _NAME_RE = re.compile(r"\bname(?:\s+is\b)?[:\s]+([A-Za-z]+)", re.IGNORECASE)
    # "from Egypt", "country: United States" -> the place name only. The
    # capture stops at punctuation, a line break, or a word that starts a new
    # clause, so trailing text is not swallowed into the country.
    _COUNTRY_RE = re.compile(
        r"\b(?:from|country)(?:\s+is\b)?[:\s]+"
        r"([A-Za-z]+(?:[ \t]+[A-Za-z]+)*?)"
        r"(?=[ \t]+(?:and|but|who|where|which|with|working|works|living|lives)\b"
        r"|[^A-Za-z \t]|[ \t]*$)",
        re.IGNORECASE,
    )
    # Emoji and pictographic symbols that a speech engine should not read.
    _EMOJI_RE = re.compile(
        "["
        "\U0001F000-\U0001FAFF"  # pictographs, emoticons, transport, extended symbols
        "\u2600-\u27BF"          # miscellaneous symbols and dingbats
        "\u2B00-\u2BFF"          # miscellaneous symbols and arrows
        "\uFE0F\u200D"           # variation selector-16 and zero-width joiner
        "]+"
    )

    def __init__(self):
        self.chat_history = [MAIN_SYSTEM_PROMPT]
        self.user_info = {
            "name": None,
            "level": "beginner",
            "interests": [],
            "country": None,
            "profession": None,
            "goals": None
        }

    def _request_json(self, messages, temperature):
        """POST *messages* to the chat API and return the decoded JSON reply.

        Raises:
            Exception: on network/HTTP failure, a malformed payload, or a
                reply whose content is not a JSON object. Callers in this
                class catch these and fall back to canned text.
        """
        response = requests.post(
            self.API_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            json={
                "model": self.MODEL,
                "messages": messages,
                "temperature": temperature,
                "response_format": {"type": "json_object"}
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Surface 4xx/5xx as an error instead of a confusing KeyError below.
        response.raise_for_status()
        payload = json.loads(response.json()["choices"][0]["message"]["content"])
        if not isinstance(payload, dict):
            raise ValueError("model reply is not a JSON object")
        return payload

    def get_welcome_message(self):
        """Return a generated greeting, or a fixed greeting if generation fails."""
        try:
            welcome_json = self._request_json([WELCOME_PROMPT], random.uniform(0.5, 1.0))
            greeting = welcome_json["greeting"]
            if not isinstance(greeting, str) or not greeting.strip():
                raise ValueError("model returned an empty or non-text greeting")
            return greeting
        except Exception as e:
            print(f"Error in welcome message: {str(e)}")
            return self.DEFAULT_GREETING

    def get_bot_response(self, user_message):
        """Send *user_message* to the tutor model and return its reply dict.

        The remembered user facts are appended to the message as context. On
        success both turns are kept in ``chat_history``; on any failure the
        pending user turn is removed again and a canned apology is returned.
        """
        context_info = f"User info: {self.user_info}"
        enhanced_message = f"{user_message}\n\n[Context: {context_info}]"
        user_turn = {"role": "user", "content": enhanced_message}
        self.chat_history.append(user_turn)
        try:
            bot_response = self._request_json(self.chat_history, random.uniform(0.8, 1.0))
            # Update user info
            level = bot_response.get("level_assessment")
            if isinstance(level, str) and level.strip():
                self.user_info["level"] = level
            if "context_memory" in bot_response:
                self._update_user_info(bot_response["context_memory"])
            self.chat_history.append({"role": "assistant", "content": json.dumps(bot_response)})
            return bot_response
        except Exception as e:
            print(f"Error getting bot response: {str(e)}")
            # Keep the history alternating user/assistant: drop the turn that
            # never received an answer.
            if self.chat_history and self.chat_history[-1] is user_turn:
                self.chat_history.pop()
            return {
                "response": "I apologize, but I couldn't process that properly. Could you try again?",
                "corrections": "",
                "vocabulary": "",
                "level_assessment": "beginner",
                "encouragement": "Don't worry, let's keep practicing!",
                "context_memory": "",
                "next_question": "What would you like to talk about?"
            }

    def _update_user_info(self, context_memory):
        """Merge facts from the model's ``context_memory`` into ``user_info``.

        A string is scanned for a name and a country; a dict is copied key by
        key for keys ``user_info`` already has. Empty values never overwrite
        facts that are already known. Other types are ignored.
        """
        if isinstance(context_memory, str):
            name_match = self._NAME_RE.search(context_memory)
            if name_match:
                self.user_info["name"] = name_match.group(1)
            country_match = self._COUNTRY_RE.search(context_memory)
            if country_match:
                self.user_info["country"] = country_match.group(1).strip()
        elif isinstance(context_memory, dict):
            for key in self.user_info:
                value = context_memory.get(key)
                if value not in (None, "", [], {}):
                    self.user_info[key] = value

    def clean_text_for_tts(self, text):
        """Return *text* prepared for speech synthesis.

        Emoji are removed, whitespace runs collapse to single spaces, and a
        word repeated at the very start ("Hi hi there") is spoken only once.
        Empty or missing input yields an empty string.
        """
        if not text:
            return ""
        text = self._EMOJI_RE.sub("", str(text))
        # Remove extra spaces and newlines
        text = re.sub(r"\s+", " ", text).strip()
        # Remove duplicate words at the beginning
        words = text.split()
        if len(words) > 1 and words[0].lower() == words[1].lower():
            text = " ".join(words[1:])
        return text
def convert_audio_to_text(audio_path):
    """Transcribe the recording at *audio_path* to English text.

    Non-WAV input is first converted to a temporary ``<audio_path>.wav`` file,
    which is deleted again before returning.

    Returns:
        The recognized text, or ``None`` when no path is given or when
        conversion or recognition fails (the error is printed).
    """
    if not audio_path:
        return None

    converted_path = None
    try:
        wav_path = audio_path
        # Compare case-insensitively so "CLIP.WAV" is not converted needlessly.
        if not audio_path.lower().endswith('.wav'):
            converted_path = audio_path + '.wav'
            AudioSegment.from_file(audio_path).export(converted_path, format='wav')
            wav_path = converted_path
        with sr.AudioFile(wav_path) as source:
            recorded = recognizer.record(source)
        return recognizer.recognize_google(recorded, language='en-US')
    except Exception as e:
        print(f"Error in speech recognition: {str(e)}")
        return None
    finally:
        # The converted copy is only needed for recognition; do not let one
        # file per utterance pile up next to the recordings.
        if converted_path and os.path.exists(converted_path):
            try:
                os.remove(converted_path)
            except OSError as e:
                print(f"Error removing temporary audio file: {str(e)}")
def text_to_speech(text):
    """Synthesize *text* through the remote TTS endpoint.

    Returns:
        The endpoint's result (its first element when the result is a list or
        tuple), or ``None`` when *text* is empty/blank or the call fails.
    """
    # Nothing to say: skip the remote call entirely.
    if not text or not str(text).strip():
        return None
    try:
        result = TTS_CLIENT.predict(
            password=TTS_PASSWORD,
            prompt=text,
            voice="coral",
            emotion="Warm and friendly",
            use_random_seed=True,
            specific_seed=12345,
            api_name="/text_to_speech_app"
        )
        # NOTE(review): the first element is presumably the audio file path;
        # confirm against the Space's API.
        return result[0] if isinstance(result, (list, tuple)) else result
    except Exception as e:
        print(f"Error in text to speech: {str(e)}")
        return None
# Module-level tutor used by the UI handlers below.
# NOTE(review): this is a single shared instance, so concurrent browser
# sessions would appear to share one conversation state - confirm intended.
tutor = EnglishTutor()
def initialize_chat():
    """Build the opening state for the four UI outputs.

    Returns:
        A ``(history, audio, transcript, corrections)`` tuple: a one-message
        chat history with Sam's greeting, the spoken greeting (``None`` if it
        could not be synthesized), the transcript text and an empty
        corrections panel. If anything fails, a fixed greeting without audio
        is used instead.
    """
    fallback_greeting = "Hi! I'm Sam, your English tutor. What's your name and where are you from?"
    try:
        welcome = tutor.get_welcome_message()
        welcome_audio = text_to_speech(tutor.clean_text_for_tts(welcome))
    except Exception as e:
        print(f"Error initializing chat: {str(e)}")
        welcome = fallback_greeting
        welcome_audio = None

    history = [{"role": "assistant", "content": welcome}]
    # NOTE(review): the speaker emoji was restored from mis-decoded bytes;
    # confirm the intended glyph.
    return history, welcome_audio, f"🤖 Sam: {welcome}", ""
def _compose_spoken_reply(bot_response):
    """Join the reply, follow-up question and encouragement into one utterance."""
    pieces = [bot_response.get(key) for key in ("response", "next_question", "encouragement")]
    return " ".join(str(piece) for piece in pieces if piece)


def _format_feedback_value(value):
    """Render a model-provided feedback field (str, list or dict) as display text."""
    if value is None:
        return ""
    if isinstance(value, dict):
        return "\n".join(f"• '{k}' → '{v}'" for k, v in value.items())
    if isinstance(value, (list, tuple)):
        return "\n".join(f"• {item}" for item in value)
    return str(value).strip()


def _build_feedback_sections(bot_response, user_info):
    """Return the "Learning Corner" sections for one tutor reply, in display order."""
    # NOTE(review): section emoji were restored from mis-decoded bytes and
    # chosen by context; confirm the intended glyphs.
    sections = []

    level = bot_response.get("level_assessment")
    if level:
        sections.append(f"📊 **Current Level:** {str(level).title()}")

    corrections_text = _format_feedback_value(bot_response.get("corrections"))
    if corrections_text:
        sections.append(f"✏️ **Grammar Corrections:**\n{corrections_text}")

    vocabulary_text = _format_feedback_value(bot_response.get("vocabulary"))
    if vocabulary_text:
        sections.append(f"📚 **Vocabulary Suggestions:**\n{vocabulary_text}")

    encouragement = bot_response.get("encouragement")
    if encouragement:
        sections.append(f"💡 **Encouragement:**\n{encouragement}")

    # The profile line is only shown once the learner's name is known.
    if user_info.get("name"):
        labelled_keys = (("Name", "name"), ("Country", "country"), ("Level", "level"))
        profile = [f"{label}: {user_info[key]}" for label, key in labelled_keys if user_info.get(key)]
        sections.append(f"👤 **Your Profile:**\n{' | '.join(profile)}")

    if not sections:
        sections.append("🎯 **Feedback:** Keep practicing! Sam is analyzing your English and will provide feedback soon.")
    return sections


def process_audio(audio, history, transcript, corrections):
    """Handle one recorded utterance and return the updated UI state.

    Args:
        audio: Path of the recording, or ``None`` when the input was cleared.
        history: Current chatbot messages (list of role/content dicts) or None.
        transcript: Current transcript text.
        corrections: Current "Learning Corner" text.

    Returns:
        ``(history, audio_response, transcript, corrections)``. When there is
        no audio, nothing was recognized, or any step fails, the inputs are
        returned unchanged with ``None`` audio.
    """
    try:
        if audio is None:
            return history, None, transcript, corrections

        user_message = convert_audio_to_text(audio)
        if not user_message:
            return history, None, transcript, corrections

        bot_response = tutor.get_bot_response(user_message)
        main_response = _compose_spoken_reply(bot_response)
        audio_response = text_to_speech(tutor.clean_text_for_tts(main_response))

        # Work on a copy so a failure further down cannot leave the caller's
        # history half-updated.
        new_history = list(history or [])
        new_history.append({"role": "user", "content": user_message})
        new_history.append({"role": "assistant", "content": main_response})

        new_transcript = (transcript or "") + f"\n\n🎤 You: {user_message}\n🤖 Sam: {main_response}"

        feedback_text = "\n\n".join(_build_feedback_sections(bot_response, tutor.user_info))
        # Each entry is labelled with the start of the utterance it refers to.
        excerpt = f"[{user_message[:30]}...]" if len(user_message) > 30 else f"[{user_message}]"
        entry = f"--- Latest Response {excerpt} ---\n{feedback_text}"
        new_corrections = f"{corrections}\n\n{entry}" if corrections else entry

        return new_history, audio_response, new_transcript, new_corrections
    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        return history, None, transcript, corrections
def submit_recording(audio, history, transcript, corrections):
    """Forward a submitted recording to ``process_audio`` and return its result."""
    return process_audio(
        audio=audio,
        history=history,
        transcript=transcript,
        corrections=corrections,
    )
def clear_chat():
    """Replace the module-level tutor with a new one and return a fresh opening state."""
    global tutor
    tutor = EnglishTutor()
    opening_state = initialize_chat()
    return opening_state
# Gradio layout and event wiring for the tutor UI.
# NOTE(review): the emoji in the labels below were restored from mis-decoded
# bytes and chosen by context; confirm the intended glyphs.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎓 English Learning Assistant with Sam")
    gr.Markdown("🎤 **Record your voice** - Sam will automatically respond when you finish recording and help improve your English!")

    with gr.Row():
        # Left column: conversation plus audio in/out.
        with gr.Column(scale=3):
            # NOTE(review): Gradio documents avatar_images as image paths or
            # URLs; confirm that emoji strings render as intended.
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                type='messages',
                avatar_images=("👤", "🤖")
            )
            with gr.Row():
                with gr.Column(scale=1):
                    audio_input = gr.Audio(
                        label="🎙️ Record your voice (auto-submits when finished)",
                        type="filepath",
                        show_label=True
                    )
                with gr.Column(scale=1):
                    audio_output = gr.Audio(
                        label="🔊 Sam's response",
                        type="filepath",
                        show_label=True,
                        autoplay=True
                    )

        # Right column: running transcript and feedback panel.
        with gr.Column(scale=2):
            gr.Markdown("### 📝 Live Transcript")
            transcript_display = gr.Textbox(
                lines=10,
                max_lines=10,
                show_label=False,
                interactive=False,
                placeholder="Your conversation will appear here...",
                container=True
            )
            gr.Markdown("### 📚 Learning Corner")
            corrections_display = gr.Textbox(
                lines=8,
                max_lines=8,
                show_label=False,
                interactive=False,
                placeholder="Grammar corrections, vocabulary suggestions, and level assessment will appear here...",
                container=True
            )

    with gr.Row():
        clear_btn = gr.Button("🔄 Start New Conversation", variant="secondary", size="lg")

    gr.Markdown("💡 **Tip**: Sam will actively guide the conversation and provide personalized feedback!")

    # Every handler refreshes the same four components, in this order.
    ui_outputs = [chatbot, audio_output, transcript_display, corrections_display]

    # Auto-submit when audio is recorded
    audio_input.change(
        process_audio,
        inputs=[audio_input, chatbot, transcript_display, corrections_display],
        outputs=ui_outputs
    )
    clear_btn.click(
        clear_chat,
        outputs=ui_outputs
    )
    # Show Sam's greeting as soon as the page loads.
    demo.load(
        initialize_chat,
        outputs=ui_outputs
    )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()