# app.py
import gradio as gr
import wikipedia
import numpy as np
import tempfile
import os
import time
from datetime import datetime, timedelta
from gtts import gTTS
from langdetect import detect
from pydub import AudioSegment
from pydub.silence import split_on_silence
import speech_recognition as sr
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import re
import torch


# --- USER MANAGEMENT SYSTEM ---
class UserManager:
    def __init__(self):
        self.user_data = {}
        self.max_warnings = 1  # the second violation triggers a block
        self.block_duration = timedelta(days=30)

    def get_user_status(self, user_id):
        if user_id not in self.user_data:
            return "active"
        if self.user_data[user_id].get('permanently_banned', False):
            return "banned"
        if 'blocked_until' in self.user_data[user_id]:
            if datetime.now() < self.user_data[user_id]['blocked_until']:
                return "blocked"
            # Block has expired; lift it.
            del self.user_data[user_id]['blocked_until']
        return "active"

    def add_warning(self, user_id, violation_type):
        if user_id not in self.user_data:
            self.user_data[user_id] = {'warnings': 1, 'flags': [violation_type]}
        else:
            self.user_data[user_id]['warnings'] += 1
            self.user_data[user_id]['flags'].append(violation_type)
        if self.user_data[user_id]['warnings'] > self.max_warnings:
            self.user_data[user_id]['blocked_until'] = datetime.now() + self.block_duration
            return "blocked"
        return "warned"


user_manager = UserManager()


# --- MODEL INITIALIZATION ---
def load_models():
    models = {
        'translator': pipeline('translation', model='Helsinki-NLP/opus-mt-mul-en'),
        'answer_gen': pipeline('text2text-generation', model='google/flan-t5-base'),
        'encoder': SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2'),
        'toxic-bert': pipeline("text-classification", model="unitary/toxic-bert"),
        'roberta-hate': pipeline("text-classification",
                                 model="facebook/roberta-hate-speech-dynabench-r4-target"),
    }
    # One English->X pipeline per supported output language.
    for lang in ['fr', 'ar', 'zh', 'es']:
        models[f'en_to_{lang}'] = pipeline(f'translation_en_to_{lang}',
                                           model=f'Helsinki-NLP/opus-mt-en-{lang}')
    return models


models = load_models()


# --- UNIVERSAL HATE SPEECH DETECTION ---
class HateSpeechDetector:
    def __init__(self):
        self.keyword_banks = {
            'racial': ['nigger', 'chink', 'spic', 'kike', 'gook', 'wetback'],
            'gender': ['fag', 'dyke', 'tranny', 'whore', 'slut', 'bitch'],
            'violence': ['kill', 'murder', 'harm', 'hurt', 'abuse', 'torture'],
            'general': ['scum', 'vermin', 'subhuman', 'untermensch'],
        }
        self.patterns = [
            (r'\b(all|every)\s\w+\s(should|must)\s(die|burn)', 'group violence'),
            (r'\b(how to|ways? to)\s(kill|harm|hurt)', 'harm instructions'),
            (r'[!@#$%^&*]igg[!@#$%^&*]', 'coded racial slur'),
        ]

    def detect(self, text):
        text_lower = text.lower()
        violations = []
        # Keyword detection
        for category, keywords in self.keyword_banks.items():
            found = [kw for kw in keywords if kw in text_lower]
            if found:
                violations.append(f"{category} terms: {', '.join(found[:3])}")
        # Pattern detection
        for pattern, desc in self.patterns:
            if re.search(pattern, text_lower):
                violations.append(f"pattern: {desc}")
        # Model detection
        try:
            toxic_result = models['toxic-bert'](text)[0]
            if toxic_result['label'].lower() in ['toxic', 'hate'] and toxic_result['score'] > 0.7:
                violations.append(f"toxic-bert: {toxic_result['label']} ({toxic_result['score']:.2f})")
            hate_result = models['roberta-hate'](text)[0]
            if hate_result['label'].lower() in ['hate', 'offensive'] and hate_result['score'] > 0.7:
                violations.append(f"roberta-hate: {hate_result['label']} ({hate_result['score']:.2f})")
        except Exception as e:
            print(f"Model error: {e}")
        return violations if violations else None


hate_detector = HateSpeechDetector()


# --- RESPONSE GENERATION ---
def generate_response(text, topic, lang):
    try:
        wikipedia.set_lang('en')
        try:
            page = wikipedia.page(topic, auto_suggest=False)
        except wikipedia.exceptions.DisambiguationError as e:
            # Ambiguous topic: fall back to the first suggested option.
            page = wikipedia.page(e.options[0])
        context = page.summary[:1000]
    except Exception as e:
        print(f"Wikipedia error: {e}")
        return "Could not find information. Please try another topic.", None

    prompt = f"Context: {context}\nQuestion: {text}\nAnswer:"
    answer = models['answer_gen'](prompt, max_length=200)[0]['generated_text']
    translated = translate(answer, 'en', lang) if lang != 'en' else answer
    audio_path = text_to_speech(translated, lang)
    return translated, audio_path


# --- WARNING MESSAGES ---
def create_warning_message(violations):
    # The original HTML body was truncated in this file; the minimal markup
    # below is a reconstruction that simply lists the detected violations.
    return gr.HTML(f"""
        <div style="color: #b00020; border: 1px solid #b00020; padding: 8px;">
            <strong>Content warning:</strong> your message was flagged for:
            {', '.join(violations)}
        </div>
    """)
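
# --- HELPER SKETCHES (not shown in the original section) ---
# generate_response() above calls translate() and text_to_speech(), and the
# imports pull in speech_recognition and pydub for voice input, but none of
# those helpers appear in this section. The definitions below are minimal
# sketches of what they might look like given the pipelines loaded in
# load_models(); treat names and signatures as assumptions, not the original
# implementation.

def translate(text, src_lang, tgt_lang):
    # Sketch: route text through the Helsinki-NLP pipelines built above.
    if src_lang == tgt_lang:
        return text
    if tgt_lang == 'en':
        # opus-mt-mul-en is many-to-English, so one pipeline covers this case.
        return models['translator'](text)[0]['translation_text']
    key = f'en_to_{tgt_lang}'
    if key in models:
        return models[key](text)[0]['translation_text']
    return text  # no pipeline for this language pair; return text unchanged


def text_to_speech(text, lang):
    # Sketch: synthesize speech with gTTS and return a file path for gr.Audio.
    # Note: gTTS uses its own language codes (e.g. 'zh-CN' rather than 'zh'),
    # so a real implementation may need a small code-mapping table.
    try:
        tts = gTTS(text=text, lang=lang)
        path = os.path.join(tempfile.gettempdir(), f"answer_{int(time.time())}.mp3")
        tts.save(path)
        return path
    except Exception as e:
        print(f"TTS error: {e}")
        return None


def transcribe_audio(audio_path, lang='en-US'):
    # Hypothetical speech-to-text helper for the spoken-question feature:
    # convert the recording to WAV with pydub, then transcribe it with
    # SpeechRecognition's Google Web Speech backend.
    recognizer = sr.Recognizer()
    wav_path = os.path.join(tempfile.gettempdir(), "question.wav")
    AudioSegment.from_file(audio_path).export(wav_path, format="wav")
    with sr.AudioFile(wav_path) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data, language=lang)
    except sr.UnknownValueError:
        return ""
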
# --- APP DESCRIPTION ---
# Feature summary recovered from the truncated tail of this file, kept as a
# string so the UI can render it.
APP_DESCRIPTION = """
- Powered by Transformers and Gradio
- Answers questions using Wikipedia content
- Speak or type your questions
- English, French, Spanish, Chinese, Arabic
- Automated hate speech detection
"""
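
# --- UI WIRING SKETCH (not shown in the original section) ---
# The original interface definition is missing from this section. The Blocks
# layout below is a minimal sketch of how the pieces above could be wired
# together; ask() is a hypothetical wrapper that applies moderation before
# answering, and the component layout is an assumption.

def ask(question, topic, lang, user_id="anonymous"):
    # Refuse service to blocked or banned users.
    if user_manager.get_user_status(user_id) != "active":
        return "Your access is currently restricted.", None
    # Moderate the question before answering; create_warning_message() could
    # feed a gr.HTML output instead of the plain string used here.
    violations = hate_detector.detect(question)
    if violations:
        user_manager.add_warning(user_id, "; ".join(violations))
        return f"Message blocked: {'; '.join(violations)}", None
    return generate_response(question, topic, lang)


with gr.Blocks() as demo:
    gr.Markdown(APP_DESCRIPTION)
    topic = gr.Textbox(label="Wikipedia topic")
    question = gr.Textbox(label="Your question")
    lang = gr.Dropdown(['en', 'fr', 'es', 'zh', 'ar'], value='en',
                       label="Answer language")
    answer_box = gr.Textbox(label="Answer")
    answer_audio = gr.Audio(label="Spoken answer")
    gr.Button("Ask").click(ask, inputs=[question, topic, lang],
                           outputs=[answer_box, answer_audio])

if __name__ == "__main__":
    demo.launch()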