"""AIxBI document checker: a Gradio app for AI-text and plagiarism screening.

The app extracts text from an uploaded PDF/DOCX/TXT file, scores it with a
RoBERTa-based ChatGPT detector, samples sentences and looks them up on
DuckDuckGo, stores the scores in SQLite and produces a PDF report.
"""

import logging
import os
import random
import re
import shutil
import sqlite3
import tempfile
from contextlib import closing
from datetime import datetime

import docx
import gradio as gr
import pandas as pd
import pdfplumber
import torch
from duckduckgo_search import DDGS
from fpdf import FPDF
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

logger = logging.getLogger(__name__)

# -----------------------------
# CONFIG
# -----------------------------
DB_NAME = "db.sqlite3"
# NOTE(security): the defaults below are kept for backward compatibility, but
# credentials should be supplied through the environment rather than committed
# to source control.
USERNAME = os.environ.get("AIXBI_USERNAME", "aixbi")
PASSWORD = os.environ.get("AIXBI_PASSWORD", "aixbi@123")
MAX_SENTENCES_CHECK = 10
LOGO_PATH = "aixbi.jpg"  # Place your logo file here
# Upper bound on tokens fed to the detector in a single forward pass.
MAX_MODEL_TOKENS = 512


# -----------------------------
# DB INIT
# -----------------------------
def init_db():
    """Create the ``results`` table if it does not exist yet."""
    # closing() guarantees the connection is released even if execute() raises;
    # "with conn" commits on success and rolls back on error.
    with closing(sqlite3.connect(DB_NAME)) as conn:
        with conn:
            conn.execute("""CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                student_id TEXT,
                student_name TEXT,
                ai_score REAL,
                plagiarism_score REAL,
                timestamp TEXT
            )""")


init_db()

# -----------------------------
# MODEL LOADING
# -----------------------------
embedder = SentenceTransformer('all-MiniLM-L6-v2')
tokenizer = AutoTokenizer.from_pretrained("hello-simpleai/chatgpt-detector-roberta")
model = AutoModelForSequenceClassification.from_pretrained("hello-simpleai/chatgpt-detector-roberta")


# -----------------------------
# FILE HANDLING
# -----------------------------
def extract_text(file_obj):
    """Extract text from an uploaded PDF, DOCX or TXT file.

    Args:
        file_obj: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string.

    Returns:
        The stripped text, or ``None`` when no file was given, the extension
        is unsupported, the file could not be parsed, or it contains no text.
    """
    if file_obj is None:
        return None

    # Accept both file-like objects with a .name attribute and plain paths.
    src_path = getattr(file_obj, "name", file_obj)
    ext = os.path.splitext(src_path)[1].lower()

    # Work on a private copy that preserves the extension. The descriptor is
    # closed before copying so the copy also works on platforms that forbid
    # writing to a file that is still open elsewhere.
    fd, tmp_path = tempfile.mkstemp(suffix=ext)
    os.close(fd)
    try:
        shutil.copy(src_path, tmp_path)
        if ext == ".pdf":
            with pdfplumber.open(tmp_path) as pdf:
                text = " ".join(page.extract_text() or "" for page in pdf.pages)
        elif ext == ".docx":
            doc = docx.Document(tmp_path)
            text = " ".join(p.text for p in doc.paragraphs)
        elif ext == ".txt":
            with open(tmp_path, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()
        else:
            return None
    except Exception:
        # Best effort: a corrupt or unreadable upload is reported to the user
        # as "could not read the file", but the cause is logged.
        logger.exception("Failed to extract text from %s", src_path)
        return None
    finally:
        # Always remove the temporary copy so uploads do not pile up on disk.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

    return text.strip() if text else None


# -----------------------------
# AI & PLAGIARISM DETECTION
# -----------------------------
def detect_ai_text(text):
    """Return the detector's probability (0..1) for class index 1.

    Index 1 is treated as the "AI-generated" class; the input is truncated to
    ``MAX_MODEL_TOKENS`` tokens by the tokenizer (not by character count).
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_MODEL_TOKENS,
    )
    with torch.no_grad():
        outputs = model(**inputs)
    score = torch.softmax(outputs.logits, dim=1)[0][1].item()
    return score  # probability of AI-generated


def live_plagiarism_check(sentences):
    """Look up a random sample of sentences on DuckDuckGo.

    A sentence counts as a hit when the search returns at least one result.

    Args:
        sentences: Candidate sentences from the document.

    Returns:
        A ``(score, suspicious_sentences)`` tuple where ``score`` is the
        percentage of successfully checked sentences that produced a hit
        (0 when nothing could be checked).
    """
    samples = random.sample(sentences, min(MAX_SENTENCES_CHECK, len(sentences)))
    suspicious_sentences = []
    checked = 0
    if not samples:
        return 0, suspicious_sentences

    ddgs = DDGS()
    for sentence in samples:
        try:
            results = list(ddgs.text(sentence, max_results=2))
        except Exception:
            # Network errors / rate limits must not abort the whole analysis;
            # the sentence is skipped and excluded from the score.
            logger.warning("Search failed for a sampled sentence", exc_info=True)
            continue
        checked += 1
        if results:
            suspicious_sentences.append(sentence)

    score = (len(suspicious_sentences) / checked) * 100 if checked else 0
    return score, suspicious_sentences


# -----------------------------
# DB OPS
# -----------------------------
def save_result(student_id, student_name, ai_score, plagiarism_score):
    """Insert one analysis result, timestamped with the current local time."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with closing(sqlite3.connect(DB_NAME)) as conn:
        with conn:
            conn.execute(
                "INSERT INTO results (student_id, student_name, ai_score, plagiarism_score, timestamp) VALUES (?,?,?,?,?)",
                (student_id, student_name, ai_score, plagiarism_score, timestamp),
            )


def load_results():
    """Return every row of the ``results`` table as a DataFrame."""
    with closing(sqlite3.connect(DB_NAME)) as conn:
        return pd.read_sql_query("SELECT * FROM results", conn)


# -----------------------------
# PDF REPORT
# -----------------------------
def _latin1_safe(text):
    """Replace characters the core PDF fonts cannot encode with '?'.

    The built-in "Arial" font only covers Latin-1, so typographic quotes,
    dashes or non-Latin scripts would otherwise raise an encoding error.
    """
    return str(text).encode("latin-1", "replace").decode("latin-1")


class HighlightPDF(FPDF):
    """FPDF subclass with helpers for plain and background-filled paragraphs."""

    def add_text_line(self, text):
        """Write a full-width paragraph starting at the left margin."""
        # Some FPDF implementations leave the cursor at the right edge after
        # multi_cell(); resetting x keeps consecutive calls from running out
        # of horizontal space.
        self.set_x(self.l_margin)
        self.multi_cell(0, 10, _latin1_safe(text))

    def add_highlighted_sentence(self, sentence, color):
        """Write ``sentence`` on a filled background of RGB ``color``."""
        self.set_fill_color(*color)
        self.set_x(self.l_margin)
        self.multi_cell(0, 10, _latin1_safe(sentence), fill=True)
        self.ln(1)


def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score,
                        suspicious_sentences, sample_text, output_path):
    """Render the analysis report to ``output_path`` as a PDF.

    Args:
        student_name: Name shown in the report header.
        student_id: Identifier shown next to the name.
        ai_score: AI probability, already expressed in percent.
        plagiarism_score: Plagiarism score in percent.
        suspicious_sentences: Sentences to list with a red highlight.
        sample_text: Excerpt shown with an orange highlight.
        output_path: Destination file path.
    """
    pdf = HighlightPDF()
    pdf.add_page()

    # Logo
    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, 10, 8, 20, 20)

    pdf.set_font("Arial", style='B', size=14)
    pdf.cell(200, 10, txt="AIxBI - Ultimate Document Plagiarism Report", ln=True, align='C')
    pdf.ln(20)

    pdf.set_font("Arial", size=12)
    pdf.add_text_line(f"Student: {student_name} ({student_id})")
    pdf.add_text_line(f"AI Probability: {ai_score:.2f}%")
    pdf.add_text_line(f"Plagiarism Score: {plagiarism_score:.2f}%")
    pdf.add_text_line(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    pdf.ln(10)

    pdf.add_text_line("Suspicious Sentences Detected:")
    if suspicious_sentences:
        for s in suspicious_sentences:
            pdf.add_highlighted_sentence(f"- {s}", (255, 200, 200))  # Red
    else:
        pdf.add_text_line("None detected.")
    pdf.ln(10)

    pdf.add_text_line("Sample Detected Text (AI/Plagiarized Excerpt):")
    pdf.add_highlighted_sentence(sample_text, (255, 230, 200))  # Orange
    pdf.ln(10)

    pdf.add_text_line("Recommendations for Student:")
    recommendations = """1. Rewrite detected sentences in your own words.
2. Add citations for copied/referenced material.
3. Avoid direct AI-generated content.
4. Proofread and recheck plagiarism before submission."""
    pdf.add_text_line(recommendations)

    pdf.output(output_path)


# -----------------------------
# APP LOGIC
# -----------------------------
def login(user, pwd):
    """Toggle the login/app panels when the credentials match.

    Returns updates for ``(login_box, app_box, login_msg)``.

    NOTE(review): this only hides UI panels; it is not an access-control
    boundary for the underlying endpoints.
    """
    if user == USERNAME and pwd == PASSWORD:
        return gr.update(visible=False), gr.update(visible=True), ""
    return gr.update(), gr.update(), "Invalid username or password!"


def _split_sentences(text):
    """Split text after sentence-ending punctuation; keep sentences > 30 chars."""
    parts = (s.strip() for s in re.split(r"(?<=[.!?])\s+", text))
    return [s for s in parts if len(s) > 30]


def _report_path(student_id):
    """Build a report path in a fresh temp directory from a sanitized id."""
    # student_id is user input: strip anything that could act as a path
    # separator or traversal sequence before using it in a file name.
    safe_id = re.sub(r"[^A-Za-z0-9_-]", "_", str(student_id)) or "student"
    return os.path.join(tempfile.mkdtemp(prefix="aixbi_report_"), f"{safe_id}_report.pdf")


def analyze(student_name, student_id, file_obj):
    """Run the full analysis pipeline for one uploaded document.

    Returns:
        ``(status, ai_score, plagiarism_score, pdf_path, highlighted_text)``;
        on validation or read failure only ``status`` is set and the other
        four entries are ``None``.
    """
    if file_obj is None or not student_name or not student_id:
        return "Please fill all fields and upload a document.", None, None, None, None

    text = extract_text(file_obj)
    if not text:
        return "Error: Could not read the file. Please upload a valid PDF, DOCX, or TXT.", None, None, None, None

    sentences = _split_sentences(text)

    # AI Detection
    ai_score = detect_ai_text(text) * 100

    # Live plagiarism
    plagiarism_score, suspicious_sentences = live_plagiarism_check(sentences)

    # Pick sample excerpt
    sample_text = suspicious_sentences[0] if suspicious_sentences else text[:200]

    # Save to DB
    save_result(student_id, student_name, ai_score, plagiarism_score)

    # Generate PDF
    output_pdf = _report_path(student_id)
    generate_pdf_report(student_name, student_id, ai_score, plagiarism_score,
                        suspicious_sentences, sample_text, output_pdf)

    if suspicious_sentences:
        highlighted_text = "\n\n".join(f"⚠️ {s}" for s in suspicious_sentences)
    else:
        highlighted_text = "No suspicious sentences found."

    return (
        f"Analysis Completed for {student_name} ({student_id})",
        round(ai_score, 2),
        round(plagiarism_score, 2),
        output_pdf,
        highlighted_text,
    )


def show_dashboard():
    """Return all stored results for the dashboard table."""
    df = load_results()
    return df


# -----------------------------
# GRADIO UI (LIGHT THEME)
# -----------------------------
with gr.Blocks(theme="default") as demo:
    with gr.Row():
        if os.path.exists(LOGO_PATH):
            # scale must be an integer; 0 keeps the logo at its minimum width
            # instead of letting it expand across the row.
            gr.Image(LOGO_PATH, elem_id="logo", show_label=False, scale=0, min_width=120)
        gr.Markdown("## **AIxBI - Ultimate Document Plagiarism Software**\n#### Professional Thesis & AI Content Detector", elem_id="title")

    # Login
    login_box = gr.Group(visible=True)
    with login_box:
        user = gr.Textbox(label="Username")
        pwd = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login", variant="primary")
        login_msg = gr.Markdown("")

    # Main App
    app_box = gr.Group(visible=False)
    with app_box:
        with gr.Tab("Check Thesis"):
            with gr.Row():
                student_name = gr.Textbox(label="Student Name")
                student_id = gr.Textbox(label="Student ID")
            file_upload = gr.File(label="Upload Document", file_types=[".pdf", ".docx", ".txt"])
            analyze_btn = gr.Button("Analyze Document", variant="primary")
            status = gr.Textbox(label="Status")
            ai_score = gr.Number(label="AI Probability (%)")
            plagiarism_score = gr.Number(label="Plagiarism Score (%)")
            suspicious_text = gr.Textbox(label="Suspicious Sentences Highlight", lines=10)
            pdf_output = gr.File(label="Download PDF Report")

        with gr.Tab("Summary Dashboard"):
            dashboard_btn = gr.Button("Refresh Dashboard", variant="secondary")
            dashboard = gr.Dataframe(headers=["id", "student_id", "student_name", "ai_score", "plagiarism_score", "timestamp"])

    login_btn.click(login, inputs=[user, pwd], outputs=[login_box, app_box, login_msg])
    analyze_btn.click(analyze, inputs=[student_name, student_id, file_upload],
                      outputs=[status, ai_score, plagiarism_score, pdf_output, suspicious_text])
    dashboard_btn.click(show_dashboard, outputs=[dashboard])

if __name__ == "__main__":
    demo.launch()