|
import hmac
import logging
import os
import random
import shutil
import sqlite3
import tempfile
from datetime import datetime

import docx
import gradio as gr
import pandas as pd
import pdfplumber
import torch
from duckduckgo_search import DDGS
from fpdf import FPDF
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Application configuration
# ---------------------------------------------------------------------------

# SQLite file used by init_db / save_result / load_results.
DB_NAME = "db.sqlite3"

# Credentials checked by login(). They can be overridden through the
# AIXBI_USERNAME / AIXBI_PASSWORD environment variables so that real
# deployments do not have to keep secrets in the source tree; the fallbacks
# are the values that were previously hard-coded.
USERNAME = os.environ.get("AIXBI_USERNAME", "aixbi")
PASSWORD = os.environ.get("AIXBI_PASSWORD", "aixbi@123")

# Upper bound on how many sentences are sampled for the web lookup.
MAX_SENTENCES_CHECK = 10

# Optional logo; it is only used where the file actually exists on disk.
LOGO_PATH = "aixbi.jpg"
|
|
|
|
|
|
|
|
|
def init_db():
    """Create the ``results`` table in the SQLite database if it is missing.

    The table holds an auto-incrementing ``id`` plus ``student_id``,
    ``student_name``, ``ai_score``, ``plagiarism_score`` and ``timestamp``.
    Calling this repeatedly is harmless because of ``IF NOT EXISTS``.
    """
    conn = sqlite3.connect(DB_NAME)
    try:
        conn.execute(
            """CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                student_id TEXT,
                student_name TEXT,
                ai_score REAL,
                plagiarism_score REAL,
                timestamp TEXT
            )"""
        )
        conn.commit()
    finally:
        # Close even when the statement fails so the handle is never leaked.
        conn.close()
|
|
|
# Make sure the results table exists before any request handler runs.
init_db()

# Models are loaded once, at import time, and reused by every request.
# NOTE(review): `embedder` is not referenced anywhere else in the visible
# code — confirm whether it is still needed before paying its load cost.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Tokenizer and classifier must come from the same checkpoint.
_DETECTOR_CHECKPOINT = "hello-simpleai/chatgpt-detector-roberta"
tokenizer = AutoTokenizer.from_pretrained(_DETECTOR_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_DETECTOR_CHECKPOINT)
|
|
|
|
|
|
|
|
|
def extract_text(file_obj):
    """Extract plain text from an uploaded PDF, DOCX or TXT file.

    Args:
        file_obj: The upload. Either an object exposing the file's path as
            ``.name`` or a plain path string.

    Returns:
        The extracted text with surrounding whitespace removed, or ``None``
        when no file was given, the extension is not supported, the file
        could not be read/parsed, or it contains no text.
    """
    if file_obj is None:
        return None

    src_path = getattr(file_obj, "name", file_obj)
    ext = os.path.splitext(str(src_path))[1].lower()
    # Reject unsupported types before touching the disk.
    if ext not in (".pdf", ".docx", ".txt"):
        return None

    try:
        # Parse a private copy of the upload; the temporary directory (and the
        # copy inside it) is removed as soon as the block exits, so nothing is
        # left behind per request.
        with tempfile.TemporaryDirectory() as work_dir:
            tmp_path = os.path.join(work_dir, "upload" + ext)
            shutil.copy(src_path, tmp_path)

            if ext == ".pdf":
                with pdfplumber.open(tmp_path) as pdf:
                    # extract_text() may yield None for a page; treat as empty.
                    text = " ".join(page.extract_text() or "" for page in pdf.pages)
            elif ext == ".docx":
                doc = docx.Document(tmp_path)
                text = " ".join(p.text for p in doc.paragraphs)
            else:
                with open(tmp_path, "r", encoding="utf-8", errors="ignore") as f:
                    text = f.read()
    except Exception:
        # Unreadable or corrupt uploads are reported to the caller as None
        # (it shows a friendly message); the traceback is kept in the log.
        logging.getLogger(__name__).exception(
            "Could not extract text from %s", src_path
        )
        return None

    return text.strip() or None
|
|
|
|
|
|
|
|
|
def detect_ai_text(text):
    """Score ``text`` with the sequence-classification detector.

    Args:
        text: The document text to classify.

    Returns:
        The softmax probability of class index 1 as a float in ``[0, 1]``.
        The caller presents this value as the "AI probability".
        NOTE(review): confirm index 1 is the AI class in the checkpoint's
        ``id2label`` mapping.
    """
    # Truncate by *tokens*, not characters: slicing the string to 512
    # characters fed the model only a small part of its input window.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    return torch.softmax(logits, dim=1)[0][1].item()
|
|
|
def live_plagiarism_check(sentences):
    """Look up a random sample of sentences on the web.

    Up to ``MAX_SENTENCES_CHECK`` sentences are sampled and sent to the
    search client one by one. A sentence counts as a hit when the search
    returns at least one result.
    NOTE(review): any result counts, regardless of how closely it matches
    the sentence — confirm this heuristic is strict enough.

    Args:
        sentences: List of candidate sentences.

    Returns:
        ``(score, suspicious_sentences)`` where ``score`` is the percentage
        of successfully checked sentences that produced a hit and
        ``suspicious_sentences`` lists those sentences.

    Raises:
        Exception: The last search error, when every single lookup failed
            (so a total outage is not reported as "0% plagiarism").
    """
    if not sentences:
        # Nothing to check: skip creating a search client altogether.
        return 0, []

    samples = random.sample(sentences, min(MAX_SENTENCES_CHECK, len(sentences)))
    ddgs = DDGS()
    suspicious_sentences = []
    checked = 0
    last_error = None

    for sentence in samples:
        try:
            results = list(ddgs.text(sentence, max_results=2))
        except Exception as err:
            # A single failed lookup (timeout, rate limit, ...) must not
            # abort the whole analysis; skip it and keep going.
            logging.getLogger(__name__).warning("Web lookup failed: %s", err)
            last_error = err
            continue
        checked += 1
        if results:
            suspicious_sentences.append(sentence)

    if not checked:
        raise last_error

    score = len(suspicious_sentences) / checked * 100
    return score, suspicious_sentences
|
|
|
|
|
|
|
|
|
def save_result(student_id, student_name, ai_score, plagiarism_score):
    """Insert one analysis result into the ``results`` table.

    Args:
        student_id: Identifier entered for the student.
        student_name: Name entered for the student.
        ai_score: AI score to store.
        plagiarism_score: Plagiarism score to store.

    The row is stamped with the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    conn = sqlite3.connect(DB_NAME)
    try:
        # Parameterised statement: values are bound, never interpolated.
        conn.execute(
            "INSERT INTO results (student_id, student_name, ai_score, plagiarism_score, timestamp) VALUES (?,?,?,?,?)",
            (student_id, student_name, ai_score, plagiarism_score, timestamp),
        )
        conn.commit()
    finally:
        # Close even when the insert fails so the handle is never leaked.
        conn.close()
|
|
|
def load_results():
    """Return every row of the ``results`` table as a pandas DataFrame."""
    conn = sqlite3.connect(DB_NAME)
    try:
        return pd.read_sql_query("SELECT * FROM results", conn)
    finally:
        # Close even when the query fails so the handle is never leaked.
        conn.close()
|
|
|
|
|
|
|
|
|
class HighlightPDF(FPDF):
    """FPDF document with a helper for printing text on a coloured background."""

    def add_highlighted_sentence(self, sentence, color):
        """Write ``sentence`` as a wrapped paragraph with a filled background.

        Args:
            sentence: Text to print.
            color: ``(r, g, b)`` fill colour, unpacked into ``set_fill_color``.
        """
        # This file only ever selects the built-in Arial font, which is
        # limited to Latin-1. Characters outside it (curly quotes, dashes,
        # non-Latin scripts - common in extracted PDF/DOCX text) are replaced
        # with "?" instead of making PDF generation fail.
        printable = str(sentence).encode("latin-1", "replace").decode("latin-1")

        self.set_fill_color(*color)
        # Start at the left margin so a zero-width ("to the right margin")
        # cell always has room, wherever the previous call left the cursor.
        self.set_x(self.l_margin)
        self.multi_cell(0, 10, printable, fill=True)
        self.ln(1)
|
|
|
def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, sample_text, output_path):
    """Render the analysis report as a PDF and write it to ``output_path``.

    The report contains, in order: optional logo, title, student details and
    scores, the list of suspicious sentences (highlighted), one sample
    excerpt (highlighted) and a fixed list of recommendations.

    Args:
        student_name: Student name shown in the header line.
        student_id: Student identifier shown in the header line.
        ai_score: AI score, printed with two decimals and a ``%`` sign.
        plagiarism_score: Plagiarism score, printed the same way.
        suspicious_sentences: Sentences to list; may be empty.
        sample_text: Excerpt printed in the sample section.
        output_path: Destination file path for the PDF.
    """

    def latin1(value):
        # The built-in Arial font is limited to Latin-1; replace anything
        # else with "?" so user-supplied text cannot break PDF generation.
        return str(value).encode("latin-1", "replace").decode("latin-1")

    pdf = HighlightPDF()
    pdf.add_page()

    def write_line(text):
        # Reset to the left margin first: depending on the fpdf release the
        # cursor may be left at the right edge after a multi_cell, which
        # leaves no room for the next zero-width cell.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, latin1(text))

    def highlight(text, color):
        pdf.set_x(pdf.l_margin)
        pdf.add_highlighted_sentence(latin1(text), color)

    # Logo is optional; skip it silently when the file is not present.
    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, 10, 8, 20, 20)

    pdf.set_font("Arial", style="B", size=14)
    pdf.cell(200, 10, "AIxBI - Ultimate Document Plagiarism Report", ln=True, align="C")
    pdf.ln(20)

    pdf.set_font("Arial", size=12)
    write_line(f"Student: {student_name} ({student_id})")
    write_line(f"AI Probability: {ai_score:.2f}%")
    write_line(f"Plagiarism Score: {plagiarism_score:.2f}%")
    write_line(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    pdf.ln(10)

    write_line("Suspicious Sentences Detected:")
    if suspicious_sentences:
        for s in suspicious_sentences:
            highlight(f"- {s}", (255, 200, 200))
    else:
        write_line("None detected.")
    pdf.ln(10)

    write_line("Sample Detected Text (AI/Plagiarized Excerpt):")
    highlight(sample_text, (255, 230, 200))
    pdf.ln(10)

    write_line("Recommendations for Student:")
    recommendations = """1. Rewrite detected sentences in your own words.
2. Add citations for copied/referenced material.
3. Avoid direct AI-generated content.
4. Proofread and recheck plagiarism before submission."""
    write_line(recommendations)

    pdf.output(output_path)
|
|
|
|
|
|
|
|
|
def login(user, pwd):
    """Check the submitted credentials and toggle the login/app panels.

    Args:
        user: Username typed into the login form.
        pwd: Password typed into the login form.

    Returns:
        A 3-tuple of updates for ``(login_box, app_box, login_msg)``: on
        success the login box is hidden, the app box is shown and the
        message is cleared; on failure both boxes are left untouched and an
        error message is returned.
    """
    # Compare in constant time so response timing does not reveal how much
    # of a guess was correct. Bytes are used because compare_digest rejects
    # non-ASCII str; `or ""` tolerates a missing value.
    user_ok = hmac.compare_digest((user or "").encode("utf-8"), USERNAME.encode("utf-8"))
    pwd_ok = hmac.compare_digest((pwd or "").encode("utf-8"), PASSWORD.encode("utf-8"))

    # Both comparisons are evaluated before being combined (no short-circuit).
    if user_ok and pwd_ok:
        return gr.update(visible=False), gr.update(visible=True), ""
    return gr.update(), gr.update(), "Invalid username or password!"
|
|
|
def analyze(student_name, student_id, file_obj):
    """Run the full analysis for one uploaded document.

    Steps: validate the form, extract the text, score it with the AI
    detector, run the web lookup on sampled sentences, store the result in
    the database and render the PDF report.

    Args:
        student_name: Name typed into the form.
        student_id: Identifier typed into the form.
        file_obj: The uploaded document (or ``None``).

    Returns:
        A 5-tuple ``(status, ai_score, plagiarism_score, pdf_path,
        highlighted_text)``. On a validation or extraction error only
        ``status`` is set and the other four items are ``None``.
    """
    # Whitespace-only input counts as missing.
    student_name = (student_name or "").strip()
    student_id = (student_id or "").strip()
    if file_obj is None or not student_name or not student_id:
        return "Please fill all fields and upload a document.", None, None, None, None

    text = extract_text(file_obj)
    if not text:
        return "Error: Could not read the file. Please upload a valid PDF, DOCX, or TXT.", None, None, None, None

    # Crude sentence split; very short fragments are not worth looking up.
    sentences = [s.strip() for s in text.split(". ") if len(s) > 30]

    # The detector returns a probability in [0, 1]; report it as a percentage.
    ai_score = detect_ai_text(text) * 100

    plagiarism_score, suspicious_sentences = live_plagiarism_check(sentences)

    # Excerpt for the report: first flagged sentence, else the document start.
    sample_text = suspicious_sentences[0] if suspicious_sentences else text[:200]

    save_result(student_id, student_name, ai_score, plagiarism_score)

    # student_id is user input: keep only filename-safe characters so it
    # cannot escape the target directory (e.g. "../../x"), and write into the
    # temp directory rather than the application's working directory.
    safe_id = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in student_id)
    output_pdf = os.path.join(tempfile.gettempdir(), f"{safe_id}_report.pdf")
    generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, sample_text, output_pdf)

    if suspicious_sentences:
        highlighted_text = "\n\n".join(f"⚠️ {s}" for s in suspicious_sentences)
    else:
        highlighted_text = "No suspicious sentences found."

    return (
        f"Analysis Completed for {student_name} ({student_id})",
        round(ai_score, 2),
        round(plagiarism_score, 2),
        output_pdf,
        highlighted_text,
    )
|
|
|
def show_dashboard():
    """Return the stored results table for display in the dashboard tab."""
    return load_results()
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio user interface
# NOTE(review): the nesting below was reconstructed from the statement order;
# confirm that the row/tab grouping matches the intended layout.
# ---------------------------------------------------------------------------
with gr.Blocks(theme="default") as demo:
    # Header row: optional logo next to the title.
    with gr.Row():
        if os.path.exists(LOGO_PATH):
            gr.Image(LOGO_PATH, elem_id="logo", show_label=False, scale=0.2)
        gr.Markdown("## **AIxBI - Ultimate Document Plagiarism Software**\n#### Professional Thesis & AI Content Detector", elem_id="title")

    # Login panel: visible at start-up, hidden by login() on success.
    with gr.Group(visible=True) as login_box:
        user = gr.Textbox(label="Username")
        pwd = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login", variant="primary")
        login_msg = gr.Markdown("")

    # Main application panel: hidden until login() reveals it.
    with gr.Group(visible=False) as app_box:
        with gr.Tab("Check Thesis"):
            with gr.Row():
                student_name = gr.Textbox(label="Student Name")
                student_id = gr.Textbox(label="Student ID")
            file_upload = gr.File(label="Upload Document", file_types=[".pdf", ".docx", ".txt"])
            analyze_btn = gr.Button("Analyze Document", variant="primary")
            status = gr.Textbox(label="Status")
            ai_score = gr.Number(label="AI Probability (%)")
            plagiarism_score = gr.Number(label="Plagiarism Score (%)")
            suspicious_text = gr.Textbox(label="Suspicious Sentences Highlight", lines=10)
            pdf_output = gr.File(label="Download PDF Report")

        with gr.Tab("Summary Dashboard"):
            dashboard_btn = gr.Button("Refresh Dashboard", variant="secondary")
            dashboard = gr.Dataframe(headers=["id", "student_id", "student_name", "ai_score", "plagiarism_score", "timestamp"])

    # Event wiring; the output order must match each handler's return tuple.
    login_btn.click(login, inputs=[user, pwd], outputs=[login_box, app_box, login_msg])
    analyze_btn.click(
        analyze,
        inputs=[student_name, student_id, file_upload],
        outputs=[status, ai_score, plagiarism_score, pdf_output, suspicious_text],
    )
    dashboard_btn.click(show_dashboard, outputs=[dashboard])


# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|
|