import gradio as gr
import pdfplumber, docx, sqlite3, os, random, tempfile, shutil
from datetime import datetime
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from duckduckgo_search import DDGS
from fpdf import FPDF

# -----------------------------
# CONFIG
# -----------------------------
# SQLite database file; a relative path, so it resolves against the working directory.
DB_NAME = "db.sqlite3"
# Single shared account checked by login().
# NOTE(review): credentials are hard-coded in source — consider loading them
# from the environment or a secrets store instead.
USERNAME = "aixbi"
PASSWORD = "aixbi@123"
# Upper bound on how many sentences live_plagiarism_check() samples per document.
MAX_SENTENCES_CHECK = 10
# Optional logo image; the report and the UI only use it when the file exists.
LOGO_PATH = "aixbi.jpg"  # Place your logo file here

# -----------------------------
# DB INIT
# -----------------------------
def init_db():
    """Create the ``results`` table in the SQLite database if it is missing.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this more
    than once is harmless.
    """
    conn = sqlite3.connect(DB_NAME)
    try:
        # `with conn` commits on success and rolls back on error; it does not
        # close the connection, hence the explicit close() in `finally`.
        with conn:
            conn.execute("""CREATE TABLE IF NOT EXISTS results (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    student_id TEXT,
                    student_name TEXT,
                    ai_score REAL,
                    plagiarism_score REAL,
                    timestamp TEXT
                )""")
    finally:
        conn.close()

# Run at import time so the results table exists before any handler touches it.
init_db()

# -----------------------------
# MODEL LOADING
# -----------------------------
# Models are loaded once at import time and kept as module-level globals.
# NOTE(review): `embedder` is not referenced anywhere else in the visible
# file — confirm it is still needed before paying its load cost.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# Tokenizer and sequence-classification model used by detect_ai_text().
tokenizer = AutoTokenizer.from_pretrained("hello-simpleai/chatgpt-detector-roberta")
model = AutoModelForSequenceClassification.from_pretrained("hello-simpleai/chatgpt-detector-roberta")

# -----------------------------
# FILE HANDLING
# -----------------------------
def extract_text(file_obj):
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the uploaded
            file, or ``None`` when nothing was uploaded.

    Returns:
        The extracted text with surrounding whitespace stripped, or ``None``
        when no file was given, the extension is unsupported, the file cannot
        be read or parsed, or it contains no text.
    """
    if file_obj is None:
        return None

    ext = os.path.splitext(file_obj.name)[1].lower()
    if ext not in (".pdf", ".docx", ".txt"):
        # Reject unsupported types before creating any temporary file.
        return None

    # Work on a private copy that keeps the original extension.
    fd, tmp_path = tempfile.mkstemp(suffix=ext)
    os.close(fd)

    try:
        shutil.copy(file_obj.name, tmp_path)
        if ext == ".pdf":
            with pdfplumber.open(tmp_path) as pdf:
                text = " ".join(page.extract_text() or "" for page in pdf.pages)
        elif ext == ".docx":
            doc = docx.Document(tmp_path)
            text = " ".join(p.text for p in doc.paragraphs)
        else:
            with open(tmp_path, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()
    except Exception:
        # Best-effort by design: any copy/parse failure is reported to the
        # caller as "unreadable" (None). Unlike a bare `except:`, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    finally:
        # Always remove the temporary copy so uploads do not pile up on disk.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

    return text.strip() or None

# -----------------------------
# AI & PLAGIARISM DETECTION
# -----------------------------
def detect_ai_text(text):
    """Return the classifier's probability (0.0-1.0) that *text* is AI-generated.

    Args:
        text: Document text to score.

    Returns:
        The softmax probability of class index 1 as a Python float.
        NOTE(review): index 1 is assumed to be the "AI-generated" label of
        the loaded checkpoint — confirm against the model's ``id2label``.
    """
    # Let the tokenizer truncate to 512 *tokens*. Slicing the string to 512
    # characters first would score only a small prefix of the document.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    return torch.softmax(logits, dim=1)[0][1].item()

def live_plagiarism_check(sentences):
    """Search the web for a random sample of sentences and score the hits.

    Up to ``MAX_SENTENCES_CHECK`` sentences are sampled. A sentence counts as
    suspicious when the search returns at least one result for it.
    NOTE(review): any search result counts as a hit, even when the page does
    not contain the sentence verbatim — confirm this heuristic is intended.

    Args:
        sentences: List of candidate sentences from the document.

    Returns:
        ``(score, suspicious_sentences)``: ``score`` is the percentage of
        successfully searched sentences that produced a hit (0 when nothing
        could be checked); ``suspicious_sentences`` lists those sentences.
    """
    samples = random.sample(sentences, min(MAX_SENTENCES_CHECK, len(sentences)))
    if not samples:
        return 0, []

    ddgs = DDGS()
    suspicious_sentences = []
    checked = 0

    for sentence in samples:
        try:
            results = list(ddgs.text(sentence, max_results=2))
        except Exception:
            # A failed lookup (rate limit, timeout, network error) says
            # nothing about the sentence, so skip it rather than abort the
            # whole analysis or count it as clean.
            continue
        checked += 1
        if results:
            suspicious_sentences.append(sentence)

    score = (len(suspicious_sentences) / checked) * 100 if checked else 0
    return score, suspicious_sentences

# -----------------------------
# DB OPS
# -----------------------------
def save_result(student_id, student_name, ai_score, plagiarism_score):
    """Insert one analysis result into the ``results`` table.

    Args:
        student_id: Identifier entered for the student.
        student_name: Name entered for the student.
        ai_score: AI-probability score to store.
        plagiarism_score: Plagiarism score to store.

    The row is stamped with the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    conn = sqlite3.connect(DB_NAME)
    try:
        # `with conn` commits on success and rolls back on error; it does not
        # close the connection, hence the explicit close() in `finally`.
        with conn:
            conn.execute(
                "INSERT INTO results (student_id, student_name, ai_score, plagiarism_score, timestamp) VALUES (?,?,?,?,?)",
                (student_id, student_name, ai_score, plagiarism_score, timestamp),
            )
    finally:
        conn.close()

def load_results():
    """Return every row of the ``results`` table as a pandas DataFrame."""
    conn = sqlite3.connect(DB_NAME)
    try:
        return pd.read_sql_query("SELECT * FROM results", conn)
    finally:
        # Close the connection even when the query raises.
        conn.close()

# -----------------------------
# PDF REPORT
# -----------------------------
class HighlightPDF(FPDF):
    """FPDF subclass with a helper for printing text on a coloured background."""

    def add_highlighted_sentence(self, sentence, color):
        """Write *sentence* as a filled multi-line cell.

        Args:
            sentence: Text to print.
            color: Fill colour components, unpacked into ``set_fill_color``
                (callers in this file pass ``(r, g, b)`` tuples).
        """
        # FPDF's built-in fonts only cover Latin-1. Text extracted from
        # uploads often contains smart quotes, dashes or non-Latin scripts
        # that would make rendering fail, so replace them with "?".
        printable = str(sentence).encode("latin-1", "replace").decode("latin-1")
        self.set_fill_color(*color)
        self.multi_cell(0, 10, printable, fill=True)
        self.ln(1)

def _latin1_safe(value):
    """Return *value* as text limited to Latin-1, replacing other characters with "?"."""
    return str(value).encode("latin-1", "replace").decode("latin-1")


def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, sample_text, output_path):
    """Write the PDF report for one analysed document to *output_path*.

    Args:
        student_name: Student name shown in the report header.
        student_id: Student identifier shown next to the name.
        ai_score: AI probability, printed as a percentage with two decimals.
        plagiarism_score: Plagiarism score, printed the same way.
        suspicious_sentences: Sentences listed on a red background; when
            empty, "None detected." is printed instead.
        sample_text: Excerpt shown on an orange background.
        output_path: Destination path handed to ``FPDF.output``.

    All dynamic text passes through ``_latin1_safe`` first. The built-in
    Arial font only supports Latin-1, and user-supplied names or extracted
    document text would otherwise make rendering fail.
    """
    pdf = HighlightPDF()
    pdf.add_page()

    # Logo is optional: draw it only when the image file is present.
    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, 10, 8, 20, 20)

    pdf.set_font("Arial", style='B', size=14)
    pdf.cell(200, 10, txt="AIxBI - Ultimate Document Plagiarism Report", ln=True, align='C')
    pdf.ln(20)

    pdf.set_font("Arial", size=12)
    header_lines = (
        f"Student: {student_name} ({student_id})",
        f"AI Probability: {ai_score:.2f}%",
        f"Plagiarism Score: {plagiarism_score:.2f}%",
        f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    )
    for line in header_lines:
        pdf.multi_cell(0, 10, txt=_latin1_safe(line))
    pdf.ln(10)

    pdf.multi_cell(0, 10, txt="Suspicious Sentences Detected:")
    if suspicious_sentences:
        for s in suspicious_sentences:
            pdf.add_highlighted_sentence(_latin1_safe(f"- {s}"), (255, 200, 200))  # Red
    else:
        pdf.multi_cell(0, 10, "None detected.")
    pdf.ln(10)

    pdf.multi_cell(0, 10, txt="Sample Detected Text (AI/Plagiarized Excerpt):")
    pdf.add_highlighted_sentence(_latin1_safe(sample_text), (255, 230, 200))  # Orange
    pdf.ln(10)

    pdf.multi_cell(0, 10, txt="Recommendations for Student:")
    recommendations = """1. Rewrite detected sentences in your own words.
2. Add citations for copied/referenced material.
3. Avoid direct AI-generated content.
4. Proofread and recheck plagiarism before submission."""
    pdf.multi_cell(0, 10, recommendations)

    pdf.output(output_path)

# -----------------------------
# APP LOGIC
# -----------------------------
def login(user, pwd):
    """Check the submitted credentials against the configured account.

    Returns three values for the login box, the app box and the message
    area. On success the login box is hidden, the app box is shown and the
    message is cleared. On failure both boxes are left untouched and an
    error message is returned.
    """
    credentials_ok = (user, pwd) == (USERNAME, PASSWORD)
    if not credentials_ok:
        return gr.update(), gr.update(), "Invalid username or password!"
    return gr.update(visible=False), gr.update(visible=True), ""

def analyze(student_name, student_id, file_obj):
    """Run the full analysis pipeline for one uploaded document.

    Steps: extract the text, score it for AI-generated content, run the live
    plagiarism check on its sentences, store the result in the database and
    generate the PDF report.

    Args:
        student_name: Name typed into the form.
        student_id: Identifier typed into the form.
        file_obj: Uploaded file object, or ``None``.

    Returns:
        A 5-tuple ``(status, ai_score, plagiarism_score, report_path,
        highlighted_text)``. On a validation or extraction failure only
        ``status`` is set and the other four items are ``None``.
    """
    # Treat whitespace-only input as missing.
    student_name = (student_name or "").strip()
    student_id = (student_id or "").strip()
    if file_obj is None or not student_name or not student_id:
        return "Please fill all fields and upload a document.", None, None, None, None

    text = extract_text(file_obj)
    if not text:
        return "Error: Could not read the file. Please upload a valid PDF, DOCX, or TXT.", None, None, None, None

    # Naive sentence split; very short fragments are ignored.
    sentences = [s.strip() for s in text.split(". ") if len(s) > 30]

    # AI detection (probability converted to a percentage)
    ai_score = detect_ai_text(text) * 100

    # Live plagiarism
    plagiarism_score, suspicious_sentences = live_plagiarism_check(sentences)

    # Pick sample excerpt: first suspicious sentence, else the opening of the text
    sample_text = suspicious_sentences[0] if suspicious_sentences else text[:200]

    # Save to DB
    save_result(student_id, student_name, ai_score, plagiarism_score)

    # Generate PDF. The ID is user input, so keep only filename-safe
    # characters; otherwise a value such as "../x" could escape the working
    # directory or yield an invalid filename.
    safe_id = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in student_id)
    output_pdf = f"{safe_id}_report.pdf"
    generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, sample_text, output_pdf)

    highlighted_text = "\n\n".join([f"⚠️ {s}" for s in suspicious_sentences]) if suspicious_sentences else "No suspicious sentences found."
    return f"Analysis Completed for {student_name} ({student_id})", round(ai_score,2), round(plagiarism_score,2), output_pdf, highlighted_text

def show_dashboard():
    """Return all stored results as a DataFrame for the dashboard table."""
    return load_results()

# -----------------------------
# GRADIO UI (LIGHT THEME)
# -----------------------------
# Build the whole UI inside one Blocks context; `demo` is launched by the
# script entry point at the bottom of the file.
with gr.Blocks(theme="default") as demo:
    # Header row: optional logo (only when the image file exists) plus title.
    with gr.Row():
        if os.path.exists(LOGO_PATH):
            # NOTE(review): Gradio documents `scale` as an integer — confirm
            # that 0.2 behaves as intended on the installed version.
            gr.Image(LOGO_PATH, elem_id="logo", show_label=False, scale=0.2)
        gr.Markdown("## **AIxBI - Ultimate Document Plagiarism Software**\n#### Professional Thesis & AI Content Detector", elem_id="title")

    # Login form: visible at start; login() hides it on success.
    login_box = gr.Group(visible=True)
    with login_box:
        user = gr.Textbox(label="Username")
        pwd = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login", variant="primary")
        login_msg = gr.Markdown("")
    
    # Main app: hidden at start; login() reveals it on success.
    app_box = gr.Group(visible=False)
    with app_box:
        # Tab 1: upload a document and view the analysis outputs.
        with gr.Tab("Check Thesis"):
            with gr.Row():
                student_name = gr.Textbox(label="Student Name")
                student_id = gr.Textbox(label="Student ID")
            file_upload = gr.File(label="Upload Document", file_types=[".pdf",".docx",".txt"])
            analyze_btn = gr.Button("Analyze Document", variant="primary")
            status = gr.Textbox(label="Status")
            ai_score = gr.Number(label="AI Probability (%)")
            plagiarism_score = gr.Number(label="Plagiarism Score (%)")
            suspicious_text = gr.Textbox(label="Suspicious Sentences Highlight", lines=10)
            pdf_output = gr.File(label="Download PDF Report")
            
        # Tab 2: table of stored results, filled in only when the button is clicked.
        with gr.Tab("Summary Dashboard"):
            dashboard_btn = gr.Button("Refresh Dashboard", variant="secondary")
            dashboard = gr.Dataframe(headers=["id","student_id","student_name","ai_score","plagiarism_score","timestamp"])

    # Event wiring. The order of each `outputs` list must match the order of
    # the values returned by the corresponding handler.
    login_btn.click(login, inputs=[user, pwd], outputs=[login_box, app_box, login_msg])
    analyze_btn.click(analyze, inputs=[student_name, student_id, file_upload], outputs=[status, ai_score, plagiarism_score, pdf_output, suspicious_text])
    dashboard_btn.click(show_dashboard, outputs=[dashboard])

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()