import os
import zlib
from pathlib import Path

import gradio as gr
import numpy as np
import pandas as pd
import torch
from scipy.stats import skew, kurtosis, entropy
from torch.nn import CrossEntropyLoss
from transformers import AutoModelForCausalLM, AutoTokenizer

import spaces

theme = gr.Theme.from_hub("gstaff/xkcd")  # fetched but not applied; the Blocks call below uses gr.themes.Soft()

# ===========================================================
@spaces.GPU
def detect_ai_text(text):
    global loaded
    import xgboost as xgb  # deferred import so app startup stays light

    if not loaded:
        return "❌ Model not loaded. We require a GPU to run DivEye.", 0.0, pd.DataFrame({
            "Source": ["AI Generated", "Human Written"],
            "Probability (%)": [0, 0]
        })

    text = text.strip()
    if not text or len(text.split()) < 15:
        return (
            "❌ Please enter some text with at least 15 words.",
            0.0,
            pd.DataFrame({
                "Source": ["AI Generated", "Human Written"],
                "Probability (%)": [0, 0]
            })
        )

    # Load the trained XGBoost classifier shipped alongside this app.
    model_path = Path(__file__).parent / "model.json"
    model = xgb.XGBClassifier()
    model.load_model(model_path)

    global div_model, div_tokenizer, bi_model, bi_tokenizer

    # =====================================================================
    # DivEye features

    # 1. Token log-likelihoods under the diversity model
    tokens = div_tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=1024).to(div_model.device)
    with torch.no_grad():
        outputs = div_model(tokens, labels=tokens)
    logits = outputs.logits
    shift_logits = logits[:, :-1, :].squeeze(0)
    shift_labels = tokens[:, 1:].squeeze(0)
    log_probs = torch.log_softmax(shift_logits.float(), dim=-1)
    token_log_likelihoods = log_probs[range(shift_labels.shape[0]), shift_labels].cpu().numpy()

    # 2. Surprisal (negative log-likelihood per token)
    surprisals = -token_log_likelihoods

    if len(surprisals) < 10 or len(token_log_likelihoods) < 3:
        # Too few tokens for stable statistics: fall back to zeros.
        diveye_features = [0.0] * 11
    else:
        s = np.array(surprisals)
        mean_s, std_s, var_s, skew_s, kurt_s = np.mean(s), np.std(s), np.var(s), skew(s), kurtosis(s)
        diff_s = np.diff(s)
        mean_diff, std_diff = np.mean(diff_s), np.std(diff_s)

        first_order_diff = np.diff(token_log_likelihoods)
        second_order_diff = np.diff(first_order_diff)
        var_2nd = np.var(second_order_diff)
        entropy_2nd = entropy(np.histogram(second_order_diff, bins=20, density=True)[0])
        autocorr_2nd = np.corrcoef(second_order_diff[:-1], second_order_diff[1:])[0, 1] if len(second_order_diff) > 1 else 0

        # Compression ratio of the raw text as a crude redundancy measure.
        comp_ratio = len(zlib.compress(text.encode('utf-8'))) / len(text.encode('utf-8'))

        diveye_features = [mean_s, std_s, var_s, skew_s, kurt_s, mean_diff, std_diff,
                           var_2nd, entropy_2nd, autocorr_2nd, comp_ratio]
    # =====================================================================

    # =====================================================================
    # BiScope features
    COMPLETION_PROMPT_ONLY = "Complete the following text: "
    prompt_ids = bi_tokenizer(COMPLETION_PROMPT_ONLY, return_tensors='pt').input_ids.to(bi_model.device)
    text_ids = bi_tokenizer(text, return_tensors='pt', max_length=2000, truncation=True).input_ids.to(bi_model.device)
    combined_ids = torch.cat([prompt_ids, text_ids], dim=1)
    text_slice = slice(prompt_ids.shape[1], combined_ids.shape[1])

    with torch.no_grad():
        outputs = bi_model(input_ids=combined_ids)
    logits = outputs.logits

    targets = combined_ids[0][text_slice]
    # Forward cross-entropy: logits at position i-1 scored against token i.
    fce_loss = CrossEntropyLoss(reduction='none')(
        logits[0, text_slice.start - 1:text_slice.stop - 1, :],
        targets
    ).detach().cpu().numpy()
    # Backward cross-entropy: logits at position i scored against token i.
    bce_loss = CrossEntropyLoss(reduction='none')(
        logits[0, text_slice, :],
        targets
    ).detach().cpu().numpy()

    biscope_features = []
    for p in range(1, 10):
        # Summary statistics over each of 9 suffix windows of the losses.
        split = len(fce_loss) * p // 10
        fce_clipped = np.nan_to_num(np.clip(fce_loss[split:], -1e6, 1e6), nan=0.0, posinf=1e6, neginf=-1e6)
        bce_clipped = np.nan_to_num(np.clip(bce_loss[split:], -1e6, 1e6), nan=0.0, posinf=1e6, neginf=-1e6)
        biscope_features.extend([
            np.mean(fce_clipped), np.max(fce_clipped), np.min(fce_clipped), np.std(fce_clipped),
            np.mean(bce_clipped), np.max(bce_clipped), np.min(bce_clipped), np.std(bce_clipped)
        ])
    # =====================================================================

    diveye_features.extend(biscope_features)

    ai_prob = model.predict_proba([diveye_features])[:, 1][0].item()
    # ===========================================================

    human_prob = 1 - ai_prob
    if ai_prob > 0.7:
        message = f"🤖 **Likely AI-generated** (Confidence: {ai_prob:.2%})"
    elif ai_prob > 0.5:
        message = f"⚠️ **Possibly AI-generated** (Confidence: {ai_prob:.2%})"
    else:
        message = f"✅ **Likely Human-written** (Confidence: {human_prob:.2%})"

    bar_data = pd.DataFrame({
        "Source": ["AI Generated", "Human Written"],
        "Probability (%)": [ai_prob * 100, human_prob * 100]
    })

    return message, round(ai_prob, 3), bar_data
# ==========================================================
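# ------------------------------------------------------------------
# Illustrative sketch (our commentary, not part of the original DivEye
# release): the classifier above consumes 11 DivEye statistics plus
# 8 BiScope statistics for each of 9 splits, i.e. 11 + 9 * 8 = 83
# features in a fixed order. The helper below is a hypothetical,
# GPU-free demonstration of the 11 DivEye statistics on synthetic
# token log-likelihoods; its name, the gamma-distributed surprisals,
# and the stand-in compression ratio are assumptions for illustration.
def _diveye_stats_demo(seed=0):
    rng = np.random.default_rng(seed)
    # Surprisal is non-negative, so draw it from a gamma distribution
    # and negate to obtain synthetic token log-likelihoods.
    surprisals = rng.gamma(shape=2.0, scale=1.5, size=200)
    log_likelihoods = -surprisals
    second = np.diff(np.diff(log_likelihoods))
    features = [
        np.mean(surprisals), np.std(surprisals), np.var(surprisals),
        skew(surprisals), kurtosis(surprisals),
        np.mean(np.diff(surprisals)), np.std(np.diff(surprisals)),
        np.var(second),
        entropy(np.histogram(second, bins=20, density=True)[0]),
        np.corrcoef(second[:-1], second[1:])[0, 1],
        0.5,  # stand-in for the zlib compression ratio of the raw text
    ]
    assert len(features) == 11
    return features
# ------------------------------------------------------------------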
# Token from environment variable
token = os.getenv("HF_TOKEN")
loaded = False

if not torch.cuda.is_available():
    print("[DivEye] CUDA not available. Running on CPU.")

# Load the scoring models and tokenizers (GPU only).
if torch.cuda.is_available():
    loaded = True
    model_name_div = "tiiuae/falcon-7b"
    model_name_bi = "google/gemma-1.1-2b-it"

    # NOTE: the device_map values pin the two models to separate GPUs,
    # so this configuration assumes a machine with at least two devices.
    div_model = AutoModelForCausalLM.from_pretrained(model_name_div, torch_dtype=torch.float16,
                                                     device_map="cuda:0", token=token)
    div_tokenizer = AutoTokenizer.from_pretrained(model_name_div, use_fast=False,
                                                  trust_remote_code=True, token=token)
    bi_model = AutoModelForCausalLM.from_pretrained(model_name_bi, torch_dtype=torch.float16,
                                                    device_map="cuda:1", token=token)
    bi_tokenizer = AutoTokenizer.from_pretrained(model_name_bi, use_fast=False,
                                                 trust_remote_code=True, token=token)

    div_model.eval()
    bi_model.eval()

# Gradio app setup
with gr.Blocks(title="DivEye", theme=gr.themes.Soft()) as demo:
    gr.HTML("""
        <p style="text-align: center;"><sup>1</sup> Birla Institute of Technology and Science, Goa &nbsp;&nbsp; <sup>2</sup> IBM Research, USA</p>
        <p><b>Abstract:</b> Detecting AI-generated text is increasingly necessary to combat misuse of LLMs in domains such as education, business compliance, journalism, and social media, where synthetic fluency can mask misinformation or deception. Existing detectors often rely on likelihood-based heuristics or black-box classifiers, which struggle against high-quality generations and lack interpretability. In this work, we propose DivEye, a novel detection framework that captures how unpredictability fluctuates across a text using surprisal-based features. Motivated by the observation that human-authored text exhibits richer variability in lexical and structural unpredictability than LLM outputs, DivEye quantifies this signal through a set of interpretable statistical features. Our method outperforms existing zero-shot detectors by up to 33.2% and achieves competitive performance with fine-tuned baselines across multiple benchmarks. DivEye is robust to paraphrasing and adversarial attacks, generalizes well across domains and models, and improves the performance of existing detectors by up to 18.7% when used as an auxiliary signal. Beyond detection, DivEye provides interpretable insights into why a text is flagged, pointing to rhythmic unpredictability as a powerful and underexplored signal for LLM detection.</p>