import os
import zlib
from pathlib import Path

import gradio as gr
import numpy as np
import pandas as pd
import spaces
import torch
import xgboost as xgb
from scipy.stats import skew, kurtosis, entropy
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

theme = gr.Theme.from_hub("gstaff/xkcd")


class Diversity:
    """Extracts surprisal-based diversity features from a causal LM."""

    def __init__(self, model, tokenizer, device):
        self.tokenizer = tokenizer
        self.model = model
        self.device = device

    def compute_log_likelihoods(self, text):
        tokens = self.tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=1024).to(self.device)
        with torch.no_grad():
            outputs = self.model(tokens, labels=tokens)
        logits = outputs.logits
        shift_logits = logits[:, :-1, :].squeeze(0)
        shift_labels = tokens[:, 1:].squeeze(0)
        log_probs = torch.log_softmax(shift_logits.float(), dim=-1)
        token_log_likelihoods = log_probs[range(shift_labels.shape[0]), shift_labels].cpu().numpy()
        return token_log_likelihoods

    def compute_surprisal(self, text):
        log_likelihoods = self.compute_log_likelihoods(text)
        return -log_likelihoods

    def compute_features(self, text):
        surprisals = self.compute_surprisal(text)
        log_likelihoods = self.compute_log_likelihoods(text)
        if len(surprisals) < 10 or len(log_likelihoods) < 3:
            return None

        # Moments of the token-level surprisal distribution.
        s = np.array(surprisals)
        mean_s, std_s, var_s, skew_s, kurt_s = np.mean(s), np.std(s), np.var(s), skew(s), kurtosis(s)

        # First-order surprisal differences capture local fluctuation.
        diff_s = np.diff(s)
        mean_diff, std_diff = np.mean(diff_s), np.std(diff_s)

        # Second-order differences of the log-likelihood sequence capture rhythmic unpredictability.
        first_order_diff = np.diff(log_likelihoods)
        second_order_diff = np.diff(first_order_diff)
        var_2nd = np.var(second_order_diff)
        entropy_2nd = entropy(np.histogram(second_order_diff, bins=20, density=True)[0])
        autocorr_2nd = np.corrcoef(second_order_diff[:-1], second_order_diff[1:])[0, 1] if len(second_order_diff) > 1 else 0

        # Compression ratio as a cheap proxy for textual redundancy.
        comp_ratio = len(zlib.compress(text.encode('utf-8'))) / len(text.encode('utf-8'))

        return [mean_s, std_s, var_s, skew_s, kurt_s, mean_diff, std_diff,
                var_2nd, entropy_2nd, autocorr_2nd, comp_ratio]


class BiScope:
    """Computes completion-prompt cross-entropy (FCE/BCE) features."""

    def __init__(self, model, tokenizer, device):
        self.COMPLETION_PROMPT_ONLY = "Complete the following text: "
        self.tokenizer = tokenizer
        self.model = model
        self.device = device

    def compute_fce_loss(self, logits, targets, text_slice):
        return CrossEntropyLoss(reduction='none')(
            logits[0, text_slice.start - 1:text_slice.stop - 1, :], targets
        ).detach().cpu().numpy()

    def compute_bce_loss(self, logits, targets, text_slice):
        return CrossEntropyLoss(reduction='none')(
            logits[0, text_slice, :], targets
        ).detach().cpu().numpy()

    def detect_single_sample(self, sample):
        prompt_ids = self.tokenizer(self.COMPLETION_PROMPT_ONLY, return_tensors='pt').input_ids.to(self.device)
        text_ids = self.tokenizer(sample, return_tensors='pt', max_length=2000, truncation=True).input_ids.to(self.device)
        combined_ids = torch.cat([prompt_ids, text_ids], dim=1)
        text_slice = slice(prompt_ids.shape[1], combined_ids.shape[1])

        outputs = self.model(input_ids=combined_ids)
        logits = outputs.logits
        targets = combined_ids[0][text_slice]

        fce_loss = self.compute_fce_loss(logits, targets, text_slice)
        bce_loss = self.compute_bce_loss(logits, targets, text_slice)

        # Summary statistics over progressively later suffixes of the text.
        features = []
        for p in range(1, 10):
            split = len(fce_loss) * p // 10
            fce_clipped = np.nan_to_num(np.clip(fce_loss[split:], -1e6, 1e6), nan=0.0, posinf=1e6, neginf=-1e6)
            bce_clipped = np.nan_to_num(np.clip(bce_loss[split:], -1e6, 1e6), nan=0.0, posinf=1e6, neginf=-1e6)
            features.extend([
                np.mean(fce_clipped), np.max(fce_clipped), np.min(fce_clipped), np.std(fce_clipped),
                np.mean(bce_clipped), np.max(bce_clipped), np.min(bce_clipped), np.std(bce_clipped),
            ])
        return features

# ===========================================================
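# Feature layout fed to the classifier below: Diversity.compute_features yields
# 11 surprisal/diversity statistics and BiScope.detect_single_sample yields
# 8 loss statistics for each of 9 suffix splits (72 values), giving an
# 83-dimensional vector that the XGBoost model scores.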
@spaces.GPU
def evaluate(diveye, biscope, text):
    global model
    # Concatenate DivEye and BiScope features and score them with the XGBoost model.
    diveye_features = diveye.compute_features(text)
    biscope_features = biscope.detect_single_sample(text)
    diveye_features.extend(biscope_features)
    return model.predict_proba([diveye_features])[:, 1][0].item()


def detect_ai_text(text):
    global loaded, diveye, biscope, model
    if not loaded:
        return "❌ Model not loaded. We require a GPU to run DivEye.", 0.0, pd.DataFrame({
            "Source": ["AI Generated", "Human Written"],
            "Probability (%)": [0, 0]
        })

    text = text.strip()
    if not text or len(text.split()) < 15:
        return (
            "❌ Please enter some text with at least 15 words.",
            0.0,
            pd.DataFrame({
                "Source": ["AI Generated", "Human Written"],
                "Probability (%)": [0, 0]
            })
        )

    # Run the detector.
    ai_prob = evaluate(diveye, biscope, text)
    human_prob = 1 - ai_prob

    if ai_prob > 0.7:
        message = f"🤖 **HIGH CONFIDENCE**: AI-generated ({ai_prob:.1%})"
    elif ai_prob > 0.5:
        message = f"⚠️ **MODERATE CONFIDENCE**: Possibly AI-generated ({ai_prob:.1%})"
    else:
        message = f"✅ **LOW CONFIDENCE**: Human-written ({human_prob:.1%})"

    bar_data = pd.DataFrame({
        "Source": ["AI Generated", "Human Written"],
        "Probability (%)": [ai_prob * 100, human_prob * 100]
    })
    return message, round(ai_prob, 3), bar_data

# ==========================================================
# Token from environment variable
token = os.getenv("HF_TOKEN")

loaded = False
if not torch.cuda.is_available():
    loaded = False
    print("[DivEye] CUDA not available. Running on CPU.")

# Load the scoring models, tokenizers, and the XGBoost classifier.
if torch.cuda.is_available():
    loaded = True
    model_name_div = "tiiuae/falcon-7b"
    model_name_bi = "google/gemma-1.1-2b-it"

    div_model = AutoModelForCausalLM.from_pretrained(model_name_div, torch_dtype=torch.float16, device_map="cuda:0", use_auth_token=token)
    div_tokenizer = AutoTokenizer.from_pretrained(model_name_div, use_fast=False, trust_remote_code=True, use_auth_token=token)
    bi_model = AutoModelForCausalLM.from_pretrained(model_name_bi, torch_dtype=torch.float16, device_map="cuda:1", use_auth_token=token)
    bi_tokenizer = AutoTokenizer.from_pretrained(model_name_bi, use_fast=False, trust_remote_code=True, use_auth_token=token)
    div_model.eval()
    bi_model.eval()

    model_path = Path(__file__).parent / "model.json"
    model = xgb.XGBClassifier()
    model.load_model(model_path)

    diveye = Diversity(div_model, div_tokenizer, div_model.device)
    biscope = BiScope(bi_model, bi_tokenizer, bi_model.device)

# Gradio app setup
with gr.Blocks(title="DivEye") as demo:
    gr.HTML("""
1 Birla Institute of Technology and Science, Goa 2 IBM Research, USA
Abstract: Detecting AI-generated text is an increasing necessity to combat misuse of LLMs in domains such as education, business compliance, journalism, and social media, where synthetic fluency can mask misinformation or deception. Existing detectors often rely on likelihood-based heuristics or black-box classifiers, which struggle against high-quality generations and lack interpretability. In this work, we propose DivEye, a novel detection framework that captures how unpredictability fluctuates across a text using surprisal-based features. Motivated by the observation that human-authored text exhibits richer variability in lexical and structural unpredictability than LLM outputs, DivEye captures this signal through a set of interpretable statistical features. Our method outperforms existing zero-shot detectors by up to 33.2% and achieves competitive performance with fine-tuned baselines across multiple benchmarks. DivEye is robust to paraphrasing and adversarial attacks, generalizes well across domains and models, and improves the performance of existing detectors by up to 18.7% when used as an auxiliary signal. Beyond detection, DivEye provides interpretable insights into why a text is flagged, pointing to rhythmic unpredictability as a powerful and underexplored signal for LLM detection.