import gradio as gr
from transformers import pipeline

# ----------------------------
# Load TEXT detector (upgradeable)
# ----------------------------
TEXT_MODEL_ID = "wangkevin02/AI_Detect_Model"  # swap if you try another model
text_pipe = pipeline("text-classification", model=TEXT_MODEL_ID)


def _canonical(label: str) -> str | None:
    """Map raw label names to 'AI' or 'HUMAN' when possible."""
    if not label:
        return None
    l = label.strip().lower()
    # Common explicit names
    if any(k in l for k in ["ai", "machine", "generated", "fake", "synthetic", "gpt"]):
        return "AI"
    if any(k in l for k in ["human", "real", "authentic", "organic"]):
        return "HUMAN"
    # Try LABEL_X -> resolve through id2label if present
    if l.startswith("label_"):
        try:
            idx = int(l.split("_")[-1])
        except ValueError:
            return None
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict) and idx in id2label:
            return _canonical(str(id2label[idx]))
    # Sometimes labels are just "0"/"1"
    if l in {"0", "1"}:
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict):
            mapped = id2label.get(int(l))
            if mapped:
                return _canonical(str(mapped))
    return None


def _aggregate_probs(raw_results):
    """
    Convert pipeline outputs into {'AI': p, 'HUMAN': p, 'raw': {...}} robustly.
    Ensures both keys exist and sum <= 1.0 (may be < 1 if labels don't map).
    """
    # text-classification with top_k=None returns a list of dicts, e.g.
    # [{'label': 'AI', 'score': 0.82}, {'label': 'HUMAN', 'score': 0.18}]
    if isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], dict):
        label_scores = {d["label"]: float(d["score"]) for d in raw_results}
    elif isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], list):
        # return_all_scores=True style: [[{label, score}, {label, score}, ...]]
        label_scores = {d["label"]: float(d["score"]) for d in raw_results[0]}
    else:
        label_scores = {}

    ai_p = 0.0
    human_p = 0.0
    for lbl, sc in label_scores.items():
        canon = _canonical(lbl)
        if canon == "AI":
            ai_p += sc
        elif canon == "HUMAN":
            human_p += sc

    # If nothing mapped, fall back to a top-label heuristic
    if ai_p == 0.0 and human_p == 0.0 and label_scores:
        top_lbl = max(label_scores, key=label_scores.get)
        top_sc = label_scores[top_lbl]
        canon = _canonical(top_lbl)
        if canon == "AI":
            ai_p = top_sc
            human_p = 1.0 - top_sc
        elif canon == "HUMAN":
            human_p = top_sc
            ai_p = 1.0 - top_sc

    return {"AI": round(ai_p, 6), "HUMAN": round(human_p, 6), "raw": label_scores}


def _verdict(ai_p: float, human_p: float, n_words: int) -> str:
    """Turn the two probabilities into a readable verdict with a confidence band."""
    conf = max(ai_p, human_p)
    if n_words < 120:
        band = "LOW (short text)"
    elif conf < 0.60:
        band = "LOW (uncertain)"
    elif conf < 0.80:
        band = "MEDIUM"
    else:
        band = "HIGH"

    if ai_p > human_p:
        return f"🤖 Likely AI - Confidence: {band}"
    elif human_p > ai_p:
        return f"📝 Likely Human - Confidence: {band}"
    else:
        return "❓ Uncertain - Confidence: LOW"


def detect_text(input_text: str):
    text = (input_text or "").strip()
    if not text:
        return {}, "❌ Please enter some text."
    try:
        # Get ALL label scores so we can map them correctly
        results = text_pipe(text, top_k=None)
        agg = _aggregate_probs(results)
        ai_p, human_p = float(agg["AI"]), float(agg["HUMAN"])
        # Round for display; the raw label scores stay available in agg["raw"]
        probs_out = {
            "AI-generated": round(ai_p, 4),
            "Human-written": round(human_p, 4),
        }
        # Optional: include raw labels so you can debug mappings in the UI
        # probs_out.update({f"raw::{k}": round(v, 4) for k, v in agg["raw"].items()})
        verdict = _verdict(ai_p, human_p, n_words=len(text.split()))
        return probs_out, verdict
    except Exception as e:
        return {}, f"❌ Error: {str(e)}"


# ----------------------------
# (Optional) IMAGE detector - won't crash if the model is unavailable
# ----------------------------
try:
    from PIL import Image  # noqa: F401  (verifies Pillow is installed for the image tab)
    image_pipe = pipeline("image-classification", model="umm-maybe/ai-vs-human-images")
except Exception:
    image_pipe = None


def detect_image(img):
    if image_pipe is None:
        return {}, "⚠️ Image detector not available on this Space."
    try:
        results = image_pipe(img)
        label_scores = {d["label"]: float(d["score"]) for d in results}
        best = max(label_scores, key=label_scores.get)
        if any(k in best.lower() for k in ["ai", "fake", "generated", "synthetic"]):
            return label_scores, "🤖 This image looks AI-generated"
        else:
            return label_scores, "📷 This image looks Human/Real"
    except Exception as e:
        return {}, f"❌ Error: {str(e)}"


# ----------------------------
# UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        "# 🔍 AI Content Detector\n"
        "Detect whether **text** (and optionally images) is AI-generated or human-made."
    )

    with gr.Tab("📝 Text"):
        txt = gr.Textbox(label="Enter text", lines=10, placeholder="Paste text here…")
        out_probs = gr.Label(label="Probabilities")
        out_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn = gr.Button("Analyze", variant="primary")
        btn.click(detect_text, inputs=txt, outputs=[out_probs, out_verdict])

    with gr.Tab("📷 Image"):
        img_in = gr.Image(type="pil", label="Upload an image")
        img_probs = gr.Label(label="Probabilities")
        img_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn2 = gr.Button("Analyze Image")
        btn2.click(detect_image, inputs=img_in, outputs=[img_probs, img_verdict])


if __name__ == "__main__":
    demo.launch()
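
# ----------------------------
# Dependency sketch (assumption: a standard Hugging Face Space with a PyTorch backend)
# ----------------------------
# Inferred from the imports above; exact versions and pins are not specified here.
# A minimal requirements.txt for this app would likely contain:
#
#   gradio
#   transformers
#   torch     # assumed backend for the two pipelines
#   pillow    # for the optional image tab (gr.Image(type="pil"))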