import gradio as gr
from transformers import pipeline

# ----------------------------
# Load TEXT detector (upgradeable)
# ----------------------------
TEXT_MODEL_ID = "wangkevin02/AI_Detect_Model"  # swap if you try another model
text_pipe = pipeline("text-classification", model=TEXT_MODEL_ID)
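
# Quick sanity check (illustrative only -- the scores below are made up, and
# the raw label names depend on the checkpoint; they may be "LABEL_0"/"LABEL_1"):
#
#   >>> text_pipe("Hello world", top_k=None)
#   [{'label': 'LABEL_1', 'score': 0.87}, {'label': 'LABEL_0', 'score': 0.13}]
#
# The helpers below normalize whatever label scheme the checkpoint uses into
# canonical "AI" / "HUMAN" buckets.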
def _canonical(label: str) -> str | None:
    """Map raw label names to 'AI' or 'HUMAN' when possible."""
    if not label:
        return None
    l = label.strip().lower()
    # Common explicit names
    if any(k in l for k in ["ai", "machine", "generated", "fake", "synthetic", "gpt"]):
        return "AI"
    if any(k in l for k in ["human", "real", "authentic", "organic"]):
        return "HUMAN"
    # "LABEL_X" style -> resolve X through the model's id2label mapping if present
    if l.startswith("label_"):
        try:
            idx = int(l.split("_")[-1])
        except ValueError:
            return None
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict) and idx in id2label:
            return _canonical(str(id2label[idx]))
    # Sometimes labels are just "0"/"1"
    if l in {"0", "1"}:
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict):
            mapped = id2label.get(int(l))
            if mapped:
                return _canonical(str(mapped))
    return None
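
# Illustrative mappings (hypothetical label names, not tied to any checkpoint):
#   _canonical("AI")           -> "AI"
#   _canonical("machine-text") -> "AI"      (keyword "machine")
#   _canonical("Human")        -> "HUMAN"
#   _canonical("LABEL_0")      -> whatever id2label[0] resolves to, or None
#   _canonical("neutral")      -> None      (no keyword match)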

def _aggregate_probs(raw_results):
    """
    Convert pipeline outputs into {'AI': p, 'HUMAN': p, 'raw': {...}} robustly.
    Ensures both keys exist and that they sum to <= 1.0 (the sum may be < 1.0
    when some labels don't map to either bucket).
    """
    # text-classification with top_k=None returns a list of dicts,
    # e.g. [{'label': 'AI', 'score': 0.82}, {'label': 'HUMAN', 'score': 0.18}]
    if isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], dict):
        label_scores = {d["label"]: float(d["score"]) for d in raw_results}
    elif isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], list):
        # return_all_scores=True style: [[{label, score}, {label, score}, ...]]
        label_scores = {d["label"]: float(d["score"]) for d in raw_results[0]}
    else:
        label_scores = {}

    ai_p = 0.0
    human_p = 0.0
    for lbl, sc in label_scores.items():
        canon = _canonical(lbl)
        if canon == "AI":
            ai_p += sc
        elif canon == "HUMAN":
            human_p += sc

    # If nothing mapped, fall back to a top-label heuristic
    if ai_p == 0.0 and human_p == 0.0 and label_scores:
        top_lbl = max(label_scores, key=label_scores.get)
        top_sc = label_scores[top_lbl]
        canon = _canonical(top_lbl)
        if canon == "AI":
            ai_p = top_sc
            human_p = 1.0 - top_sc
        elif canon == "HUMAN":
            human_p = top_sc
            ai_p = 1.0 - top_sc
    return {"AI": round(ai_p, 6), "HUMAN": round(human_p, 6), "raw": label_scores}

def _verdict(ai_p: float, human_p: float, n_words: int) -> str:
    conf = max(ai_p, human_p)
    if n_words < 120:
        band = "LOW (short text)"
    elif conf < 0.60:
        band = "LOW (uncertain)"
    elif conf < 0.80:
        band = "MEDIUM"
    else:
        band = "HIGH"
    if ai_p > human_p:
        return f"🤖 Likely AI | Confidence: {band}"
    elif human_p > ai_p:
        return f"👤 Likely Human | Confidence: {band}"
    else:
        return "❓ Uncertain | Confidence: LOW"

def detect_text(input_text: str):
    text = (input_text or "").strip()
    if not text:
        return {}, "⚠️ Please enter some text."
    try:
        # Get ALL label scores so we can map correctly; truncate inputs that
        # exceed the model's max sequence length instead of raising an error
        results = text_pipe(text, top_k=None, truncation=True)
        agg = _aggregate_probs(results)
        ai_p, human_p = float(agg["AI"]), float(agg["HUMAN"])
        # Round for display, but keep the raw label scores around for debugging
        probs_out = {
            "AI-generated": round(ai_p, 4),
            "Human-written": round(human_p, 4),
        }
        # Optional: include raw labels so you can debug mappings in the UI
        # probs_out.update({f"raw::{k}": round(v, 4) for k, v in agg["raw"].items()})
        verdict = _verdict(ai_p, human_p, n_words=len(text.split()))
        return probs_out, verdict
    except Exception as e:
        return {}, f"❌ Error: {e}"

# ----------------------------
# (Optional) IMAGE detector: won't crash if the model is unavailable
# ----------------------------
try:
    from PIL import Image  # noqa: F401  -- confirms PIL is available for image inputs
    image_pipe = pipeline("image-classification", model="umm-maybe/ai-vs-human-images")
except Exception:
    image_pipe = None

def detect_image(img):
    if image_pipe is None:
        return {}, "⚠️ Image detector not available on this Space."
    if img is None:
        return {}, "⚠️ Please upload an image."
    try:
        results = image_pipe(img)
        label_scores = {d["label"]: float(d["score"]) for d in results}
        best = max(label_scores, key=label_scores.get)
        if any(k in best.lower() for k in ["ai", "fake", "generated", "synthetic"]):
            return label_scores, "🤖 This image looks AI-generated"
        else:
            return label_scores, "📷 This image looks Human/Real"
    except Exception as e:
        return {}, f"❌ Error: {e}"

# ----------------------------
# UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        "# 🔍 AI Content Detector\n"
        "Detect whether **text** (and optionally images) are AI-generated or human-made."
    )

    with gr.Tab("📝 Text"):
        txt = gr.Textbox(label="Enter text", lines=10, placeholder="Paste text here…")
        out_probs = gr.Label(label="Probabilities")
        out_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn = gr.Button("Analyze", variant="primary")
        btn.click(detect_text, inputs=txt, outputs=[out_probs, out_verdict])

    with gr.Tab("📷 Image"):
        img_in = gr.Image(type="pil", label="Upload an image")
        img_probs = gr.Label(label="Probabilities")
        img_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn2 = gr.Button("Analyze Image")
        btn2.click(detect_image, inputs=img_in, outputs=[img_probs, img_verdict])

if __name__ == "__main__":
    demo.launch()
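
# Assumed Space dependencies (requirements.txt): gradio, transformers, torch,
# pillow -- adjust to whatever backend/framework your checkpoints need.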