# NOTE: Hugging Face Space page chrome (status "Sleeping", file size, blame
# hashes, and a line-number gutter) was captured along with this file; it is
# commented out here so the module parses. The app source begins below.
import gradio as gr
from transformers import pipeline
# ----------------------------
# Load TEXT detector (upgradeable)
# ----------------------------
# Model is loaded once at import time; the first run downloads weights from
# the Hugging Face Hub, so startup can be slow on a cold Space.
TEXT_MODEL_ID = "wangkevin02/AI_Detect_Model" # swap if you try another model
# Shared module-level pipeline; _canonical() also reads its config.id2label
# to resolve generic "LABEL_X" names.
text_pipe = pipeline("text-classification", model=TEXT_MODEL_ID)
def _canonical(label: str) -> str | None:
"""Map raw label names to 'AI' or 'HUMAN' when possible."""
if not label:
return None
l = label.strip().lower()
# Common explicit names
if any(k in l for k in ["ai", "machine", "generated", "fake", "synthetic", "gpt"]):
return "AI"
if any(k in l for k in ["human", "real", "authentic", "organic"]):
return "HUMAN"
# Try LABEL_X -> use id2label if present
if l.startswith("label_"):
try:
idx = int(l.split("_")[-1])
except ValueError:
return None
id2label = getattr(text_pipe.model.config, "id2label", None)
if isinstance(id2label, dict) and idx in id2label:
return _canonical(str(id2label[idx]))
# Sometimes labels are just "0"/"1"
if l in {"0", "1"}:
id2label = getattr(text_pipe.model.config, "id2label", None)
if isinstance(id2label, dict) and l.isdigit():
mapped = id2label.get(int(l))
if mapped:
return _canonical(str(mapped))
return None
def _aggregate_probs(raw_results):
    """
    Convert pipeline outputs into {'AI': p, 'HUMAN': p, 'raw': {...}} robustly.
    Ensures both keys exist and sum <= 1.0 (may be < 1 if labels don't map).
    """
    # The pipeline yields either a flat list of {label, score} dicts
    # (top_k=None) or a nested list [[{label, score}, ...]]
    # (return_all_scores=True style). Flatten both into one entry list.
    entries = []
    if isinstance(raw_results, list) and raw_results:
        head = raw_results[0]
        if isinstance(head, dict):
            entries = raw_results
        elif isinstance(head, list):
            entries = head
    label_scores = {}
    for entry in entries:
        label_scores[entry["label"]] = float(entry["score"])
    # Accumulate probability mass per canonical bucket.
    ai_total = 0.0
    human_total = 0.0
    for name, score in label_scores.items():
        bucket = _canonical(name)
        if bucket == "AI":
            ai_total += score
        elif bucket == "HUMAN":
            human_total += score
    # Fallback: if no label mapped at all, try the single top label and treat
    # the remaining mass as the opposite class.
    if label_scores and ai_total == 0.0 and human_total == 0.0:
        best = max(label_scores, key=label_scores.get)
        best_score = label_scores[best]
        bucket = _canonical(best)
        if bucket == "AI":
            ai_total, human_total = best_score, 1.0 - best_score
        elif bucket == "HUMAN":
            human_total, ai_total = best_score, 1.0 - best_score
    return {"AI": round(ai_total, 6), "HUMAN": round(human_total, 6), "raw": label_scores}
def _verdict(ai_p: float, human_p: float, n_words: int) -> str:
conf = max(ai_p, human_p)
if n_words < 120:
band = "LOW (short text)"
elif conf < 0.60:
band = "LOW (uncertain)"
elif conf < 0.80:
band = "MEDIUM"
else:
band = "HIGH"
if ai_p > human_p:
return f"π€ Likely AI β Confidence: {band}"
elif human_p > ai_p:
return f"π Likely Human β Confidence: {band}"
else:
return "β Uncertain β Confidence: LOW"
def detect_text(input_text: str):
    """Classify text as AI- or human-written for the Gradio UI.

    Args:
        input_text: Raw textbox contents (may be None or blank).

    Returns:
        A (probabilities dict, verdict string) pair; on failure the dict is
        empty and the string carries the error message.
    """
    cleaned = (input_text or "").strip()
    if not cleaned:
        return {}, "β Please enter some text."
    try:
        # top_k=None returns scores for EVERY label so the mapping in
        # _aggregate_probs can work with whatever names the model uses.
        agg = _aggregate_probs(text_pipe(cleaned, top_k=None))
        ai_p = float(agg["AI"])
        human_p = float(agg["HUMAN"])
        probs_out = {
            "AI-generated": round(ai_p, 4),
            "Human-written": round(human_p, 4),
        }
        # Uncomment to surface raw model labels for debugging the mapping:
        # probs_out.update({f"raw::{k}": round(v, 4) for k, v in agg["raw"].items()})
        verdict = _verdict(ai_p, human_p, n_words=len(cleaned.split()))
        return probs_out, verdict
    except Exception as e:
        # UI boundary: surface the error instead of crashing the Space.
        return {}, f"β Error: {str(e)}"
# ----------------------------
# (Optional) IMAGE detector β won't crash if model unavailable
# ----------------------------
# Best-effort load: if PIL or the model is missing (e.g. on a CPU-only or
# offline Space), image_pipe stays None and detect_image degrades gracefully.
try:
    from PIL import Image
    image_pipe = pipeline("image-classification", model="umm-maybe/ai-vs-human-images")
except Exception:
    image_pipe = None
def detect_image(img):
    """Classify an uploaded image as AI-generated or real.

    Args:
        img: PIL image from the Gradio Image component.

    Returns:
        A (label->score dict, verdict string) pair; returns a notice instead
        of failing when the optional image model did not load.
    """
    if image_pipe is None:
        return {}, "β οΈ Image detector not available on this Space."
    try:
        scores = {}
        for entry in image_pipe(img):
            scores[entry["label"]] = float(entry["score"])
        # Verdict follows the single highest-scoring label.
        top_label = max(scores, key=scores.get).lower()
        if any(marker in top_label for marker in ("ai", "fake", "generated", "synthetic")):
            return scores, "π€ This image looks AI-generated"
        return scores, "π· This image looks Human/Real"
    except Exception as e:
        return {}, f"β Error: {str(e)}"
# ----------------------------
# UI
# ----------------------------
# Two-tab Gradio app: text detection (always available) and image detection
# (shows a notice if the optional image model failed to load).
with gr.Blocks() as demo:
    gr.Markdown("# π AI Content Detector\nDetect whether **text** (and optionally images) are AI-generated or human-made.")
    with gr.Tab("π Text"):
        txt = gr.Textbox(label="Enter text", lines=10, placeholder="Paste text hereβ¦")
        out_probs = gr.Label(label="Probabilities")
        out_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn = gr.Button("Analyze", variant="primary")
        # Wire the button to detect_text: one input, two outputs.
        btn.click(detect_text, inputs=txt, outputs=[out_probs, out_verdict])
    with gr.Tab("π· Image"):
        img_in = gr.Image(type="pil", label="Upload an image")
        img_probs = gr.Label(label="Probabilities")
        img_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn2 = gr.Button("Analyze Image")
        btn2.click(detect_image, inputs=img_in, outputs=[img_probs, img_verdict])
if __name__ == "__main__":
    # On a HF Space the platform also imports `demo`; launching here keeps
    # local `python app.py` runs working.
    demo.launch()