File size: 6,308 Bytes
f1fd10a
 
 
a14e700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1fd10a
869dd64
a14e700
 
bc9aab1
 
 
a14e700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc9aab1
 
 
a14e700
 
 
 
 
 
 
 
bc9aab1
a14e700
 
 
 
 
 
 
 
 
 
 
 
 
bc9aab1
a14e700
 
 
 
 
bc9aab1
a14e700
 
 
 
 
 
bc9aab1
a14e700
 
 
 
 
 
bc9aab1
f1fd10a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import gradio as gr
from transformers import pipeline

# ----------------------------
# Load TEXT detector (upgradeable)
# ----------------------------
TEXT_MODEL_ID = "wangkevin02/AI_Detect_Model"  # swap if you try another model

text_pipe = pipeline("text-classification", model=TEXT_MODEL_ID)

def _canonical(label: str) -> str | None:
    """Map raw label names to 'AI' or 'HUMAN' when possible."""
    if not label:
        return None
    l = label.strip().lower()
    # Common explicit names
    if any(k in l for k in ["ai", "machine", "generated", "fake", "synthetic", "gpt"]):
        return "AI"
    if any(k in l for k in ["human", "real", "authentic", "organic"]):
        return "HUMAN"
    # Try LABEL_X -> use id2label if present
    if l.startswith("label_"):
        try:
            idx = int(l.split("_")[-1])
        except ValueError:
            return None
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict) and idx in id2label:
            return _canonical(str(id2label[idx]))
    # Sometimes labels are just "0"/"1"
    if l in {"0", "1"}:
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict) and l.isdigit():
            mapped = id2label.get(int(l))
            if mapped:
                return _canonical(str(mapped))
    return None

def _aggregate_probs(raw_results):
    """Fold pipeline output into {'AI': p, 'HUMAN': p, 'raw': {...}}.

    Both probability keys are always present; their sum may fall below 1.0
    when some raw labels cannot be mapped by _canonical().
    """
    # Accepted pipeline output shapes:
    #   top_k=None              -> [{'label': ..., 'score': ...}, ...]
    #   return_all_scores=True  -> [[{'label': ..., 'score': ...}, ...]]
    label_scores = {}
    if isinstance(raw_results, list) and raw_results:
        first = raw_results[0]
        if isinstance(first, dict):
            label_scores = {entry["label"]: float(entry["score"]) for entry in raw_results}
        elif isinstance(first, list):
            label_scores = {entry["label"]: float(entry["score"]) for entry in first}

    ai_total = 0.0
    human_total = 0.0
    for name, score in label_scores.items():
        mapped = _canonical(name)
        if mapped == "AI":
            ai_total += score
        elif mapped == "HUMAN":
            human_total += score

    # Nothing mapped at all: fall back to the top label and its complement.
    if ai_total == 0.0 and human_total == 0.0 and label_scores:
        best = max(label_scores, key=label_scores.get)
        best_score = label_scores[best]
        mapped = _canonical(best)
        if mapped == "AI":
            ai_total, human_total = best_score, 1.0 - best_score
        elif mapped == "HUMAN":
            human_total, ai_total = best_score, 1.0 - best_score

    return {"AI": round(ai_total, 6), "HUMAN": round(human_total, 6), "raw": label_scores}

def _verdict(ai_p: float, human_p: float, n_words: int) -> str:
    conf = max(ai_p, human_p)
    if n_words < 120:
        band = "LOW (short text)"
    elif conf < 0.60:
        band = "LOW (uncertain)"
    elif conf < 0.80:
        band = "MEDIUM"
    else:
        band = "HIGH"

    if ai_p > human_p:
        return f"πŸ€– Likely AI β€” Confidence: {band}"
    elif human_p > ai_p:
        return f"πŸ“ Likely Human β€” Confidence: {band}"
    else:
        return "❓ Uncertain β€” Confidence: LOW"

def detect_text(input_text: str):
    """Classify text as AI-generated vs human-written.

    Returns a (probabilities dict, verdict string) pair for the Gradio UI;
    the dict is empty when input is blank or the pipeline raises.
    """
    text = (input_text or "").strip()
    if not text:
        return {}, "❌ Please enter some text."

    try:
        # top_k=None yields every label's score so aggregation sees them all.
        results = text_pipe(text, top_k=None)
        aggregated = _aggregate_probs(results)
        ai_p = float(aggregated["AI"])
        human_p = float(aggregated["HUMAN"])

        display_probs = {
            "AI-generated": round(ai_p, 4),
            "Human-written": round(human_p, 4),
        }
        # Uncomment to surface raw label scores for debugging mappings in the UI:
        # display_probs.update({f"raw::{k}": round(v, 4) for k, v in aggregated["raw"].items()})

        verdict = _verdict(ai_p, human_p, n_words=len(text.split()))
        return display_probs, verdict

    except Exception as e:
        # UI boundary: show the failure as a message instead of crashing the app.
        return {}, f"❌ Error: {str(e)}"

# ----------------------------
# (Optional) IMAGE detector β€” won't crash if model unavailable
# ----------------------------
# Best-effort load: if PIL or the image model cannot be loaded, image_pipe is
# set to None and detect_image reports the feature as unavailable instead of
# the whole app failing at import time.
try:
    from PIL import Image
    image_pipe = pipeline("image-classification", model="umm-maybe/ai-vs-human-images")
except Exception:
    image_pipe = None

def detect_image(img):
    """Classify an image as AI-generated vs real for the Gradio UI.

    Returns a (label->score dict, verdict string) pair; the dict is empty
    when the image model failed to load or classification raises.
    """
    if image_pipe is None:
        return {}, "⚠️ Image detector not available on this Space."
    try:
        scores = {entry["label"]: float(entry["score"]) for entry in image_pipe(img)}
        top_label = max(scores, key=scores.get).lower()
        # Keyword match on the winning label decides the verdict.
        ai_hints = ("ai", "fake", "generated", "synthetic")
        if any(hint in top_label for hint in ai_hints):
            return scores, "πŸ€– This image looks AI-generated"
        return scores, "πŸ“· This image looks Human/Real"
    except Exception as e:
        return {}, f"❌ Error: {str(e)}"

# ----------------------------
# UI
# ----------------------------
# Two-tab Gradio interface. The text tab is always functional; the image tab
# degrades gracefully when the image model failed to load (detect_image
# returns an explanatory message in that case).
with gr.Blocks() as demo:
    gr.Markdown("# πŸ” AI Content Detector\nDetect whether **text** (and optionally images) are AI-generated or human-made.")

    with gr.Tab("πŸ“ Text"):
        txt = gr.Textbox(label="Enter text", lines=10, placeholder="Paste text here…")
        out_probs = gr.Label(label="Probabilities")
        out_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn = gr.Button("Analyze", variant="primary")
        # detect_text returns (probs dict, verdict str) matching outputs order.
        btn.click(detect_text, inputs=txt, outputs=[out_probs, out_verdict])

    with gr.Tab("πŸ“· Image"):
        img_in = gr.Image(type="pil", label="Upload an image")
        img_probs = gr.Label(label="Probabilities")
        img_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn2 = gr.Button("Analyze Image")
        # detect_image returns (scores dict, verdict str) matching outputs order.
        btn2.click(detect_image, inputs=img_in, outputs=[img_probs, img_verdict])

if __name__ == "__main__":
    demo.launch()