File size: 1,710 Bytes
ecec5d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import gradio as gr
from transformers import pipeline

# Hub id of the binary classifier (not_defensive vs defensive).
MODEL_ID = "HagalazAI/BlueSecureBERT"
# Minimum P(defensive) at which a snippet is flagged as blue-team; this is the
# cut-off recommended by the threshold picker.
THRESHOLD = 0.579


# Shared text-classification pipeline. `top_k=None` asks for the score of
# every label, and softmax makes those scores sum to 1.
clf = pipeline(
    task="text-classification",
    model=MODEL_ID,
    tokenizer=MODEL_ID,
    function_to_apply="softmax",
    top_k=None,
)


def predict_defensive(text: str) -> dict:
    """
    Score a snippet with the module-level ``clf`` pipeline.

    Args:
        text: The snippet to classify.

    Returns:
        A dict with:
          - "P(defensive)": the "LABEL_1" score, formatted to three decimals
            (a string).
          - "is_blue": True when that score is >= THRESHOLD.

    Raises:
        ValueError: If the pipeline output contains no "LABEL_1" entry.
    """
    # For a single string this pipeline wraps the per-label dicts in an outer
    # list, e.g.:
    #   [[ {"label": "LABEL_1", "score": 0.97},
    #      {"label": "LABEL_0", "score": 0.03} ]]
    preds = clf(text)
    # The pipeline is documented as returning either a list of dicts or a
    # list of lists of dicts, so unwrap only when the result is nested.
    if isinstance(preds, list) and preds and isinstance(preds[0], list):
        preds = preds[0]

    # Look the entry up by label rather than by position, and fail with a
    # readable message instead of a bare StopIteration when it is absent.
    label_1_dict = next((x for x in preds if x["label"] == "LABEL_1"), None)
    if label_1_dict is None:
        seen = [x["label"] for x in preds]
        raise ValueError(
            f"Expected a 'LABEL_1' entry in the classifier output, got labels: {seen}"
        )

    prob_defensive = float(label_1_dict["score"])
    is_blue = prob_defensive >= THRESHOLD

    return {
        "P(defensive)": f"{prob_defensive:.3f}",
        "is_blue": is_blue,
    }


# Two-line free-text input with an example prompt as placeholder.
_snippet_box = gr.Textbox(
    placeholder="Try a blue-team snippet… e.g. 'Enable the MS23 patch.'",
    lines=2,
)

# Description text for the UI: which model is used, the active threshold, and
# what the two labels mean.
_blurb = (
    f"This Space uses **{MODEL_ID}**.\n\n"
    f"**Threshold** for 'is_blue' = {THRESHOLD}\n\n"
    "The model is a 2-class classifier: LABEL_0=Not defensive, LABEL_1=Defensive.\n"
)

# Wire the classifier function to a text-in / JSON-out interface.
demo = gr.Interface(
    title="BlueSecureBERT Demo",
    description=_blurb,
    fn=predict_defensive,
    inputs=_snippet_box,
    outputs="json",
    allow_flagging="never",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()