Spaces:

HagalazAI
/

redsecurebert-demo

Sleeping

File size: 2,082 Bytes

87d2fc1
15a78c6
87d2fc1
15a78c6
 
b7ba252
15a78c6
 
 
87d2fc1
 
15a78c6
 
 
 
87d2fc1
 
15a78c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87d2fc1
15a78c6
 
87d2fc1
 
15a78c6
b7ba252
15a78c6
 
 
 
b7ba252
15a78c6
 
 
 
 
 
 
b7ba252
87d2fc1
15a78c6

import gradio as gr
from transformers import pipeline

# Model identifier handed to the pipeline for both the model and its tokenizer.
MODEL_ID = "HagalazAI/RedSecureBERT"
# Minimum LABEL_1 score at which a prompt is reported as "red".
THRESHOLD = 0.515

# Shared text-classification pipeline, built once when the module is loaded.
#   * function_to_apply="softmax": explicitly turn the logits into scores.
#   * top_k=None: return a score for every label, not only the best one.
clf = pipeline(
    task="text-classification",
    tokenizer=MODEL_ID,
    model=MODEL_ID,
    function_to_apply="softmax",
    top_k=None,
)


def predict_offensive(text):
    """
    Score ``text`` with the classifier and flag it when it looks offensive.

    Args:
        text: The prompt to classify (the string entered in the textbox).

    Returns:
        A dict with two entries:
          * "P(offensive)": score of the "LABEL_1" entry, formatted as a
            string with three decimals.
          * "is_red": True when the unrounded score is >= THRESHOLD.

    Raises:
        ValueError: If the pipeline output contains no "LABEL_1" entry.
    """
    # truncation=True asks the tokenizer to clip over-long prompts to its
    # maximum length, so a long paste is scored on its leading tokens rather
    # than erroring out inside the model.
    # NOTE(review): relies on the tokenizer config defining a sensible
    # model_max_length -- confirm for this model.
    #
    # For a single string the pipeline hands back a one-item batch, so [0] is
    # the list of {"label", "score"} dicts for this prompt, e.g.
    #   [{"label": "LABEL_1", "score": 0.997}, {"label": "LABEL_0", "score": 0.003}]
    preds = clf(text, truncation=True)[0]

    # Select the entry by label, not by position: the list is ordered by score,
    # so LABEL_1 may be first or second.
    label_1_entry = next((p for p in preds if p["label"] == "LABEL_1"), None)
    if label_1_entry is None:
        # Fail with a readable message instead of a bare StopIteration.
        seen = [p["label"] for p in preds]
        raise ValueError(
            f'classifier output has no "LABEL_1" entry; got labels {seen}'
        )

    prob_offensive = float(label_1_entry["score"])
    return {
        "P(offensive)": f"{prob_offensive:.3f}",
        # Compared on the unrounded score, so a displayed "0.515" can still be
        # below the threshold.
        "is_red": prob_offensive >= THRESHOLD,
    }


# Single-textbox UI: the submitted text is passed to predict_offensive and the
# dict it returns is rendered through the "json" output.
demo = gr.Interface(
    title="RedSecureBERT Demo",
    description=(
        f"This Space uses **{MODEL_ID}**.\n\n"
        f"**Threshold** for 'is_red' = {THRESHOLD}\n\n"
        "The model is a 2-class classifier: LABEL_0=Not offensive, LABEL_1=Offensive.\n"
    ),
    fn=predict_offensive,
    inputs=gr.Textbox(
        placeholder="Try an exploit-like prompt: e.g. 'Bypass an antivirus...'",
        lines=2,
    ),
    outputs="json",
    allow_flagging="never",
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()