"""Gradio demo that scores text with the RedSecureBERT 2-class classifier."""

import gradio as gr
from transformers import pipeline

MODEL_ID = "HagalazAI/RedSecureBERT"
THRESHOLD = 0.515
# Label name the pipeline reports for the "offensive" class (class index 1).
OFFENSIVE_LABEL = "LABEL_1"

# Build the classifier once at import time.
# top_k=None asks for the score of every label rather than only the best one;
# function_to_apply="softmax" turns the two logits into probabilities.
clf = pipeline(
    "text-classification",
    model=MODEL_ID,
    tokenizer=MODEL_ID,
    top_k=None,
    function_to_apply="softmax",
)


def _offensive_score(raw):
    """Extract the OFFENSIVE_LABEL probability from a pipeline result.

    Args:
        raw: Pipeline output for one input. Either a flat list of
            ``{"label", "score"}`` dicts, or that list wrapped in an outer
            one-element (batch) list.

    Returns:
        The score of the OFFENSIVE_LABEL entry as a float.

    Raises:
        ValueError: If no entry carries OFFENSIVE_LABEL.
    """
    # Accept both shapes so the lookup does not depend on whether the
    # pipeline wrapped the single example in a batch list.
    entries = raw[0] if raw and isinstance(raw[0], list) else raw

    for entry in entries:
        if entry["label"] == OFFENSIVE_LABEL:
            return float(entry["score"])

    # Fail with context instead of letting a bare StopIteration escape.
    seen = [entry["label"] for entry in entries]
    raise ValueError(
        f"Label {OFFENSIVE_LABEL!r} not found in classifier output; "
        f"got labels: {seen}"
    )


def predict_offensive(text):
    """Classify ``text`` and report whether it crosses THRESHOLD.

    Args:
        text: The input string to score.

    Returns:
        A dict with two keys:
          * "P(offensive)": the OFFENSIVE_LABEL probability, formatted as a
            string with three decimals.
          * "is_red": True when that probability is >= THRESHOLD.

    Raises:
        ValueError: If the classifier output has no OFFENSIVE_LABEL entry.
    """
    prob_offensive = _offensive_score(clf(text))
    # Compare the unrounded probability; rounding is for display only.
    is_red = prob_offensive >= THRESHOLD

    return {
        "P(offensive)": f"{prob_offensive:.3f}",
        "is_red": is_red,
    }


demo = gr.Interface(
    fn=predict_offensive,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Try an exploit-like prompt: e.g. 'Bypass an antivirus...'",
    ),
    outputs="json",
    title="RedSecureBERT Demo",
    description=(
        f"This Space uses **{MODEL_ID}**.\n\n"
        f"**Threshold** for 'is_red' = {THRESHOLD}\n\n"
        "The model is a 2-class classifier: LABEL_0=Not offensive, LABEL_1=Offensive.\n"
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()