File size: 917 Bytes
000642c
 
 
8286de5
000642c
 
 
 
 
 
 
 
 
 
 
8286de5
000642c
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd
from datasets import load_dataset, Dataset
import os
from piiranha import mask_pii

# Identifier of the dataset to load and push back to; taken from the
# DATASET_NAME environment variable (None when the variable is unset).
DATASET_NAME = os.environ.get("DATASET_NAME")

# Load the "train" split once at import time and keep a DataFrame copy of it
# in memory; submit_entry() reads and rebinds this module-level `df`.
dataset = load_dataset(path=DATASET_NAME, split="train")
df = pd.DataFrame(data=dataset)

def submit_entry(sender, message):
    """Add a new SMS phishing report unless it is already in the dataset.

    The sender is stripped and has every space removed; the message is
    passed through ``mask_pii`` and then stripped. The normalized pair is
    what gets compared against existing rows and what gets stored.

    Args:
        sender: Sender identifier of the SMS, as a string.
        message: Raw SMS text (presumably a string accepted by
            ``mask_pii`` -- confirm against that helper).

    Returns:
        A status string: a warning if the (sender, message) pair already
        exists, in which case nothing is written; otherwise a success
        message after the updated dataset has been pushed.
    """
    global df

    sender = sender.strip().replace(" ", "")  # Remove all spaces inside sender
    message = mask_pii(message).strip()

    # Check for duplicates; return early so an existing entry is neither
    # appended nor pushed a second time.
    if ((df["sender"] == sender) & (df["message"] == message)).any():
        return "⚠️ This entry already exists in the dataset!"

    # Append new entry
    new_entry = pd.DataFrame([[sender, message]], columns=["sender", "message"])
    updated = pd.concat([df, new_entry], ignore_index=True)

    Dataset.from_pandas(updated).push_to_hub(DATASET_NAME)

    # Rebind the in-memory frame only after the push succeeded; otherwise a
    # failed push would leave a row here that makes a retry look like a
    # duplicate even though it was never saved.
    df = updated

    return "✅ Submission saved successfully!"