# anonyspark / test_schema_masking.py
# Uploaded by GenAIDevTOProd using huggingface_hub (commit e0c264d, verified).
# tests/test_schema_masking.py
import sys
import os
sys.path.append("/content/anonyspark")
from pyspark.sql import SparkSession
from anonyspark.utils import apply_masking
def test_schema_masking():
    """Check the ``masked_*`` fields produced by ``apply_masking``.

    Builds a one-row DataFrame holding an email, name, date of birth, SSN,
    ITIN and phone number, and passes it to ``apply_masking`` together with
    a schema dict that maps each column name to an identically named string
    (presumably the masking type to apply -- confirm against
    ``apply_masking``). The collected row is then expected to expose one
    ``masked_<column>`` field per input column with the values asserted
    below.
    """
    spark = (
        SparkSession.builder
        .master("local[1]")
        .appName("Test")
        .getOrCreate()
    )
    try:
        df = spark.createDataFrame([{
            "email": "john@example.com",
            "name": "John",
            "dob": "1991-08-14",
            "ssn": "123-45-6789",
            "itin": "912-73-1234",
            "phone": "123-456-7890",
        }])
        schema = {
            "email": "email",
            "name": "name",
            "dob": "dob",
            "ssn": "ssn",
            "itin": "itin",
            "phone": "phone",
        }
        masked_df = apply_masking(df, schema)
        # Materialize the row into a plain dict while the session is still
        # alive; the assertions below then need no Spark resources.
        result = masked_df.collect()[0].asDict()
    finally:
        # Always release the local Spark session, even when masking or
        # collection raises, so it does not leak past this test.
        spark.stop()

    assert result["masked_email"] == "***@example.com"
    assert result["masked_name"] == "J***"
    assert result["masked_dob"] == "***-**-14"
    assert result["masked_ssn"] == "***-**-6789"
    assert result["masked_itin"] == "***-**-1234"
    assert result["masked_phone"] == "***-***-7890"