Spaces:
Running
Running
# tests/test_schema_masking.py
import sys | |
import os | |
sys.path.append("/content/anonyspark") | |
from pyspark.sql import SparkSession | |
from anonyspark.utils import apply_masking | |
def test_schema_masking():
    """Check that ``apply_masking`` produces the expected ``masked_*`` columns.

    A single-row DataFrame with email, name, dob, ssn, itin and phone values
    is passed to ``apply_masking`` together with a schema dict, and the first
    collected row is compared against the expected masked strings.

    The Spark session is always stopped before the assertions run, so a
    failing mask (or an exception inside ``apply_masking``) does not leave a
    local Spark session running.
    """
    spark = SparkSession.builder.master("local[1]").appName("Test").getOrCreate()
    try:
        df = spark.createDataFrame([{
            "email": "john@example.com",
            "name": "John",
            "dob": "1991-08-14",
            "ssn": "123-45-6789",
            "itin": "912-73-1234",
            "phone": "123-456-7890",
        }])
        # Presumably maps column name -> masking rule; here each rule shares
        # its column's name. Confirm against anonyspark.utils.apply_masking.
        schema = {
            "email": "email",
            "name": "name",
            "dob": "dob",
            "ssn": "ssn",
            "itin": "itin",
            "phone": "phone",
        }
        masked_df = apply_masking(df, schema)
        # Materialize the row into a plain dict so the assertions below do
        # not need a live session.
        result = masked_df.collect()[0].asDict()
    finally:
        # Release the local session even when masking or collection raises.
        spark.stop()

    assert result["masked_email"] == "***@example.com"
    assert result["masked_name"] == "J***"
    assert result["masked_dob"] == "***-**-14"
    assert result["masked_ssn"] == "***-**-6789"
    assert result["masked_itin"] == "***-**-1234"
    assert result["masked_phone"] == "***-***-7890"