Spaces:
Running
Running
from pyspark.sql.functions import col | |
def apply_masking(df, schema): | |
""" | |
Apply masking UDFs to specified columns based on schema. | |
Schema = { "original_col": "mask_type" } | |
""" | |
from .masking import ( | |
mask_email_udf, mask_name_udf, mask_date_udf, | |
mask_ssn_udf, mask_itin_udf, mask_phone_udf | |
) | |
masking_map = { | |
"email": mask_email_udf, | |
"name": mask_name_udf, | |
"dob": mask_date_udf, | |
"ssn": mask_ssn_udf, | |
"itin": mask_itin_udf, | |
"phone": mask_phone_udf, | |
} | |
for col_name, mask_type in schema.items(): | |
if mask_type in masking_map: | |
df = df.withColumn(f"masked_{col_name}", masking_map[mask_type](col(col_name))) | |
return df | |