import json
import sys
import unicodedata

from tinytroupe.utils import logger

################################################################################
# Validation
################################################################################
def check_valid_fields(obj: dict, valid_fields: list) -> None:
    """
    Checks whether the fields in the specified dict are valid, according to the list of valid fields.

    Args:
        obj: The dictionary whose keys are to be checked.
        valid_fields: The collection of keys that are allowed.

    Raises:
        ValueError: On the first key of ``obj`` that is not in ``valid_fields``.
    """
    for key in obj:
        if key not in valid_fields:
            raise ValueError(f"Invalid key {key} in dictionary. Valid keys are: {valid_fields}")


def sanitize_raw_string(value: str) -> str:
    """
    Sanitizes the specified string by:
      - removing any characters that cannot be encoded as UTF-8 (e.g., lone surrogates).
      - normalizing it to Unicode NFC form.
      - ensuring it is not longer than the maximum Python string length.

    This is for an abundance of caution with security, to avoid any potential issues with the string.

    Args:
        value: The string to sanitize.

    Returns:
        The sanitized string.
    """
    # remove any invalid characters by making sure it is a valid UTF-8 string;
    # the "ignore" handler silently drops whatever cannot be encoded.
    value = value.encode("utf-8", "ignore").decode("utf-8")

    value = unicodedata.normalize("NFC", value)

    # ensure it is not longer than the maximum Python string length
    # (a purely defensive upper bound on the slice)
    return value[:sys.maxsize]


def sanitize_dict(value: dict) -> dict:
    """
    Sanitizes the specified dictionary by applying `sanitize_raw_string` to every string value it
    contains, including strings found inside nested dictionaries and lists.

    The dictionary (and any nested dict/list) is modified in place, and the very same object is
    returned. Keys are left untouched, as are values of any other type. Nesting depth is not
    limited: the traversal is iterative, so deeply nested input does not hit the interpreter's
    recursion limit, and containers that reference themselves are visited only once.

    Args:
        value: The dictionary to sanitize.

    Returns:
        The same dictionary object, with its string values sanitized.
    """
    # explicit work stack instead of recursion; `visited` guards against reference cycles
    pending = [value]
    visited = {id(value)}

    while pending:
        container = pending.pop()

        # lists are addressed by index, everything else is treated as a mapping
        if isinstance(container, list):
            entries = enumerate(container)
        else:
            entries = container.items()

        # only existing slots are reassigned, so the container's size never changes while iterating
        for slot, item in entries:
            if isinstance(item, str):
                container[slot] = sanitize_raw_string(item)
            elif isinstance(item, (dict, list)) and id(item) not in visited:
                visited.add(id(item))
                pending.append(item)

    return value