File size: 2,382 Bytes
d6cfb5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# %%writefile string_utils.py
import base64
import random
import re
import string
from urllib.parse import urlparse
class StringUtils:
    """Collection of stateless string helpers exposed as static methods."""

    @staticmethod
    def generate_random_string(length: int = 32) -> str:
        """Return a random string of ASCII letters and digits.

        NOTE: characters are drawn with the ``random`` module, which is not
        suitable for security-sensitive values such as tokens or passwords.

        Args:
            length (int): Number of characters to generate.

        Returns:
            str: The generated string; empty when ``length`` is 0 or negative.
        """
        characters = string.ascii_letters + string.digits
        return "".join(random.choice(characters) for _ in range(length))

    @staticmethod
    def clean_string(input_string: str) -> str:
        """Normalize text to lower-case ASCII with tidy punctuation spacing.

        Every punctuation mark in ``.,;!?%:`` ends up with no whitespace
        before it and a single space after it (unless it ends the string or
        is directly followed by another such mark). This also applies inside
        numbers and URLs: ``"3.14"`` becomes ``"3. 14"``.

        Args:
            input_string (str): The text to clean.

        Returns:
            str: The cleaned, lower-cased text.
        """
        # Replace each run of non-ASCII characters with a single space.
        cleaned_string = re.sub(r"[^\x00-\x7F]+", " ", input_string)
        # Drop whitespace around punctuation and put one space after it.
        cleaned_string = re.sub(r"\s*([.,;!?%:])\s*", r"\1 ", cleaned_string)
        # Attach the dollar sign to whatever follows it ("$ 5" -> "$5").
        cleaned_string = re.sub(r"\$\s+", "$", cleaned_string)
        # Pad parenthesised integers with one space each side: "(1)" -> "( 1 )".
        cleaned_string = re.sub(r"\(\s*(\d+)\s*\)", r"( \1 )", cleaned_string)
        # Remove whitespace that still precedes punctuation (e.g. a space
        # introduced between two adjacent punctuation marks above).
        cleaned_string = re.sub(r"\s+([.,;!?%:])", r"\1", cleaned_string)
        # Collapse whitespace runs, trim both ends, and lower-case the result.
        return re.sub(r"\s+", " ", cleaned_string).strip().lower()

    @staticmethod
    def get_file_name_without_extension(file_name: str) -> str:
        """Return ``file_name`` with its last dot-separated segment removed.

        Only the final extension is stripped (``"a.tar.gz"`` -> ``"a.tar"``).
        A name that contains no dot has no extension and is returned
        unchanged rather than being reduced to an empty string.

        Args:
            file_name (str): The file name to process.

        Returns:
            str: The name without its final extension.
        """
        stem, dot, _extension = file_name.rpartition(".")
        # rpartition yields an empty separator when no "." is present.
        return stem if dot else file_name

    @staticmethod
    def is_valid_url(url: str) -> bool:
        """Check whether ``url`` parses with both a scheme and a network location.

        Args:
            url (str): The candidate URL.

        Returns:
            bool: True if both scheme and netloc are non-empty, False
            otherwise (including when parsing raises ``ValueError``).
        """
        try:
            result = urlparse(url)
        except ValueError:
            return False
        return bool(result.scheme and result.netloc)

    @staticmethod
    def is_base64(string: str) -> bool:
        """
        Validates if the input string is a Base64-encoded string.

        The string must decode under strict validation and re-encode to
        exactly the same text, so non-canonical encodings are rejected.

        Args:
            string (str): The string to validate.

        Returns:
            bool: True if the string is Base64, False otherwise.
        """
        try:
            # Strict decode: characters outside the alphabet raise an error.
            base64_bytes = base64.b64decode(string, validate=True)
            # Round-trip check: re-encoding must reproduce the input exactly.
            return base64.b64encode(base64_bytes).decode("utf-8") == string
        except Exception:
            # Deliberately broad: this validator reports any failure
            # (bad alphabet, bad padding, wrong input type) as "not Base64".
            return False
|