Spaces:

TeeA
/

Datum-3D

Running

App Files Files Community

Datum-3D / string_utils.py

TeeA

refactor

d6cfb5e 2 months ago

raw

history blame contribute delete

2.38 kB

	# %%writefile string_utils.py
	import base64
	import random
	import re
	import string
	from urllib.parse import urlparse


	class StringUtils:
	    """Stateless helpers for generating, cleaning and validating strings.

	    Every method is a ``@staticmethod``; the class only acts as a namespace.
	    """

	    @staticmethod
	    def generate_random_string(length: int = 32) -> str:
	        """Return a random string made of ASCII letters and digits.

	        The characters are drawn with the ``random`` module, which is not a
	        cryptographically secure source: do not use the result as a password,
	        token or any other secret.

	        Args:
	            length: Number of characters to generate. Zero or a negative
	                value yields an empty string.

	        Returns:
	            A string of ``length`` alphanumeric characters.
	        """
	        characters = string.ascii_letters + string.digits
	        return "".join(random.choice(characters) for _ in range(length))

	    @staticmethod
	    def clean_string(input_string: str) -> str:
	        """Normalize free text for comparison or indexing.

	        Non-ASCII characters are dropped, spacing around punctuation, dollar
	        signs and parenthesized numbers is tidied, whitespace is collapsed,
	        and the result is lower-cased.

	        Args:
	            input_string: The raw text to clean.

	        Returns:
	            The cleaned, lower-cased text without leading or trailing
	            whitespace.
	        """
	        # Replace each run of non-ASCII characters with a single space.
	        cleaned_string = re.sub(r"[^\x00-\x7F]+", " ", input_string)

	        # Punctuation surrounded by whitespace keeps only the trailing space:
	        # "a , b" -> "a, b".
	        cleaned_string = re.sub(r"\s([.,;!?%:])\s", r"\1 ", cleaned_string)

	        # Attach the dollar sign to the amount that follows: "$ 5" -> "$5".
	        cleaned_string = re.sub(r"\$\s+", "$", cleaned_string)

	        # Tighten spacing inside parentheses around numbers: "( 42 )" -> "(42)".
	        # The parentheses must be escaped; an unescaped "$" is an end-of-string
	        # anchor and would make this pattern impossible to match.
	        cleaned_string = re.sub(r"\(\s*(\d+)\s*\)", r"(\1)", cleaned_string)

	        # Drop any whitespace left directly before punctuation (covers cases
	        # the first punctuation pass misses, e.g. punctuation at end of text).
	        cleaned_string = re.sub(r"\s+([.,;!?%:])", r"\1", cleaned_string)

	        # Collapse whitespace runs, trim both ends and lower-case the result.
	        return re.sub(r"\s+", " ", cleaned_string).strip().lower()

	    @staticmethod
	    def get_file_name_without_extension(file_name: str) -> str:
	        """Strip the last extension from a file name.

	        Only the final ``.suffix`` is removed, so ``"archive.tar.gz"`` becomes
	        ``"archive.tar"``. Names without an extension (``"README"``) and
	        dot-files (``".bashrc"``) are returned unchanged instead of collapsing
	        to an empty string.

	        Args:
	            file_name: The file name to process.

	        Returns:
	            The file name without its last extension.
	        """
	        stem, dot, _extension = file_name.rpartition(".")
	        # No dot at all, or nothing before the only dot: there is no
	        # extension to remove, so keep the name as it is.
	        if not dot or not stem:
	            return file_name
	        return stem

	    @staticmethod
	    def is_valid_url(url: str) -> bool:
	        """Tell whether ``url`` has both a scheme and a network location.

	        Args:
	            url: The candidate URL.

	        Returns:
	            True if ``urlparse`` finds a non-empty scheme and netloc, False
	            otherwise (including when ``urlparse`` rejects the input with a
	            ``ValueError``).
	        """
	        try:
	            result = urlparse(url)
	        except ValueError:
	            # Raised by urlparse for malformed input such as "http://[bad".
	            return False
	        return bool(result.scheme and result.netloc)

	    @staticmethod
	    def is_base64(string: str) -> bool:
	        """
	        Validates if the input string is a Base64-encoded string.

	        The check is strict: the input must decode without errors and
	        re-encoding the decoded bytes must reproduce the input exactly, so
	        unpadded or otherwise non-canonical input is rejected. An empty
	        string is accepted because it round-trips to itself.

	        NOTE: the parameter name shadows the ``string`` module inside this
	        method; it is kept because it is part of the public signature.

	        Args:
	            string (str): The string to validate.

	        Returns:
	            bool: True if the string is Base64, False otherwise.
	        """
	        try:
	            # validate=True rejects characters outside the Base64 alphabet.
	            base64_bytes = base64.b64decode(string, validate=True)
	        except (ValueError, TypeError):
	            # ValueError covers binascii.Error and non-ASCII text; TypeError
	            # covers inputs that are neither str nor bytes-like (e.g. None).
	            return False
	        # Only a canonical encoding survives the round trip unchanged.
	        return base64.b64encode(base64_bytes).decode("utf-8") == string