Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,594 Bytes
90dc9aa 4205169 90dc9aa 4205169 90dc9aa 4205169 90dc9aa 4205169 90dc9aa 4205169 90dc9aa 4205169 90dc9aa 4205169 90dc9aa 7217024 90dc9aa 4205169 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
from modules.languages.constants import LANGUAGES
import re
import unicodedata
def clean_text(text: str) -> str:
    """
    Remove invisible or non-standard Unicode characters that break transliteration.

    Steps, in order:
      1. Normalize to NFC.
      2. Drop zero-width characters (U+200B-U+200D, U+FEFF, U+2060).
      3. Turn every space-like character (tab, non-breaking space and all
         other Unicode space separators) into a regular space.
      4. Drop the remaining non-printable characters, keeping newlines.
      5. Collapse runs of spaces and strip leading/trailing whitespace.

    Args:
        text: The raw input string.

    Returns:
        The sanitized string.
    """
    # Normalize Unicode (decompose + recompose)
    text = unicodedata.normalize("NFC", text)

    # Remove zero-width characters
    invisible_pattern = r'[\u200B-\u200D\uFEFF\u2060]'
    text = re.sub(invisible_pattern, '', text)

    # Map tabs and every Unicode space separator (category "Zs", which includes
    # the non-breaking space) to a regular space. This must happen before the
    # printable filter below: str.isprintable() is False for all separators
    # except the ASCII space, so they would otherwise be deleted and the words
    # around them glued together.
    text = ''.join(
        ' ' if ch == '\t' or unicodedata.category(ch) == 'Zs' else ch
        for ch in text
    )

    # Remove stray control chars except \n
    text = ''.join(ch for ch in text if ch.isprintable() or ch == '\n')

    # Trim multiple spaces
    text = re.sub(r'[ ]{2,}', ' ', text)
    return text.strip()
def fn_transliterate(input_text: str, input_language: str = 'autodetect') -> dict:
    """
    Transliterate ``input_text`` into the script of every entry in ``LANGUAGES``.

    Args:
        input_text: Text to transliterate. It is sanitized with ``clean_text``
            first. ``None`` or blank text is not sent to the transliterator;
            it is returned unchanged for every language code.
        input_language: Source script, passed as the first argument to
            ``aksharamukha.transliterate.process`` (default ``'autodetect'``).

    Returns:
        A dict keyed by each language's ``"code"``. Each value is the
        transliterated text, or ``'-'`` for every code if any step fails.
    """
    try:
        # Imported lazily so a missing/broken aksharamukha install is handled
        # by the fallback below instead of failing at module import time.
        from aksharamukha import transliterate

        # clean_text() cannot handle None, so only sanitize real input.
        if input_text is not None:
            input_text = clean_text(input_text)

        target_scripts = {lang["code"]: lang["aksharamukha_name"] for lang in LANGUAGES}

        # Nothing to transliterate: echo the (cleaned) input for every code.
        if input_text is None or input_text.strip() == "":
            return {code: input_text for code in target_scripts}

        return {
            code: transliterate.process(input_language, script_name, input_text)
            for code, script_name in target_scripts.items()
        }
    except Exception as e:
        # Best-effort boundary: report the failure and return placeholders.
        # The preview must not raise itself when the input is not a string.
        preview = input_text[:30] if isinstance(input_text, str) else repr(input_text)
        print(f"Error transliterating '{preview}...': {e}")
        return {lang["code"]: '-' for lang in LANGUAGES}