File size: 1,594 Bytes
90dc9aa
4205169
 
90dc9aa
4205169
90dc9aa
4205169
90dc9aa
4205169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90dc9aa
 
 
4205169
90dc9aa
4205169
90dc9aa
4205169
90dc9aa
7217024
 
 
 
90dc9aa
 
 
 
 
 
4205169
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from modules.languages.constants import LANGUAGES
import re
import unicodedata

def clean_text(text: str) -> str:
    """
    Sanitize text so that hidden characters do not break transliteration.

    The steps run in this order:
      1. NFC-normalize the text.
      2. Drop zero-width space / non-joiner / joiner, BOM and word joiner.
      3. Turn non-breaking spaces into ordinary spaces.
      4. Discard every remaining non-printable character except newline
         (this also removes tabs and carriage returns).
      5. Collapse runs of two or more spaces into one.
      6. Strip leading and trailing whitespace.
    """
    composed = unicodedata.normalize("NFC", text)

    # These code points render nothing, so they are deleted outright.
    visible = re.sub(r'[\u200B-\u200D\uFEFF\u2060]', '', composed)

    # NBSP has to become a plain space *before* the printable filter below:
    # str.isprintable() is False for it, so it would otherwise be deleted
    # and the surrounding words would be glued together.
    spaced = visible.replace('\xa0', ' ')

    kept = [char for char in spaced if char == '\n' or char.isprintable()]
    collapsed = re.sub(r'[ ]{2,}', ' ', ''.join(kept))

    return collapsed.strip()


def fn_transliterate(input_text: str, input_language: str = 'autodetect') -> dict:
    """
    Transliterate input_text into the script of every entry in LANGUAGES.

    Args:
        input_text: Text to transliterate. It is sanitized with clean_text()
            first. None, empty or whitespace-only input is never sent to
            aksharamukha; the (cleaned) input is returned unchanged for
            every language code instead.
        input_language: Passed as the first argument to aksharamukha's
            transliterate.process(); defaults to 'autodetect'.

    Returns:
        A dict keyed by each language's "code". Values are the transliterated
        strings, or '-' for every code when any step raised an exception
        (the error is printed, not re-raised).
    """
    try:
        from aksharamukha import transliterate

        # clean_text() cannot handle None, so only sanitize real input.
        if input_text is not None:
            input_text = clean_text(input_text)

        target_scripts = {lang["code"]: lang["aksharamukha_name"] for lang in LANGUAGES}

        # Cleaned text is already stripped, so a falsy value means there is
        # nothing to transliterate. Decide once instead of per language.
        if not input_text:
            return dict.fromkeys(target_scripts, input_text)

        return {
            code: transliterate.process(input_language, script_name, input_text)
            for code, script_name in target_scripts.items()
        }

    except Exception as e:
        # str() keeps the handler itself from raising when input_text is
        # None or another non-sliceable value.
        preview = str(input_text)[:30]
        print(f"Error transliterating '{preview}...': {e}")
        return {lang["code"]: '-' for lang in LANGUAGES}