File size: 4,404 Bytes
3d3438c
4714b7e
9d32f47
c73ca33
82245c9
3d3438c
3ab1245
5062a2d
457c5fc
3ab1245
cb45ebb
1d6800e
ce136a5
 
c73ca33
 
ce136a5
4f3767a
 
 
 
 
ce136a5
4f3767a
c73ca33
4f3767a
 
 
82245c9
 
36e8ba8
 
4f3767a
 
 
 
36e8ba8
112986d
 
82245c9
4f3767a
 
 
ce136a5
 
c73ca33
 
 
ce136a5
c73ca33
4f3767a
 
c73ca33
ce136a5
112986d
 
 
ce136a5
112986d
4f3767a
 
ce136a5
 
8dfd3aa
 
 
aed4e6f
cb45ebb
aed4e6f
457c5fc
 
 
993ef9b
aed4e6f
 
8dfd3aa
 
 
 
 
 
 
 
 
f7ea107
 
 
 
 
 
 
 
 
8dfd3aa
f7ea107
8dfd3aa
f7ea107
8dfd3aa
 
 
9ee3d33
 
 
 
 
 
 
 
 
 
 
8dfd3aa
9ee3d33
 
8dfd3aa
 
 
9ee3d33
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr
from transformers import pipeline, AutoTokenizer
import traceback
import sys
import iso639

# Hugging Face model identifier; used below to load both the tokenizer
# (for the language list) and the translation pipeline.
MODEL_NAME = "facebook/nllb-200-distilled-600M"

# Module-level cache for the translation pipeline. Stays None until
# get_translator() builds the pipeline on first use.
cached_pipeline = None

def get_supported_languages():
    """Build the display-name -> language-code mapping offered in the UI.

    Loads the tokenizer for ``MODEL_NAME``, takes the language codes from its
    ``additional_special_tokens`` (entries starting with ``<`` are skipped)
    and looks up a readable name for each one through ``iso639``, using the
    part of the code before the underscore. Codes for which no name can be
    found are listed under the raw code itself.

    Returns:
        A ``(lang_map, supported_langs)`` tuple. ``lang_map`` maps each
        display name to its language code; ``supported_langs`` is the list of
        display names (sorted on the normal path).

        This function never raises: if the tokenizer cannot be loaded, yields
        no language codes, or any other error occurs, a small built-in list
        (English, Simplified Chinese, Greek) is returned instead.
    """
    # Friendlier names for codes whose ISO 639 name alone would not tell the
    # two Chinese script variants apart.
    name_overrides = {
        'zho_Hans': 'Chinese (Simplified)',
        'zho_Hant': 'Chinese (Traditional)',
    }

    try:
        print("--- DEBUG: Starting language list retrieval ---", file=sys.stderr)

        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        lang_codes = [
            t.replace("__", "").replace(":", "")
            for t in tokenizer.special_tokens_map['additional_special_tokens']
            if not t.startswith('<')
        ]

        print(f"--- DEBUG: Raw language codes: {lang_codes[:10]}... ---", file=sys.stderr)

        if not lang_codes:
            # An empty list would leave both dropdowns without any choices
            # (and their default values invalid); use the fallback instead.
            raise ValueError("tokenizer exposed no language codes")

        lang_map = {}
        for code in lang_codes:
            try:
                lang_part = code.split('_')[0]
                lang = iso639.languages.get(part3=lang_part)
                # The override is applied only after a successful lookup, so
                # a code unknown to iso639 still ends up in the except branch.
                name = name_overrides.get(code, lang.name)
            except (KeyError, ValueError, AttributeError):
                print(f"--- DEBUG: Could not find friendly name for code: {code} ---", file=sys.stderr)
                lang_map[code] = code
            else:
                lang_map[f"{name} ({code})"] = code

        supported_langs = sorted(lang_map)

        print(f"--- DEBUG: Successfully processed language map. Total languages: {len(supported_langs)} ---", file=sys.stderr)
        print(f"--- DEBUG: Sample languages: {supported_langs[:5]} ---", file=sys.stderr)

        return lang_map, supported_langs
    except Exception as e:
        # Startup boundary: log the failure and keep the app usable with a
        # minimal language list rather than crashing.
        print(f"--- DEBUG: An unexpected error occurred: {e} ---", file=sys.stderr)
        print(f"--- DEBUG: Detailed traceback:\n{traceback.format_exc()} ---", file=sys.stderr)

        lang_map = {
            "English (eng_Latn)": "eng_Latn",
            "Chinese (Simplified) (zho_Hans)": "zho_Hans",
            "Greek (ell_Grek)": "ell_Grek",
        }
        supported_langs = list(lang_map)
        print("--- DEBUG: Using fallback language list. ---", file=sys.stderr)

        return lang_map, supported_langs

# Build the language data once, at import time: translate_text() resolves the
# selected display names through lang_map, and the dropdowns below take their
# choices from supported_langs.
lang_map, supported_langs = get_supported_languages()

def get_translator():
    """Return the shared translation pipeline, creating it on first use.

    The pipeline for ``MODEL_NAME`` is built once and stored in the
    module-level ``cached_pipeline``; later calls return that same object.

    Returns:
        The cached ``transformers`` translation pipeline.

    Raises:
        gr.Error: If the pipeline cannot be created. The original exception
            is attached as the cause.
    """
    global cached_pipeline
    if cached_pipeline is None:
        try:
            cached_pipeline = pipeline("translation", model=MODEL_NAME)
        except Exception as e:
            # Chain explicitly so the underlying load failure stays visible
            # in the traceback as the direct cause.
            raise gr.Error(f"Model {MODEL_NAME} failed to load: {e}") from e
    return cached_pipeline

def translate_text(text, source_lang_display, target_lang_display):
    """Translate ``text`` between the two languages selected in the UI.

    Args:
        text: The text to translate.
        source_lang_display: Display name of the source language, as listed
            in ``lang_map``.
        target_lang_display: Display name of the target language, as listed
            in ``lang_map``.

    Returns:
        The translated text. Instead of raising, a readable message is
        returned when the input is empty or blank, when either language is
        not found in ``lang_map``, or when loading or running the model fails.
    """
    # None, "" and whitespace-only input carry nothing to translate, so do
    # not load or call the model for them.
    if not text or not text.strip():
        return "Please enter text to translate."

    source_code = lang_map.get(source_lang_display)
    target_code = lang_map.get(target_lang_display)
    if not source_code or not target_code:
        return "Unsupported language selected."

    try:
        translator = get_translator()
        outputs = translator(
            text,
            src_lang=source_code,
            tgt_lang=target_code,
        )
        return outputs[0]['translation_text']
    except Exception as e:
        # UI boundary: surface the failure in the output box.
        return f"Translation failed: {e}"

# UI layout: language selectors, input box and button in the left column,
# read-only translation output in the right column.
with gr.Blocks(title="NLLB Universal Translation Tool") as demo:
    gr.Markdown("# NLLB Universal Translation Tool")
    gr.Markdown("This tool provides a user-friendly interface for translating text between many languages.")

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                source_dropdown = gr.Dropdown(choices=supported_langs, label="Source Language", value="English (eng_Latn)")
                target_dropdown = gr.Dropdown(choices=supported_langs, label="Target Language", value="Chinese (Simplified) (zho_Hans)")

            text_input = gr.Textbox(lines=5, label="Input Text")
            translate_btn = gr.Button("Translate")

        with gr.Column(scale=1):
            translated_output = gr.Textbox(label="Translated Text", lines=5, interactive=False)

    # Clicking the button passes the text and both selected display names to
    # translate_text and shows its return value in the output box.
    translate_btn.click(
        fn=translate_text,
        inputs=[text_input, source_dropdown, target_dropdown],
        outputs=translated_output
    )

# Start the server only when this file is executed as a script, so that
# importing the module (e.g. from a test or another tool) does not launch it.
if __name__ == "__main__":
    demo.launch()