Spaces:
Sleeping
Sleeping
File size: 4,404 Bytes
3d3438c 4714b7e 9d32f47 c73ca33 82245c9 3d3438c 3ab1245 5062a2d 457c5fc 3ab1245 cb45ebb 1d6800e ce136a5 c73ca33 ce136a5 4f3767a ce136a5 4f3767a c73ca33 4f3767a 82245c9 36e8ba8 4f3767a 36e8ba8 112986d 82245c9 4f3767a ce136a5 c73ca33 ce136a5 c73ca33 4f3767a c73ca33 ce136a5 112986d ce136a5 112986d 4f3767a ce136a5 8dfd3aa aed4e6f cb45ebb aed4e6f 457c5fc 993ef9b aed4e6f 8dfd3aa f7ea107 8dfd3aa f7ea107 8dfd3aa f7ea107 8dfd3aa 9ee3d33 8dfd3aa 9ee3d33 8dfd3aa 9ee3d33 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import gradio as gr
from transformers import pipeline, AutoTokenizer
import traceback
import sys
import iso639
# Hugging Face model identifier; used to load both the tokenizer and the
# translation pipeline.
MODEL_NAME = "facebook/nllb-200-distilled-600M"
# Module-level cache for the translation pipeline. Stays None until
# get_translator() builds the pipeline on first use.
cached_pipeline = None
def get_supported_languages():
    """Build the mapping from human-readable language labels to model codes.

    The codes are read from the tokenizer's ``additional_special_tokens``
    (tokens starting with ``<`` are skipped). Each code's first
    underscore-separated part is looked up as an ISO 639-3 code to obtain a
    name, and the label has the form ``"<name> (<code>)"``. Codes with no
    resolvable name are listed under the bare code.

    Returns:
        A ``(lang_map, supported_langs)`` tuple: ``lang_map`` maps each label
        to its code and ``supported_langs`` is the sorted list of labels.
        If anything raises, a fixed three-language fallback is returned
        instead (labels in insertion order, not sorted).
    """
    try:
        print("--- DEBUG: Starting language list retrieval ---", file=sys.stderr)
        tok = AutoTokenizer.from_pretrained(MODEL_NAME)

        codes = []
        for token in tok.special_tokens_map['additional_special_tokens']:
            if token.startswith('<'):
                continue
            codes.append(token.replace("__", "").replace(":", ""))
        print(f"--- DEBUG: Raw language codes: {codes[:10]}... ---", file=sys.stderr)

        # The two Chinese variants share one ISO name, so they get explicit labels.
        name_overrides = {
            'zho_Hans': 'Chinese (Simplified)',
            'zho_Hant': 'Chinese (Traditional)',
        }

        label_to_code = {}
        for code in codes:
            try:
                iso_part3 = code.split('_')[0]
                looked_up = iso639.languages.get(part3=iso_part3).name
                friendly = name_overrides.get(code, looked_up)
                label_to_code[f"{friendly} ({code})"] = code
            except (KeyError, ValueError, AttributeError):
                # No friendly name available: expose the raw code as its own label.
                print(f"--- DEBUG: Could not find friendly name for code: {code} ---", file=sys.stderr)
                label_to_code[code] = code

        labels = sorted(label_to_code)
        print(f"--- DEBUG: Successfully processed language map. Total languages: {len(labels)} ---", file=sys.stderr)
        print(f"--- DEBUG: Sample languages: {labels[:5]} ---", file=sys.stderr)
        return label_to_code, labels
    except Exception as err:
        # Deliberate catch-all: the app must still start with a minimal list.
        print(f"--- DEBUG: An unexpected error occurred: {err} ---", file=sys.stderr)
        print(f"--- DEBUG: Detailed traceback:\n{traceback.format_exc()} ---", file=sys.stderr)
        fallback = {
            "English (eng_Latn)": "eng_Latn",
            "Chinese (Simplified) (zho_Hans)": "zho_Hans",
            "Greek (ell_Grek)": "ell_Grek",
        }
        print("--- DEBUG: Using fallback language list. ---", file=sys.stderr)
        return fallback, list(fallback)
# Build the label -> code map and the list of dropdown choices once, at import
# time, so the UI definition and translate_text() share the same data.
lang_map, supported_langs = get_supported_languages()
def get_translator():
    """Return the shared translation pipeline, creating it on first use.

    The pipeline is built with ``pipeline("translation", model=MODEL_NAME)``
    and stored in the module-level ``cached_pipeline`` so later calls reuse
    the same object.

    Returns:
        The cached pipeline object.

    Raises:
        gr.Error: If building the pipeline raises; the original exception is
            attached as ``__cause__``.
    """
    global cached_pipeline
    if cached_pipeline is None:
        try:
            cached_pipeline = pipeline("translation", model=MODEL_NAME)
        except Exception as e:
            # Chain explicitly so the underlying load failure is reported as
            # the direct cause rather than an incidental context.
            raise gr.Error(f"Model {MODEL_NAME} failed to load: {e}") from e
    return cached_pipeline
def translate_text(text, source_lang_display, target_lang_display):
    """Translate ``text`` between two languages selected by display label.

    Args:
        text: The text to translate.
        source_lang_display: Label of the source language; looked up in
            ``lang_map`` to get the model's language code.
        target_lang_display: Label of the target language; looked up the
            same way.

    Returns:
        The translated text on success. Otherwise a human-readable message:
        when the input is empty or whitespace-only, when either label is not
        in ``lang_map``, or when translation raises.
    """
    # Whitespace-only input has nothing to translate; treat it like empty input
    # instead of sending it to the model.
    if not text or (isinstance(text, str) and not text.strip()):
        return "Please enter text to translate."

    source_code = lang_map.get(source_lang_display)
    target_code = lang_map.get(target_lang_display)
    if not source_code or not target_code:
        return "Unsupported language selected."

    try:
        translator = get_translator()
        results = translator(
            text,
            src_lang=source_code,
            tgt_lang=target_code,
        )
        return results[0]['translation_text']
    except Exception as e:
        # The user only sees the short message; keep the full traceback in the
        # server log so failures can be diagnosed.
        print(f"--- DEBUG: Translation failed:\n{traceback.format_exc()} ---", file=sys.stderr)
        return f"Translation failed: {e}"
# Gradio UI: language pickers, input box and button on one side, the read-only
# result box on the other.
# NOTE(review): the source's indentation was lost, so the nesting below is a
# reconstruction - confirm the layout matches the intended one.
with gr.Blocks(title="NLLB Universal Translation Tool") as demo:
    gr.Markdown("# NLLB Universal Translation Tool")
    gr.Markdown("This tool provides a user-friendly interface for translating text between many languages.")
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                source_dropdown = gr.Dropdown(choices=supported_langs, label="Source Language", value="English (eng_Latn)")
                target_dropdown = gr.Dropdown(choices=supported_langs, label="Target Language", value="Chinese (Simplified) (zho_Hans)")
            text_input = gr.Textbox(lines=5, label="Input Text")
            translate_btn = gr.Button("Translate")
        with gr.Column(scale=1):
            translated_output = gr.Textbox(label="Translated Text", lines=5, interactive=False)
    # Wire the button to translate_text; the result goes to the output box.
    translate_btn.click(
        fn=translate_text,
        inputs=[text_input, source_dropdown, target_dropdown],
        outputs=translated_output
    )

# A stray trailing "|" after this call made the statement a syntax error; removed.
demo.launch()