import gradio as gr
from transformers import pipeline, AutoTokenizer
import traceback
import sys
import iso639
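# Note: the `iso639` import is assumed to come from the iso-639 PyPI package
# (which exposes `iso639.languages.get(part3=...)`); if a different ISO 639
# library is installed, the lookup below would need adjusting.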

# Model name
MODEL_NAME = "facebook/nllb-200-distilled-600M"

# Cache the pipeline
cached_pipeline = None

def get_supported_languages():
    try:
        print("--- DEBUG: Starting language list retrieval ---", file=sys.stderr)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Language codes are stored as additional special tokens (e.g. "eng_Latn")
        lang_codes = [
            t.replace("__", "").replace(":", "")
            for t in tokenizer.special_tokens_map['additional_special_tokens']
            if not t.startswith('<')
        ]
        print(f"--- DEBUG: Raw language codes: {lang_codes[:10]}... ---", file=sys.stderr)
        lang_map = {}
        for code in lang_codes:
            try:
                lang_part = code.split('_')[0]
                lang = iso639.languages.get(part3=lang_part)
                name = lang.name
                if code == 'zho_Hans':
                    name = 'Chinese (Simplified)'
                elif code == 'zho_Hant':
                    name = 'Chinese (Traditional)'
                display_name = f"{name} ({code})"
                lang_map[display_name] = code
            except (KeyError, ValueError, AttributeError):
                print(f"--- DEBUG: Could not find friendly name for code: {code} ---", file=sys.stderr)
                lang_map[code] = code
        supported_langs = sorted(list(lang_map.keys()))
        print(f"--- DEBUG: Successfully processed language map. Total languages: {len(supported_langs)} ---", file=sys.stderr)
        print(f"--- DEBUG: Sample languages: {supported_langs[:5]} ---", file=sys.stderr)
        return lang_map, supported_langs
    except Exception as e:
        print(f"--- DEBUG: An unexpected error occurred: {e} ---", file=sys.stderr)
        print(f"--- DEBUG: Detailed traceback:\n{traceback.format_exc()} ---", file=sys.stderr)
        lang_map = {
            "English (eng_Latn)": "eng_Latn",
            "Chinese (Simplified) (zho_Hans)": "zho_Hans",
            "Greek (ell_Grek)": "ell_Grek"
        }
        supported_langs = list(lang_map.keys())
        print("--- DEBUG: Using fallback language list. ---", file=sys.stderr)
        return lang_map, supported_langs

# Get the language map and supported list at startup
lang_map, supported_langs = get_supported_languages()
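# Illustrative shape of the mapping built above (exact entries depend on the tokenizer):
#   {"English (eng_Latn)": "eng_Latn", "French (fra_Latn)": "fra_Latn", ...}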

def get_translator():
    global cached_pipeline
    if cached_pipeline is None:
        try:
            cached_pipeline = pipeline("translation", model=MODEL_NAME)
        except Exception as e:
            raise gr.Error(f"Model {MODEL_NAME} failed to load: {e}")
    return cached_pipeline
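# get_translator() above loads the model on CPU by default; on GPU hardware you
# could optionally pass device=0 (or device_map="auto" with accelerate installed)
# to pipeline(). This is an optional tweak, not something this app requires.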

def translate_text(text, source_lang_display, target_lang_display):
    if not text:
        return "Please enter text to translate."
    source_code = lang_map.get(source_lang_display)
    target_code = lang_map.get(target_lang_display)
    if not source_code or not target_code:
        return "Unsupported language selected."
    try:
        translator = get_translator()
        translation_result = translator(
            text,
            src_lang=source_code,
            tgt_lang=target_code
        )[0]['translation_text']
        return translation_result
    except Exception as e:
        return f"Translation failed: {e}"

with gr.Blocks(title="NLLB Universal Translation Tool") as demo:
    gr.Markdown("# NLLB Universal Translation Tool")
    gr.Markdown("This tool provides a user-friendly interface for translating text between many languages.")
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                source_dropdown = gr.Dropdown(choices=supported_langs, label="Source Language", value="English (eng_Latn)")
                target_dropdown = gr.Dropdown(choices=supported_langs, label="Target Language", value="Chinese (Simplified) (zho_Hans)")
            text_input = gr.Textbox(lines=5, label="Input Text")
            translate_btn = gr.Button("Translate")
        with gr.Column(scale=1):
            translated_output = gr.Textbox(label="Translated Text", lines=5, interactive=False)
    translate_btn.click(
        fn=translate_text,
        inputs=[text_input, source_dropdown, target_dropdown],
        outputs=translated_output
    )

demo.launch()