"""Gradio front end for translating text with an NLLB translation model."""

import sys
import traceback
from typing import Dict, List, Optional, Tuple

import gradio as gr
import iso639
from transformers import AutoTokenizer, pipeline

# Model name
MODEL_NAME = "facebook/nllb-200-distilled-600M"

# Language codes preselected in the UI. They are resolved to dropdown labels
# at startup so the defaults stay valid even when friendly names are missing.
DEFAULT_SOURCE_CODE = "eng_Latn"
DEFAULT_TARGET_CODE = "zho_Hans"

# Display names that replace the ISO 639 name for specific model codes.
_NAME_OVERRIDES = {
    "zho_Hans": "Chinese (Simplified)",
    "zho_Hant": "Chinese (Traditional)",
}

# Cache the pipeline
cached_pipeline = None


def _debug(message: str) -> None:
    """Write a debug line to stderr in the script's ``--- DEBUG: ... ---`` format."""
    print(f"--- DEBUG: {message} ---", file=sys.stderr)


def _friendly_label(code: str) -> str:
    """Build the ``"<language name> (<code>)"`` dropdown label for *code*.

    The part of *code* before the first underscore is looked up as an
    ISO 639-3 identifier. Lookup failures propagate to the caller.
    """
    lang_part = code.split("_")[0]
    name = iso639.languages.get(part3=lang_part).name
    name = _NAME_OVERRIDES.get(code, name)
    return f"{name} ({code})"


def get_supported_languages() -> Tuple[Dict[str, str], List[str]]:
    """Return the label-to-code map and the sorted list of dropdown labels.

    Language codes are read from the tokenizer's additional special tokens
    (tokens starting with ``<`` are skipped). Each code gets a friendly label
    when the ISO 639 lookup succeeds; otherwise the raw code is used as its
    own label.

    If anything unexpected fails (for example the tokenizer cannot be
    loaded), a small hard-coded fallback list is returned instead of raising,
    so the UI can still start.

    Returns:
        A ``(lang_map, supported_langs)`` tuple, where ``lang_map`` maps each
        display label to its model language code and ``supported_langs`` is
        the list of labels offered in the dropdowns.
    """
    try:
        _debug("Starting language list retrieval")

        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        lang_codes = [
            t.replace("__", "").replace(":", "")
            for t in tokenizer.special_tokens_map['additional_special_tokens']
            if not t.startswith('<')
        ]
        _debug(f"Raw language codes: {lang_codes[:10]}...")

        lang_map = {}
        for code in lang_codes:
            try:
                lang_map[_friendly_label(code)] = code
            except (KeyError, ValueError, AttributeError):
                # No friendly name available: show the raw code instead.
                _debug(f"Could not find friendly name for code: {code}")
                lang_map[code] = code

        supported_langs = sorted(lang_map)
        _debug(
            "Successfully processed language map. "
            f"Total languages: {len(supported_langs)}"
        )
        _debug(f"Sample languages: {supported_langs[:5]}")
        return lang_map, supported_langs

    except Exception as e:
        # Deliberate best-effort: log the failure and fall back to a minimal
        # list rather than preventing the app from starting.
        _debug(f"An unexpected error occurred: {e}")
        _debug(f"Detailed traceback:\n{traceback.format_exc()}")

        lang_map = {
            "English (eng_Latn)": "eng_Latn",
            "Chinese (Simplified) (zho_Hans)": "zho_Hans",
            "Greek (ell_Grek)": "ell_Grek",
        }
        supported_langs = list(lang_map.keys())
        _debug("Using fallback language list.")
        return lang_map, supported_langs


# Get the language map and supported list at startup
lang_map, supported_langs = get_supported_languages()


def _label_for_code(code: str) -> Optional[str]:
    """Return the dropdown label mapped to *code*, or ``None`` if absent.

    Used to pick dropdown defaults from the labels that were actually built,
    instead of assuming a particular friendly name exists.
    """
    return next(
        (label for label, value in lang_map.items() if value == code),
        None,
    )


def get_translator():
    """Return the shared translation pipeline, creating it on first use.

    Raises:
        gr.Error: If the pipeline for ``MODEL_NAME`` cannot be created.
    """
    global cached_pipeline
    if cached_pipeline is None:
        try:
            cached_pipeline = pipeline("translation", model=MODEL_NAME)
        except Exception as e:
            # Chain the cause so the original failure stays in the traceback.
            raise gr.Error(f"Model {MODEL_NAME} failed to load: {e}") from e
    return cached_pipeline


def translate_text(text, source_lang_display, target_lang_display):
    """Translate *text* between the languages selected in the dropdowns.

    Args:
        text: The text to translate.
        source_lang_display: Dropdown label of the source language.
        target_lang_display: Dropdown label of the target language.

    Returns:
        The translated text, or a human-readable message when the input is
        empty, a language label is unknown, or translation fails. Errors are
        reported as the returned string rather than raised.
    """
    # Treat whitespace-only input like empty input; there is nothing to translate.
    if not text or not text.strip():
        return "Please enter text to translate."

    source_code = lang_map.get(source_lang_display)
    target_code = lang_map.get(target_lang_display)
    if not source_code or not target_code:
        return "Unsupported language selected."

    try:
        translator = get_translator()
        results = translator(
            text,
            src_lang=source_code,
            tgt_lang=target_code,
        )
        return results[0]['translation_text']
    except Exception as e:
        return f"Translation failed: {e}"


with gr.Blocks(title="NLLB Universal Translation Tool") as demo:
    gr.Markdown("# NLLB Universal Translation Tool")
    gr.Markdown("This tool provides a user-friendly interface for translating text between many languages.")

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                # Defaults are looked up by code; None leaves the dropdown
                # unselected when the code is not among the choices.
                source_dropdown = gr.Dropdown(
                    choices=supported_langs,
                    label="Source Language",
                    value=_label_for_code(DEFAULT_SOURCE_CODE),
                )
                target_dropdown = gr.Dropdown(
                    choices=supported_langs,
                    label="Target Language",
                    value=_label_for_code(DEFAULT_TARGET_CODE),
                )
            text_input = gr.Textbox(lines=5, label="Input Text")
            translate_btn = gr.Button("Translate")

        with gr.Column(scale=1):
            translated_output = gr.Textbox(label="Translated Text", lines=5, interactive=False)

    translate_btn.click(
        fn=translate_text,
        inputs=[text_input, source_dropdown, target_dropdown],
        outputs=translated_output,
    )

# Launched at import time, as in the original script.
demo.launch()