# l9p's picture
# Important rollback: return to the web translation service
# 8dfd3aa verified
import gradio as gr
from transformers import pipeline, AutoTokenizer
import traceback
import sys
import iso639
# Hugging Face model identifier; passed to AutoTokenizer.from_pretrained()
# and to pipeline() further down in this file.
MODEL_NAME = "facebook/nllb-200-distilled-600M"
# Module-level cache for the translation pipeline. Starts as None and is
# assigned by get_translator() the first time the pipeline is built.
cached_pipeline = None
def get_supported_languages():
    """Build the display-name -> language-code mapping offered in the UI.

    Language codes are taken from the tokenizer's
    ``additional_special_tokens`` (tokens starting with ``<`` are skipped).
    Each code is labelled ``"<name> (<code>)"``, where the name is looked up
    via ``iso639`` from the part of the code before the first underscore.
    Codes whose lookup fails are listed under the raw code itself.
    Progress and failures are reported on stderr.

    Returns:
        A ``(lang_map, supported_langs)`` tuple: the dict mapping display
        name to code, and the list of display names (sorted on the normal
        path). If anything unexpected goes wrong, a fixed three-language
        fallback mapping and its keys are returned instead.
    """
    # Both Chinese variants resolve to the same 'zho' lookup, so they get
    # explicit labels to tell them apart.
    chinese_labels = {
        'zho_Hans': 'Chinese (Simplified)',
        'zho_Hant': 'Chinese (Traditional)',
    }
    try:
        print("--- DEBUG: Starting language list retrieval ---", file=sys.stderr)
        tok = AutoTokenizer.from_pretrained(MODEL_NAME)

        codes = []
        for token in tok.special_tokens_map['additional_special_tokens']:
            if token.startswith('<'):
                continue
            codes.append(token.replace("__", "").replace(":", ""))
        print(f"--- DEBUG: Raw language codes: {codes[:10]}... ---", file=sys.stderr)

        display_to_code = {}
        for code in codes:
            try:
                iso_entry = iso639.languages.get(part3=code.partition('_')[0])
                # iso_entry.name is evaluated even for the overridden codes,
                # so a failed lookup still lands in the except branch below.
                label = chinese_labels.get(code, iso_entry.name)
                display_to_code[f"{label} ({code})"] = code
            except (KeyError, ValueError, AttributeError):
                print(f"--- DEBUG: Could not find friendly name for code: {code} ---", file=sys.stderr)
                display_to_code[code] = code

        names = sorted(display_to_code)
        print(f"--- DEBUG: Successfully processed language map. Total languages: {len(names)} ---", file=sys.stderr)
        print(f"--- DEBUG: Sample languages: {names[:5]} ---", file=sys.stderr)
        return display_to_code, names
    except Exception as err:
        # Last-resort boundary: log everything and keep the app usable with
        # a minimal hard-coded language list.
        print(f"--- DEBUG: An unexpected error occurred: {err} ---", file=sys.stderr)
        print(f"--- DEBUG: Detailed traceback:\n{traceback.format_exc()} ---", file=sys.stderr)
        fallback = {
            "English (eng_Latn)": "eng_Latn",
            "Chinese (Simplified) (zho_Hans)": "zho_Hans",
            "Greek (ell_Grek)": "ell_Grek",
        }
        fallback_names = list(fallback)
        print("--- DEBUG: Using fallback language list. ---", file=sys.stderr)
        return fallback, fallback_names
# Build the language map and the list of display names once, at import time.
# translate_text() and the dropdowns defined later read these module-level names.
lang_map, supported_langs = get_supported_languages()
def get_translator():
    """Return the shared translation pipeline, building it on first use.

    The pipeline is stored in the module-level ``cached_pipeline`` so that
    later calls reuse it instead of constructing it again.

    Returns:
        The object returned by ``pipeline("translation", model=MODEL_NAME)``.

    Raises:
        gr.Error: If the pipeline cannot be constructed. The original
            exception is attached as ``__cause__``.
    """
    global cached_pipeline
    if cached_pipeline is None:
        try:
            cached_pipeline = pipeline("translation", model=MODEL_NAME)
        except Exception as e:
            # Chain explicitly so the root cause of the load failure is
            # preserved as the direct cause in the traceback.
            raise gr.Error(f"Model {MODEL_NAME} failed to load: {e}") from e
    return cached_pipeline
def translate_text(text, source_lang_display, target_lang_display):
    """Translate *text* between two languages selected by display name.

    Args:
        text: The text to translate.
        source_lang_display: Display name of the source language; must be a
            key of the module-level ``lang_map``.
        target_lang_display: Display name of the target language; must be a
            key of the module-level ``lang_map``.

    Returns:
        The translated text on success. Otherwise a human-readable message:
        a prompt when there is nothing to translate, a notice when either
        language is not in ``lang_map``, or ``"Translation failed: ..."`` when
        loading or running the translator raises.
    """
    # Empty, None and whitespace-only input all mean "nothing to translate";
    # whitespace-only text must not be sent to the model.
    if not text or (isinstance(text, str) and not text.strip()):
        return "Please enter text to translate."

    source_code = lang_map.get(source_lang_display)
    target_code = lang_map.get(target_lang_display)
    if not source_code or not target_code:
        return "Unsupported language selected."

    try:
        translator = get_translator()
        result = translator(
            text,
            src_lang=source_code,
            tgt_lang=target_code,
        )
        return result[0]['translation_text']
    except Exception as e:
        # The user only sees the short message; keep the full traceback on
        # stderr so the failure can still be diagnosed.
        print(f"--- DEBUG: Translation failed:\n{traceback.format_exc()} ---", file=sys.stderr)
        return f"Translation failed: {e}"
def _default_choice(preferred_code):
    """Return the display name mapped to *preferred_code* in ``lang_map``.

    Display names depend on whether a friendly name was found for each code,
    so the default is resolved by language code rather than by a hard-coded
    label. Falls back to the first available choice, or None if there are no
    choices at all.
    """
    for display_name, code in lang_map.items():
        if code == preferred_code:
            return display_name
    return supported_langs[0] if supported_langs else None


# Two-column layout: language pickers, input box and button on the left,
# read-only translation output on the right.
with gr.Blocks(title="NLLB Universal Translation Tool") as demo:
    gr.Markdown("# NLLB Universal Translation Tool")
    gr.Markdown("This tool provides a user-friendly interface for translating text between many languages.")
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                source_dropdown = gr.Dropdown(
                    choices=supported_langs,
                    label="Source Language",
                    value=_default_choice("eng_Latn"),
                )
                target_dropdown = gr.Dropdown(
                    choices=supported_langs,
                    label="Target Language",
                    value=_default_choice("zho_Hans"),
                )
            text_input = gr.Textbox(lines=5, label="Input Text")
            translate_btn = gr.Button("Translate")
        with gr.Column(scale=1):
            translated_output = gr.Textbox(label="Translated Text", lines=5, interactive=False)
    # Event wiring must happen inside the Blocks context.
    translate_btn.click(
        fn=translate_text,
        inputs=[text_input, source_dropdown, target_dropdown],
        outputs=translated_output,
    )

demo.launch()