Spaces:
Sleeping
Sleeping
| import os | |
| import ctranslate2 | |
| from transformers import AutoTokenizer | |
| import gradio as gr | |
# Clone the pre-converted CTranslate2 model automatically (avoids uploading
# the weights together with the app). Skipped when "nllb_ct2" already exists.
if not os.path.exists("nllb_ct2"):
    clone_status = os.system(
        "git lfs install && git clone https://huggingface.co/JustFrederik/nllb-200-distilled-600M-ct2-int8 nllb_ct2"
    )
    # os.system reports failure only through its return value; stop here with
    # a clear message instead of failing later while loading the model.
    if clone_status != 0:
        raise RuntimeError(
            f"Cloning the model into 'nllb_ct2' failed (status {clone_status})"
        )
# Run on the GPU when CTranslate2 reports at least one CUDA device,
# otherwise fall back to the CPU.
if ctranslate2.get_cuda_device_count() > 0:
    device = "cuda"
else:
    device = "cpu"

# The translator reads the converted model from the local "nllb_ct2"
# directory; the tokenizer is loaded from the original NLLB checkpoint name.
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
translator = ctranslate2.Translator("nllb_ct2", device=device)
def translate(text, source_lang="eng_Latn", target_lang="zho_Hans"):
    """Translate ``text`` from ``source_lang`` into ``target_lang``.

    Uses the module-level ``tokenizer`` and ``translator`` objects.

    Args:
        text: The text to translate.
        source_lang: NLLB language code of the input (e.g. ``"eng_Latn"``).
        target_lang: NLLB language code of the output (e.g. ``"zho_Hans"``).

    Returns:
        The translated text, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    # Nothing to translate: skip the model call entirely.
    if not text or not text.strip():
        return ""

    source_lang = source_lang.strip()
    target_lang = target_lang.strip()

    # The tokenizer adds the source-language token itself, driven by
    # ``src_lang``; the language must not be spliced into the text.
    tokenizer.src_lang = source_lang
    source_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))

    # The target language is selected by forcing its code as the first
    # decoder token through ``target_prefix``.
    results = translator.translate_batch(
        [source_tokens],
        target_prefix=[[target_lang]],
    )

    # The best hypothesis starts with the forced language token; drop it.
    translated_tokens = results[0].hypotheses[0][1:]
    return tokenizer.decode(
        tokenizer.convert_tokens_to_ids(translated_tokens),
        skip_special_tokens=True,
    )
# Web UI: one text box for the input text plus two text boxes for the source
# and target language codes, passed to translate() in that order.
demo = gr.Interface(
    title="NLLB-200 翻译器(自动挂载模型)",
    fn=translate,
    inputs=[
        gr.Textbox(label="输入文本"),
        gr.Textbox(label="源语言代码", value="eng_Latn"),
        gr.Textbox(label="目标语言代码", value="zho_Hans"),
    ],
    outputs="text",
)

# Start serving the interface.
demo.launch()