File size: 1,225 Bytes
ca20dd7
8c0405d
 
 
 
ca20dd7
 
 
b577599
ca20dd7
 
8c0405d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca20dd7
8c0405d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
import subprocess

import ctranslate2
import gradio as gr
from transformers import AutoTokenizer

# Fetch the converted model with git on first start instead of shipping it
# with the app (avoids uploading the weights).
MODEL_DIR = "nllb_ct2"
MODEL_REPO = "https://huggingface.co/JustFrederik/nllb-200-distilled-600M-ct2-int8"

if not os.path.exists(MODEL_DIR):
    # Argument lists keep the shell out of it, and check=True stops start-up
    # with the failing command's exit status instead of continuing on to a
    # model directory that was never created.
    subprocess.run(["git", "lfs", "install"], check=True)
    subprocess.run(["git", "clone", MODEL_REPO, MODEL_DIR], check=True)

# Use the GPU when CTranslate2 reports at least one CUDA device, else the CPU.
device = "cuda" if ctranslate2.get_cuda_device_count() > 0 else "cpu"
translator = ctranslate2.Translator(MODEL_DIR, device=device)
# The tokenizer is loaded by name from the original NLLB checkpoint.
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

def translate(text: str, source_lang: str = "eng_Latn", target_lang: str = "zho_Hans") -> str:
    """Translate *text* from one NLLB-200 language to another.

    Args:
        text: The text to translate.
        source_lang: NLLB language code of the input, e.g. ``"eng_Latn"``.
        target_lang: NLLB language code of the output, e.g. ``"zho_Hans"``.

    Returns:
        The translated text, or ``""`` when *text* is empty or contains only
        whitespace (the model is not invoked in that case).
    """
    if not text or not text.strip():
        return ""

    # The source language is conveyed by a special token that the tokenizer
    # inserts itself, so select it here and keep special tokens when encoding.
    # NOTE(review): this mutates the shared module-level tokenizer; confirm
    # requests are not handled concurrently with different source languages.
    tokenizer.src_lang = source_lang
    source_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))

    # The target language is forced as the first decoded token rather than
    # being written into the source text.
    results = translator.translate_batch(
        [source_tokens],
        target_prefix=[[target_lang]],
    )
    translated_tokens = results[0].hypotheses[0]

    # The hypothesis echoes the forced prefix; drop it so the language code
    # cannot leak into the decoded output.
    if translated_tokens and translated_tokens[0] == target_lang:
        translated_tokens = translated_tokens[1:]

    return tokenizer.decode(
        tokenizer.convert_tokens_to_ids(translated_tokens),
        skip_special_tokens=True,
    )

# Web UI: one free-text field followed by two editable language-code fields
# whose initial values mirror the defaults of translate().
input_widgets = [
    gr.Textbox(label="输入文本"),
    gr.Textbox(label="源语言代码", value="eng_Latn"),
    gr.Textbox(label="目标语言代码", value="zho_Hans"),
]

demo = gr.Interface(
    fn=translate,
    inputs=input_widgets,
    outputs="text",
    title="NLLB-200 翻译器(自动挂载模型)",
)
# Start serving as soon as the script is executed.
demo.launch()