Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,50 +1,15 @@
|
|
1 |
-
import spaces
|
2 |
import gradio as gr
|
3 |
-
from sacremoses import MosesPunctNormalizer
|
4 |
-
from stopes.pipelines.monolingual.utils.sentence_split import get_split_algo
|
5 |
-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
6 |
from flores import code_mapping
|
7 |
-
import platform
|
8 |
-
import torch
|
9 |
-
import nltk
|
10 |
from functools import lru_cache
|
11 |
-
|
12 |
-
nltk.download("punkt_tab")
|
13 |
-
|
14 |
-
REMOVED_TARGET_LANGUAGES = {"Ligurian", "Lombard", "Sicilian"}
|
15 |
-
|
16 |
-
|
17 |
-
device = "cpu" if platform.system() == "Darwin" else "cuda"
|
18 |
-
MODEL_NAME = "facebook/nllb-200-3.3B"
|
19 |
|
20 |
code_mapping = dict(sorted(code_mapping.items(), key=lambda item: item[0]))
|
21 |
flores_codes = list(code_mapping.keys())
|
22 |
-
target_languages =
|
23 |
-
|
24 |
-
def load_model():
    """Instantiate the seq2seq translation model and move it to ``device``.

    Reads the module-level ``MODEL_NAME`` and ``device`` and prints a
    confirmation line once the model has been placed.

    Returns:
        The object produced by ``AutoModelForSeq2SeqLM.from_pretrained``
        after ``.to(device)`` has been applied to it.
    """
    pretrained = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    placed = pretrained.to(device)
    print(f"Model loaded in {device}")
    return placed
|
28 |
-
|
29 |
-
|
30 |
-
model = load_model()
|
31 |
-
|
32 |
|
33 |
-
#
|
34 |
-
|
35 |
|
36 |
-
|
37 |
-
punct_normalizer = MosesPunctNormalizer(lang="en")
|
38 |
-
|
39 |
-
|
40 |
-
@lru_cache(maxsize=202)
def get_language_specific_sentence_splitter(language_code):
    """Return the sentence splitter for ``language_code``.

    Only the first three characters of ``language_code`` are handed to
    ``get_split_algo``, together with the ``"default"`` algorithm name.
    Results are memoized by ``lru_cache`` (up to 202 distinct codes).
    """
    return get_split_algo(language_code[:3], "default")
|
45 |
-
|
46 |
-
|
47 |
-
# cache function
|
48 |
@lru_cache(maxsize=100)
|
49 |
def translate(text: str, src_lang: str, tgt_lang: str):
|
50 |
if not src_lang:
|
@@ -53,83 +18,37 @@ def translate(text: str, src_lang: str, tgt_lang: str):
|
|
53 |
raise gr.Error("The target language is empty! Please choose it in the dropdown list.")
|
54 |
return _translate(text, src_lang, tgt_lang)
|
55 |
|
56 |
-
|
57 |
-
# Only assign GPU if cache not used
|
58 |
-
@spaces.GPU
|
59 |
def _translate(text: str, src_lang: str, tgt_lang: str):
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
paragraphs = text.split("\n")
|
69 |
-
translated_paragraphs = []
|
70 |
-
|
71 |
-
for paragraph in paragraphs:
|
72 |
-
splitter = get_language_specific_sentence_splitter(src_code)
|
73 |
-
sentences = list(splitter(paragraph))
|
74 |
-
translated_sentences = []
|
75 |
-
|
76 |
-
for sentence in sentences:
|
77 |
-
input_tokens = (
|
78 |
-
tokenizer(sentence, return_tensors="pt")
|
79 |
-
.input_ids[0]
|
80 |
-
.cpu()
|
81 |
-
.numpy()
|
82 |
-
.tolist()
|
83 |
-
)
|
84 |
-
translated_chunk = model.generate(
|
85 |
-
input_ids=torch.tensor([input_tokens]).to(device),
|
86 |
-
forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_code),
|
87 |
-
max_length=len(input_tokens) + 50,
|
88 |
-
num_return_sequences=1,
|
89 |
-
num_beams=5,
|
90 |
-
no_repeat_ngram_size=4, # repetition blocking works better if this number is below num_beams
|
91 |
-
renormalize_logits=True, # recompute token probabilities after banning the repetitions
|
92 |
-
)
|
93 |
-
translated_chunk = tokenizer.decode(
|
94 |
-
translated_chunk[0], skip_special_tokens=True
|
95 |
-
)
|
96 |
-
translated_sentences.append(translated_chunk)
|
97 |
-
|
98 |
-
translated_paragraph = " ".join(translated_sentences)
|
99 |
-
translated_paragraphs.append(translated_paragraph)
|
100 |
-
|
101 |
-
return "\n".join(translated_paragraphs)
|
102 |
-
|
103 |
-
|
104 |
|
105 |
description = """
|
106 |
<div style="text-align: center;">
|
107 |
<img src="https://huggingface.co/spaces/UNESCO/nllb/resolve/main/UNESCO_META_HF_BANNER.png" alt="UNESCO Meta Hugging Face Banner" style="max-width: 800px; width: 100%; margin: 0 auto;">
|
108 |
<h1 style="color: #0077be;">UNESCO Language Translator, powered by Meta and Hugging Face</h1>
|
109 |
</div>
|
110 |
-
|
111 |
UNESCO, Meta, and Hugging Face have come together to create an accessible, high-quality translation experience in 200 languages.
|
112 |
-
|
113 |
This is made possible through an open approach to AI innovation using Meta's open-sourced No Language Left Behind (NLLB) AI model, hosted on Hugging Face Spaces.
|
114 |
"""
|
115 |
disclaimer = """
|
116 |
## Disclaimer
|
117 |
-
|
118 |
This translation interface, developed as part of UNESCO's work on Multilingualism and supported by Meta's No Language Left Behind AI model and Hugging Face, is designed to assist with language translation using open-source AI technologies. However, translations generated by the tool may not be accurate or perfect. While we strive to provide accurate translations, the tool may produce inaccuracies due to the complexity and nuances of different languages.
|
119 |
-
|
120 |
- The tool may not fully capture the context, cultural nuances, idiomatic expressions, or specific terminologies.
|
121 |
- Manual review and adjustment are recommended for important translations.
|
122 |
- The translations are provided "as is" without any warranties of any kind, either expressed or implied.
|
123 |
- Users should not rely solely on the tool for critical or sensitive translations and are responsible for verifying the accuracy and appropriateness of the translations for their specific needs.
|
124 |
- We recommend consulting with professional translators for official, legal, medical, or other critical translations.
|
125 |
- We shall not be liable for any direct, indirect, incidental, special, or consequential damages arising out of or in connection with the use or inability to use the translation tool, including but not limited to errors or omissions in translations.
|
126 |
-
|
127 |
By using this translation tool, you agree to these terms and acknowledge that the use of the tool is at your own risk.
|
128 |
-
|
129 |
For any feedback or support, please contact UNESCO World Atlas of Languages Team: WAL.Data@unesco.org.
|
130 |
"""
|
131 |
|
132 |
-
|
133 |
examples_inputs = [["The United Nations Educational, Scientific and Cultural Organization is a specialized agency of the United Nations with the aim of promoting world peace and security through international cooperation in education, arts, sciences and culture. ","English","Ayacucho Quechua"],]
|
134 |
|
135 |
with gr.Blocks() as demo:
|
@@ -151,4 +70,4 @@ with gr.Blocks() as demo:
|
|
151 |
examples = gr.Examples(examples=examples_inputs,inputs=[input_text, src_lang,target_lang], fn=translate, outputs=output, cache_examples=True)
|
152 |
with gr.Row():
|
153 |
gr.Markdown(disclaimer)
|
154 |
-
demo.launch()
|
|
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
2 |
from flores import code_mapping
|
|
|
|
|
|
|
3 |
from functools import lru_cache
|
4 |
+
import openai  # Used to call the external API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Rebuild the mapping so that its keys are in ascending order.
code_mapping = {name: code_mapping[name] for name in sorted(code_mapping)}
# Keys of the sorted mapping, in that same order.
flores_codes = [*code_mapping]
# Simplified list: the target languages are the very same list object.
target_languages = flores_codes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
+
import os

# Client for the OpenAI-compatible endpoint used by _translate().
#
# SECURITY: the API key must never be committed to the repository. It is read
# from the OPENAI_API_KEY environment variable (e.g. a Space secret). Any key
# that was previously hard-coded here has been published and should be revoked.
_OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not _OPENAI_API_KEY:
    # Fail fast at startup with an actionable message instead of on first request.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; configure it as an environment variable "
        "or Space secret before starting the app."
    )

openai_client = openai.OpenAI(
    # The endpoint can be overridden without a code change; the default is unchanged.
    base_url=os.environ.get("OPENAI_BASE_URL", "https://ssapi.cppbear.site"),
    api_key=_OPENAI_API_KEY,
)
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
@lru_cache(maxsize=100)
|
14 |
def translate(text: str, src_lang: str, tgt_lang: str):
|
15 |
if not src_lang:
|
|
|
18 |
raise gr.Error("The target language is empty! Please choose it in the dropdown list.")
|
19 |
return _translate(text, src_lang, tgt_lang)
|
20 |
|
|
|
|
|
|
|
21 |
def _translate(text: str, src_lang: str, tgt_lang: str):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang`` via the chat API.

    Args:
        text: The text to translate.
        src_lang: Name of the source language, as shown to the model.
        tgt_lang: Name of the target language, as shown to the model.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""``
        when ``text`` is empty or whitespace-only (no request is sent).

    Raises:
        gr.Error: If the API request fails or the reply carries no text.
    """
    # Nothing to translate: skip the network round-trip entirely.
    if not text or not text.strip():
        return ""

    # Keep the instructions in the system message and the raw text in the user
    # message, so that text which happens to look like instructions is still
    # treated as content to translate.
    instructions = (
        f"Translate the user's message from {src_lang} to {tgt_lang}. "
        "Reply with the translation only, without comments or explanations."
    )
    try:
        response = openai_client.chat.completions.create(
            model="v0-1.5-md",  # e.g. gpt-3.5-turbo or another compatible model
            messages=[
                {"role": "system", "content": instructions},
                {"role": "user", "content": text},
            ],
            max_tokens=1024,
            temperature=0.0,
        )
    except openai.OpenAIError as err:
        # Surface API failures the same way translate() reports input errors.
        raise gr.Error(f"Translation request failed: {err}") from err

    # The reply may contain no choices, or a message whose content is None.
    choices = response.choices
    content = choices[0].message.content if choices else None
    if content is None:
        raise gr.Error("The translation service returned an empty response.")
    return content.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
description = """
|
32 |
<div style="text-align: center;">
|
33 |
<img src="https://huggingface.co/spaces/UNESCO/nllb/resolve/main/UNESCO_META_HF_BANNER.png" alt="UNESCO Meta Hugging Face Banner" style="max-width: 800px; width: 100%; margin: 0 auto;">
|
34 |
<h1 style="color: #0077be;">UNESCO Language Translator, powered by Meta and Hugging Face</h1>
|
35 |
</div>
|
|
|
36 |
UNESCO, Meta, and Hugging Face have come together to create an accessible, high-quality translation experience in 200 languages.
|
|
|
37 |
This is made possible through an open approach to AI innovation using Meta's open-sourced No Language Left Behind (NLLB) AI model, hosted on Hugging Face Spaces.
|
38 |
"""
|
39 |
disclaimer = """
|
40 |
## Disclaimer
|
|
|
41 |
This translation interface, developed as part of UNESCO's work on Multilingualism and supported by Meta's No Language Left Behind AI model and Hugging Face, is designed to assist with language translation using open-source AI technologies. However, translations generated by the tool may not be accurate or perfect. While we strive to provide accurate translations, the tool may produce inaccuracies due to the complexity and nuances of different languages.
|
|
|
42 |
- The tool may not fully capture the context, cultural nuances, idiomatic expressions, or specific terminologies.
|
43 |
- Manual review and adjustment are recommended for important translations.
|
44 |
- The translations are provided "as is" without any warranties of any kind, either expressed or implied.
|
45 |
- Users should not rely solely on the tool for critical or sensitive translations and are responsible for verifying the accuracy and appropriateness of the translations for their specific needs.
|
46 |
- We recommend consulting with professional translators for official, legal, medical, or other critical translations.
|
47 |
- We shall not be liable for any direct, indirect, incidental, special, or consequential damages arising out of or in connection with the use or inability to use the translation tool, including but not limited to errors or omissions in translations.
|
|
|
48 |
By using this translation tool, you agree to these terms and acknowledge that the use of the tool is at your own risk.
|
|
|
49 |
For any feedback or support, please contact UNESCO World Atlas of Languages Team: WAL.Data@unesco.org.
|
50 |
"""
|
51 |
|
|
|
52 |
examples_inputs = [["The United Nations Educational, Scientific and Cultural Organization is a specialized agency of the United Nations with the aim of promoting world peace and security through international cooperation in education, arts, sciences and culture. ","English","Ayacucho Quechua"],]
|
53 |
|
54 |
with gr.Blocks() as demo:
|
|
|
70 |
examples = gr.Examples(examples=examples_inputs,inputs=[input_text, src_lang,target_lang], fn=translate, outputs=output, cache_examples=True)
|
71 |
with gr.Row():
|
72 |
gr.Markdown(disclaimer)
|
73 |
+
demo.launch()
|