File size: 4,147 Bytes
29c8c0e 97c7079 29c8c0e 07f92fc 97c7079 07f92fc c6d71e7 07f92fc c6d71e7 97c7079 07f92fc 29c8c0e 07f92fc 29c8c0e 07f92fc 29c8c0e 07f92fc c6d71e7 07f92fc 97c7079 07f92fc 29c8c0e 07f92fc 29c8c0e 07f92fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel
# Hub identifiers for the shared base checkpoint and the LoRA adapters.
BASE_MODEL = "facebook/nllb-200-distilled-600M"
ADAPTER_NO_TO_EN = "entropy25/mt_en_no_oil"
# The English -> Norwegian adapter (previously "entropy25/no_en") is disabled.
# Leaving this as None makes that direction fall back to the plain base model;
# set it to an adapter repo id to enable a fine-tuned adapter again.
ADAPTER_EN_TO_NO = None

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)


def _load_translation_model(adapter_id=None):
    """Load a fresh copy of the base model, optionally wrapped with an adapter.

    Args:
        adapter_id: Hub repo id of a PEFT adapter, or None to use the base
            model without any adapter.

    Returns:
        The model to call ``generate`` on for one translation direction.
    """
    # Each direction gets its own base instance: PEFT injects adapter layers
    # into the model it wraps, so sharing one instance between two
    # ``PeftModel.from_pretrained`` calls would mix the adapters' state.
    # The cost is holding the base weights in memory once per direction.
    model = AutoModelForSeq2SeqLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        device_map="auto",
    )
    if adapter_id is None:
        return model
    return PeftModel.from_pretrained(model, adapter_id)


model_no_to_en = _load_translation_model(ADAPTER_NO_TO_EN)
model_en_to_no = _load_translation_model(ADAPTER_EN_TO_NO)
# UI language name -> NLLB (FLORES-200) language code.
LANG_CODES = {
    "English": "eng_Latn",
    "Norwegian": "nob_Latn",
}


def translate(text, source_lang, target_lang):
    """Translate *text* from *source_lang* to *target_lang*.

    Args:
        text: Text to translate; None, empty or whitespace-only input is
            rejected with a message.
        source_lang: UI name of the source language (a key of LANG_CODES).
        target_lang: UI name of the target language (a key of LANG_CODES).

    Returns:
        The translated string, or a human-readable message when the input is
        invalid or translation fails. This function never raises, so the
        result can always be shown in the output box.
    """
    import logging

    # Gradio may hand over None for an untouched textbox.
    if not text or not text.strip():
        return "Please enter text to translate"
    if source_lang == target_lang:
        return "Source and target languages must be different"
    if source_lang not in LANG_CODES or target_lang not in LANG_CODES:
        return (
            "Translation error: unsupported language pair "
            f"{source_lang!r} -> {target_lang!r}"
        )

    try:
        model = model_no_to_en if source_lang == "Norwegian" else model_en_to_no

        # The NLLB tokenizer prefixes the input with its current ``src_lang``
        # code; without this the source would always be tagged with the
        # tokenizer's default language, even for Norwegian input.
        tokenizer.src_lang = LANG_CODES[source_lang]
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
        )
        if hasattr(model, "device"):
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        outputs = model.generate(
            **inputs,
            # Forcing the first generated token selects the output language.
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(
                LANG_CODES[target_lang]
            ),
            max_length=512,
            num_beams=5,
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the traceback
        # in the server log instead of discarding it.
        logging.getLogger(__name__).exception("Translation failed")
        return f"Translation error: {str(e)}"
def swap_languages(source, target, text, translation):
    """Exchange the language pair and the two text panes.

    Returns a 4-tuple ``(target, source, translation, text)``: the former
    target becomes the source, and the former translation becomes the input.
    """
    swapped_langs = (target, source)
    swapped_texts = (translation, text)
    return swapped_langs + swapped_texts
# Gradio UI: language selectors, input/output panes, and event wiring.
# NOTE(review): the original indentation was lost; the nesting below (in
# particular the Copy/Clear row living inside the output column) is a
# reconstruction and should be confirmed against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Oil & Gas Professional Translation")
    gr.Markdown("English ↔ Norwegian translation specialized for petroleum industry")

    with gr.Row():
        source_lang = gr.Dropdown(
            choices=["English", "Norwegian"],
            label="Source Language",
            value="English",
        )
        swap_btn = gr.Button("⇄", scale=0, size="sm")
        target_lang = gr.Dropdown(
            choices=["English", "Norwegian"],
            label="Target Language",
            value="Norwegian",
        )

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter text to translate",
                lines=8,
            )
            input_chars = gr.Textbox(
                label="Character Count",
                value="0",
                interactive=False,
                max_lines=1,
            )
        with gr.Column():
            output_text = gr.Textbox(
                label="Translation",
                lines=8,
                interactive=False,
            )
            with gr.Row():
                copy_btn = gr.Button("📋 Copy", scale=1)
                clear_btn = gr.Button("🗑️ Clear", scale=1)

    translate_btn = gr.Button("Translate", variant="primary", size="lg")

    gr.Examples(
        examples=[
            ["The drilling operation encountered high pressure", "English", "Norwegian"],
            ["Reservoaret viser god permeabilitet", "Norwegian", "English"],
        ],
        inputs=[input_text, source_lang, target_lang],
    )

    # Keep the character counter in sync with the input box; treat a missing
    # value as empty text instead of raising on len(None).
    input_text.change(
        fn=lambda x: str(len(x or "")),
        inputs=input_text,
        outputs=input_chars,
    )
    translate_btn.click(
        fn=translate,
        inputs=[input_text, source_lang, target_lang],
        outputs=output_text,
    )
    # Swap both the language pair and the contents of the two text panes.
    swap_btn.click(
        fn=swap_languages,
        inputs=[source_lang, target_lang, input_text, output_text],
        outputs=[source_lang, target_lang, input_text, output_text],
    )
    # "Copy" places the current translation into the input box (it does not
    # touch the system clipboard).
    copy_btn.click(
        fn=lambda x: x,
        inputs=output_text,
        outputs=input_text,
    )
    clear_btn.click(
        fn=lambda: ("", ""),
        outputs=[input_text, output_text],
    )

# Start the server only when run as a script, so importing this module (e.g.
# for tests) does not launch the app.
if __name__ == "__main__":
    demo.launch()