File size: 14,331 Bytes
847f4f3
 
97c7079
 
29c8c0e
99c70a3
847f4f3
99c70a3
 
e9db9b3
 
 
070850f
847f4f3
070850f
 
 
 
847f4f3
07f92fc
c6d71e7
e50136c
c6d71e7
070850f
97c7079
070850f
 
 
 
 
 
 
 
29c8c0e
 
 
4311b49
07f92fc
 
4311b49
07f92fc
99c70a3
e9db9b3
 
99c70a3
e9db9b3
 
 
99c70a3
e9db9b3
 
 
847f4f3
99c70a3
29fad78
 
847f4f3
29fad78
 
 
 
 
99c70a3
 
 
 
 
 
29fad78
 
 
 
 
 
e9db9b3
29fad78
 
 
 
 
 
07f92fc
29fad78
4311b49
 
 
 
99c70a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2a0f58
847f4f3
99c70a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4311b49
 
99c70a3
 
 
 
 
 
 
4311b49
29fad78
99c70a3
 
 
 
 
 
 
 
 
 
29fad78
 
99c70a3
 
 
 
 
4311b49
29fad78
4311b49
99c70a3
 
 
 
 
 
 
 
 
 
4311b49
29fad78
99c70a3
 
 
29fad78
4311b49
99c70a3
 
 
 
 
4311b49
29fad78
99c70a3
 
29fad78
99c70a3
 
 
 
 
56b2235
f2a0f58
56b2235
99c70a3
29c8c0e
99c70a3
29fad78
 
99c70a3
29fad78
99c70a3
 
 
 
 
 
 
 
 
 
29fad78
99c70a3
4311b49
99c70a3
 
 
29fad78
4311b49
07f92fc
99c70a3
 
 
4311b49
99c70a3
29fad78
99c70a3
 
 
 
 
 
 
 
 
 
29fad78
99c70a3
4311b49
99c70a3
 
 
29fad78
 
4311b49
847f4f3
99c70a3
 
 
 
 
07f92fc
99c70a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4311b49
99c70a3
56b2235
 
99c70a3
 
56b2235
 
99c70a3
 
 
f2a0f58
99c70a3
f2a0f58
 
4311b49
 
 
 
 
 
 
 
847f4f3
4311b49
c6d71e7
 
4311b49
07f92fc
 
4311b49
07f92fc
97c7079
07f92fc
 
4311b49
 
07f92fc
 
4311b49
 
 
 
07f92fc
29c8c0e
99c70a3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel

# Base model checkpoint
base_model_name = "facebook/nllb-200-distilled-600M"

# The two PEFT adapters, one per translation direction
adapter_en_to_no = "entropy25/mt_en_no_oil"
adapter_no_to_en = "entropy25/mt_no_en_oil"

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name)


def _load_base_model():
    """Load a fresh float16 copy of the base seq2seq checkpoint."""
    return AutoModelForSeq2SeqLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        device_map="auto",
    )


# Approach 1: every direction gets its own independent base model instance,
# which is then wrapped with that direction's adapter.
print("Loading English to Norwegian model...")
base_model_en_to_no = _load_base_model()
model_en_to_no = PeftModel.from_pretrained(base_model_en_to_no, adapter_en_to_no)

print("Loading Norwegian to English model...")
base_model_no_to_en = _load_base_model()
model_no_to_en = PeftModel.from_pretrained(base_model_no_to_en, adapter_no_to_en)

def translate(text, source_lang, target_lang):
    """Translate ``text`` between English and Norwegian, one line at a time.

    Args:
        text: Text to translate; may hold several newline-separated lines.
            ``None``, empty or whitespace-only input yields ``""``.
        source_lang: UI name of the source language ("English" or
            "Norwegian").
        target_lang: UI name of the target language ("English" or
            "Norwegian").

    Returns:
        The translation with the input's line structure preserved (blank
        lines stay blank); ``text`` unchanged when both languages are equal;
        or the string "Unsupported language pair" for any other combination.
    """
    # A cleared textbox may deliver None as well as "".
    if not text or not text.strip():
        return ""

    if source_lang == target_lang:
        return text

    # Pick the adapter model and the language codes for this direction.
    if source_lang == "English" and target_lang == "Norwegian":
        model = model_en_to_no
        src_code = "eng_Latn"
        tgt_code = "nob_Latn"
    elif source_lang == "Norwegian" and target_lang == "English":
        model = model_no_to_en
        src_code = "nob_Latn"
        tgt_code = "eng_Latn"
    else:
        return "Unsupported language pair"

    # Tell the tokenizer which language the input is written in. Without this
    # the shared tokenizer keeps whatever source language it was created with,
    # so Norwegian input would be tagged with the wrong source-language token.
    # NOTE(review): this mutates the module-level tokenizer; confirm requests
    # are not processed concurrently before relying on it under load.
    tokenizer.src_lang = src_code

    # The target-language token is the same for every line; look it up once.
    target_token_id = tokenizer.convert_tokens_to_ids(tgt_code)

    # Translate line by line so the output keeps the input's layout.
    translated_sentences = []
    for sentence in text.split('\n'):
        if not sentence.strip():
            translated_sentences.append("")
            continue

        inputs = tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            max_length=512
        )

        # Move the encoded tensors to wherever the model lives.
        if hasattr(model, 'device'):
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        outputs = model.generate(
            **inputs,
            forced_bos_token_id=target_token_id,
            max_length=512,
            num_beams=5
        )

        translated_sentences.append(
            tokenizer.decode(outputs[0], skip_special_tokens=True)
        )

    return '\n'.join(translated_sentences)

def swap_languages(src, tgt, input_txt, output_txt):
    """Exchange the two language selections and the two text panes.

    Returns:
        A 4-tuple ``(tgt, src, output_txt, input_txt)``: the former target
        language comes first and the former output text takes the input slot.
    """
    swapped_langs = (tgt, src)
    swapped_texts = (output_txt, input_txt)
    return swapped_langs + swapped_texts

def load_file(file):
    """Read an uploaded text file and return its contents.

    Args:
        file: ``None`` (nothing uploaded), a filesystem path (``str`` or
            ``os.PathLike``), or an object exposing the path as ``.name``.
            The upload component in this file is declared with
            ``type="filepath"``, so a plain path string is the normal case.

    Returns:
        The file's text, decoded as UTF-8 with a Latin-1 fallback; ``""``
        when ``file`` is ``None``; or a message starting with
        "Error reading file:" when the file cannot be read.
    """
    import os  # local import: only needed for the PathLike check below

    if file is None:
        return ""

    # Accept both a plain path and a tempfile-like wrapper with ``.name``.
    if isinstance(file, (str, os.PathLike)):
        path = file
    else:
        path = getattr(file, "name", None)
        if path is None:
            return f"Error reading file: unsupported upload value {file!r}"

    try:
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return f.read()
        except UnicodeDecodeError:
            # Not valid UTF-8: Latin-1 maps every byte, so this cannot fail
            # on decoding (the result may still be mojibake for other
            # encodings).
            with open(path, 'r', encoding='latin-1') as f:
                return f.read()
    except OSError as e:
        return f"Error reading file: {str(e)}"

# English example sentences.
# Keys follow the pattern "<topic>_<short|long>" and match the keys of
# EXAMPLES_NO, so the same key selects the same example in either language.
EXAMPLES_EN = {
    "drilling_short": "Mud weight adjusted to 1.82 specific gravity at 3,247 meters depth.",
    "drilling_long": "The drilling operation at well site A-15 encountered unexpected high-pressure zones at 3,247 meters depth, requiring immediate adjustment of mud weight from 1.65 to 1.82 specific gravity to maintain wellbore stability and prevent potential kicks.",
    
    "reservoir_short": "Permeability is 250 millidarcy with 22 percent porosity.",
    "reservoir_long": "The reservoir shows excellent permeability of 250 millidarcy and porosity of 22 percent based on core analysis, indicating significant hydrocarbon potential with estimated oil saturation of 65 percent.",
    
    "subsea_short": "Christmas tree rated for 10,000 psi working pressure.",
    "subsea_long": "The subsea production system consists of a vertical Christmas tree rated for 10,000 psi working pressure and 150 degrees Celsius temperature, equipped with redundant safety features including automatic shutdown valves and real-time pressure monitoring systems.",
    
    "seismic_short": "Structural trap area estimated at 12 square kilometers.",
    "seismic_long": "Seismic data confirms the presence of a structural trap with an estimated area of 12 square kilometers, and productivity tests show stabilized oil production of 3,400 barrels per day at optimization pressure of 2,100 psi.",
    
    "safety_short": "H2S training required before site access.",
    "safety_long": "Emergency response procedures require all personnel to complete H2S safety training before site access, with breathing apparatus and wind indicators positioned at designated muster points, and immediate evacuation protocols activated when gas detection exceeds 10 ppm concentration levels."
}

# Norwegian example sentences.
# Uses the same "<topic>_<short|long>" keys as EXAMPLES_EN so a key can be
# looked up in whichever dictionary matches the selected source language.
EXAMPLES_NO = {
    "drilling_short": "Slamvekt justert til 1,82 spesifikk tyngde ved 3 247 meters dybde.",
    "drilling_long": "Boreoperasjonen ved brønnsted A-15 støtte på uventede høytrykksoner ved 3 247 meters dybde, noe som krevde umiddelbar justering av slamvekt fra 1,65 til 1,82 spesifikk tyngde for å opprettholde brønnborestabilitet og forhindre potensielle kicks.",
    
    "reservoir_short": "Permeabilitet er 250 millidarcy med 22 prosent porøsitet.",
    "reservoir_long": "Reservoaret viser utmerket permeabilitet på 250 millidarcy og porøsitet på 22 prosent basert på kjerneanalyse, noe som indikerer betydelig hydrokarbonpotensial med estimert oljemetning på 65 prosent.",
    
    "subsea_short": "Juletre dimensjonert for 10 000 psi arbeidstrykk.",
    "subsea_long": "Subsea produksjonssystemet består av et vertikalt juletre dimensjonert for 10 000 psi arbeidstrykk og 150 grader Celsius temperatur, utstyrt med redundante sikkerhetsfunksjoner inkludert automatiske nedstengningsventiler og sanntids trykkmonitorering.",
    
    "seismic_short": "Strukturell felle estimert til 12 kvadratkilometer.",
    "seismic_long": "Seismiske data bekrefter tilstedeværelsen av en strukturell felle med estimert areal på 12 kvadratkilometer, og produktivitetstester viser stabilisert oljeproduksjon på 3 400 fat per dag ved optimaliseringstrykk på 2 100 psi.",
    
    "safety_short": "H2S-opplæring påkrevd før tilgang til området.",
    "safety_long": "Nødprosedyrer krever at alt personell fullfører H2S-sikkerhetsopplæring før områdetilgang, med åndedrettsutstyr og vindindikatorer plassert ved utpekte samlingspunkter, og umiddelbare evakueringsprotokoller aktiveres når gassdeteksjon overskrider 10 ppm konsentrasjonsnivå."
}

def get_examples(source_lang):
    """Return the example dictionary for the given source language.

    "English" selects the English examples; any other value selects the
    Norwegian ones.
    """
    return EXAMPLES_EN if source_lang == "English" else EXAMPLES_NO

def update_example_buttons(source_lang):
    """Return the short drilling example in the newly selected language.

    Used to refresh the example preview whenever the source language changes.
    """
    return get_examples(source_lang)["drilling_short"]

# Stylesheet handed to gr.Blocks(css=...). The class names below are attached
# to components through their `elem_classes` arguments in the UI definition.
custom_css = """
.gradio-container {
    max-width: 1100px !important;
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important;
}
.main-container {
    background: #f6f7f8 !important;
    padding: 0 !important;
    border-radius: 0 !important;
}
.translate-box {
    background: white !important;
    border-radius: 5px !important;
    padding: 0 !important;
    box-shadow: 0 2px 4px rgba(0,0,0,0.08) !important;
    margin: 20px 0 !important;
}
.lang-header {
    padding: 16px 20px !important;
    border-bottom: 1px solid #e8eaed !important;
    background: #fafafa !important;
}
.lang-selector {
    border: none !important;
    background: transparent !important;
    font-size: 15px !important;
    font-weight: 500 !important;
    color: #333 !important;
}
.text-area textarea {
    border: none !important;
    font-size: 17px !important;
    line-height: 1.7 !important;
    padding: 20px !important;
    min-height: 200px !important;
}
.swap-container {
    display: flex !important;
    align-items: center !important;
    justify-content: center !important;
    padding: 20px 0 !important;
}
.swap-btn {
    width: 44px !important;
    height: 44px !important;
    min-width: 44px !important;
    border-radius: 50% !important;
    background: white !important;
    border: 1px solid #d1d5db !important;
    box-shadow: 0 1px 3px rgba(0,0,0,0.1) !important;
    font-size: 18px !important;
    color: #0f6fff !important;
    cursor: pointer !important;
}
.swap-btn:hover {
    background: #f8f9fa !important;
    border-color: #0f6fff !important;
}
.footer-info {
    text-align: center !important;
    color: #999 !important;
    font-size: 13px !important;
    padding: 20px !important;
}
"""

# UI definition: source pane | swap button | target pane, followed by an
# example picker and a text-file upload, then the event wiring.
with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:

    gr.HTML("<div style='height: 20px'></div>")

    with gr.Row():
        # Left pane: source language selector and editable input text.
        with gr.Column(scale=1):
            with gr.Group(elem_classes="translate-box"):
                with gr.Row(elem_classes="lang-header"):
                    source_lang = gr.Dropdown(
                        choices=["English", "Norwegian"],
                        value="English",
                        show_label=False,
                        container=False,
                        elem_classes="lang-selector",
                        scale=1
                    )

                input_text = gr.Textbox(
                    placeholder="Type to translate",
                    show_label=False,
                    lines=8,
                    max_lines=20,
                    container=False,
                    elem_classes="text-area"
                )

        # Middle column: button that swaps languages and texts.
        with gr.Column(scale=0, min_width=100):
            with gr.Row(elem_classes="swap-container"):
                swap_btn = gr.Button("⇄", elem_classes="swap-btn")

        # Right pane: target language selector and read-only translation.
        with gr.Column(scale=1):
            with gr.Group(elem_classes="translate-box"):
                with gr.Row(elem_classes="lang-header"):
                    target_lang = gr.Dropdown(
                        choices=["English", "Norwegian"],
                        value="Norwegian",
                        show_label=False,
                        container=False,
                        elem_classes="lang-selector",
                        scale=1
                    )

                output_text = gr.Textbox(
                    placeholder="Translation",
                    show_label=False,
                    lines=8,
                    max_lines=20,
                    container=False,
                    elem_classes="text-area",
                    interactive=False
                )

    gr.HTML(
        "<div class='footer-info'>"
        "Oil & Gas Translation • English ↔ Norwegian • Bidirectional Model"
        "</div>"
    )

    with gr.Accordion("Example Sentences", open=True):
        with gr.Row():
            example_text = gr.Textbox(
                value=EXAMPLES_EN["drilling_short"],
                label="",
                lines=3,
                max_lines=5,
                show_copy_button=True
            )
            use_example_btn = gr.Button("Use This Example →", variant="primary", size="sm")

        with gr.Row():
            btn1 = gr.Button("Drilling (Short)", size="sm")
            btn2 = gr.Button("Drilling (Long)", size="sm")
            btn3 = gr.Button("Reservoir (Short)", size="sm")
            btn4 = gr.Button("Reservoir (Long)", size="sm")
            btn5 = gr.Button("Subsea (Short)", size="sm")

        with gr.Row():
            btn6 = gr.Button("Subsea (Long)", size="sm")
            btn7 = gr.Button("Seismic (Short)", size="sm")
            btn8 = gr.Button("Seismic (Long)", size="sm")
            btn9 = gr.Button("Safety (Short)", size="sm")
            btn10 = gr.Button("Safety (Long)", size="sm")

        # Each topic button shows its example in the currently selected
        # source language.
        def get_example(key, source_lang):
            """Return example ``key`` in the language named by ``source_lang``."""
            examples = get_examples(source_lang)
            return examples[key]

        def _example_handler(key):
            """Build a click handler bound to one example key.

            A factory is used (rather than a lambda written in the loop body)
            so each handler captures its own ``key`` instead of the loop's
            last value.
            """
            return lambda sl: get_example(key, sl)

        # Example key -> button; one table instead of ten hand-written handlers.
        example_buttons = {
            "drilling_short": btn1,
            "drilling_long": btn2,
            "reservoir_short": btn3,
            "reservoir_long": btn4,
            "subsea_short": btn5,
            "subsea_long": btn6,
            "seismic_short": btn7,
            "seismic_long": btn8,
            "safety_short": btn9,
            "safety_long": btn10,
        }
        for example_key, example_btn in example_buttons.items():
            example_btn.click(
                _example_handler(example_key),
                inputs=[source_lang],
                outputs=example_text
            )

        # Copy the previewed example into the input box.
        use_example_btn.click(
            fn=lambda x: x,
            inputs=example_text,
            outputs=input_text
        )

    with gr.Accordion("Upload Text File", open=False):
        file_input = gr.File(
            label="Upload a .txt file to translate",
            file_types=[".txt"],
            type="filepath"
        )

    # When the source language changes, refresh the example preview.
    source_lang.change(
        fn=update_example_buttons,
        inputs=[source_lang],
        outputs=[example_text]
    )

    # Re-translate whenever the input text or either language changes.
    input_text.change(
        fn=translate,
        inputs=[input_text, source_lang, target_lang],
        outputs=output_text
    )

    source_lang.change(
        fn=translate,
        inputs=[input_text, source_lang, target_lang],
        outputs=output_text
    )

    target_lang.change(
        fn=translate,
        inputs=[input_text, source_lang, target_lang],
        outputs=output_text
    )

    swap_btn.click(
        fn=swap_languages,
        inputs=[source_lang, target_lang, input_text, output_text],
        outputs=[source_lang, target_lang, input_text, output_text]
    )

    # Load an uploaded file's text into the input box.
    file_input.change(
        fn=load_file,
        inputs=file_input,
        outputs=input_text
    )

demo.launch()