File size: 2,631 Bytes
68ecc2e
7ae93e9
68ecc2e
 
7ae93e9
68ecc2e
 
 
 
 
 
 
 
 
 
 
 
 
7ae93e9
68ecc2e
7ae93e9
68ecc2e
 
 
 
7ae93e9
68ecc2e
 
 
 
 
 
7ae93e9
68ecc2e
 
 
 
 
 
 
7ae93e9
68ecc2e
7ae93e9
68ecc2e
 
 
 
 
 
 
7ae93e9
68ecc2e
 
7ae93e9
68ecc2e
 
 
 
 
 
 
7ae93e9
68ecc2e
 
7ae93e9
68ecc2e
 
7ae93e9
68ecc2e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
%%writefile app.py
import gradio as gr
import torch
import re  # For sentence splitting

# Function to split text into sentences while keeping punctuation
def split_sentences(text):
    """Split text into sentences, keeping each sentence's closing punctuation.

    A sentence boundary is a ``.``, ``!`` or ``?`` followed by one or more
    whitespace characters. The whitespace is discarded; the punctuation mark
    stays attached to the sentence it ends.

    Args:
        text: The text to split. ``None``, empty and whitespace-only input
            are all treated as "no text".

    Returns:
        A list of sentence strings in their original order. The list is
        empty when there is no text, so callers never receive a blank
        sentence.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return []

    # Because the pattern has one capturing group, re.split returns an
    # odd-length list that alternates sentence body and punctuation mark and
    # ends with whatever follows the last boundary:
    #   [body, mark, body, mark, ..., tail]
    parts = re.split(r'([.!?])\s+', stripped)

    # Re-attach every punctuation mark to the body that precedes it.
    sentences = [body + mark for body, mark in zip(parts[0::2], parts[1::2])]

    # The tail is the final sentence (with or without its own punctuation).
    # It cannot be empty here: a boundary consumes trailing whitespace, and
    # the stripped text never ends in whitespace.
    sentences.append(parts[-1])
    return sentences

# Function to translate each sentence separately
def translate_text(text, direction):
    """Translate ``text`` one sentence at a time and return the joined result.

    The text is broken up with ``split_sentences``; every non-blank sentence
    is tokenized, run through ``generate`` and decoded, and the decoded
    pieces are joined with single spaces.

    Args:
        text: The text to translate.
        direction: The selected direction label. The English-to-Runyankore
            label picks the ``*_en_ru`` tokenizer/model pair; any other
            value picks the ``*_ru_en`` pair.

    Returns:
        The translated sentences joined by a single space (an empty string
        when there was nothing to translate).
    """
    pieces = split_sentences(text)

    # NOTE(review): the tokenizer/model globals are not defined in the
    # visible part of this file -- they are presumably loaded elsewhere.
    if direction == "English β†’ Runyankore":
        tokenizer_used, model_used = tokenizer_en_ru, model_en_ru
    else:
        tokenizer_used, model_used = tokenizer_ru_en, model_ru_en

    outputs = []
    for piece in pieces:
        if not piece.strip():
            # Blank fragments carry nothing to translate.
            continue

        # Tokenize to PyTorch tensors and move them to the model's device.
        encoded = tokenizer_used(piece, return_tensors="pt").to(model_used.device)

        # Gradients are not needed for generation.
        with torch.no_grad():
            generated = model_used.generate(**encoded, max_length=256)

        outputs.append(
            tokenizer_used.decode(generated[0], skip_special_tokens=True)
        )

    return " ".join(outputs)

# Gradio Interface with Mobile-Responsive Design
with gr.Blocks() as iface:
    # Centered page title and short usage instructions.
    gr.Markdown(
        "<h1 style='text-align: center;'>Runyankore ↔ English Translator</h1>"
        "<p style='text-align: center;'>Enter a paragraph and select the translation direction.<br>"
        "Each sentence will be translated separately.</p>"
    )

    # Each component sits in its own row, so they stack vertically.
    with gr.Row():
        text_input = gr.Textbox(lines=4, label="Enter text", interactive=True, scale=1)

    with gr.Row():
        # NOTE(review): "β†’" looks like a mis-encoded "→". translate_text
        # compares against this exact string, so the label and that
        # comparison must be changed together if the glyph is repaired.
        direction = gr.Radio(
            ["English β†’ Runyankore", "Runyankore β†’ English"],
            # With no initial value, clicking Translate without choosing
            # passes None, which translate_text treats as Runyankore-to-
            # English. Preselect that same direction so the default is
            # visible to the user instead of silent.
            value="Runyankore β†’ English",
            label="Select Translation Direction",
            interactive=True,
            scale=1
        )

    with gr.Row():
        translate_button = gr.Button("Translate", scale=1)

    with gr.Row():
        # Read-only box that receives the translation.
        output_text = gr.Textbox(lines=4, label="Translated text", interactive=False, scale=1)

    # Clicking the button sends the text and chosen direction to
    # translate_text and shows its return value in the output box.
    translate_button.click(translate_text, inputs=[text_input, direction], outputs=output_text)

iface.launch()