%%writefile app.py import gradio as gr import torch import re # For sentence splitting # Function to split text into sentences while keeping punctuation def split_sentences(text): """Splits text into sentences, preserving punctuation.""" sentences = re.split(r'([.!?])\s+', text.strip()) # Splits while keeping punctuation merged_sentences = [] # Merge punctuation back with previous sentence for i in range(0, len(sentences) - 1, 2): merged_sentences.append(sentences[i] + sentences[i + 1]) # Sentence + punctuation # If last sentence has no punctuation, add it if len(sentences) % 2 != 0: merged_sentences.append(sentences[-1]) return merged_sentences # Function to translate each sentence separately def translate_text(text, direction): sentences = split_sentences(text) # Step 1: Split sentences translated_sentences = [] if direction == "English → Runyankore": tokenizer_used = tokenizer_en_ru model_used = model_en_ru else: # Runyankore → English tokenizer_used = tokenizer_ru_en model_used = model_ru_en for sentence in sentences: if sentence.strip(): # Ignore empty sentences inputs = tokenizer_used(sentence, return_tensors="pt").to(model_used.device) with torch.no_grad(): translated_tokens = model_used.generate(**inputs, max_length=256) pred_text = tokenizer_used.decode(translated_tokens[0], skip_special_tokens=True) translated_sentences.append(pred_text) return " ".join(translated_sentences) # Step 3: Rejoin translated sentences # Gradio Interface with Mobile-Responsive Design with gr.Blocks() as iface: gr.Markdown( "
Enter a paragraph and select the translation direction.
"
"Each sentence will be translated separately.