File size: 3,710 Bytes
7360bd5
9d8040c
 
 
20c9f48
9d8040c
 
c987792
54b0da1
9d8040c
20c9f48
9d8040c
20c9f48
6c5b5b0
 
c987792
6c5b5b0
 
0976407
20c9f48
9d8040c
 
 
 
 
 
 
 
 
 
20c9f48
54b0da1
 
 
 
 
 
 
 
 
 
8bed7ff
54b0da1
8bed7ff
9d8040c
0976407
9d8040c
 
042a850
9d8040c
 
 
 
c987792
9d8040c
 
 
 
 
 
 
 
 
 
 
 
 
54b0da1
9d8040c
20c9f48
54b0da1
20c9f48
7360bd5
 
54b0da1
 
7360bd5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# English→Hebrew translation demo: Gradio front-end over the DictaLM 2.0
# instruct model (HuggingFace transformers).
import gradio as gr
#from langchain.llms import OpenAI
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
#import pandas as pd
# NOTE(review): tqdm and time are imported but unused in this chunk — verify
# against the rest of the file before removing.
from tqdm import tqdm
import time
from awq import AutoAWQForCausalLM

#llm = OpenAI()
#tokenizer = AutoTokenizer.from_pretrained("618AI/dictalm2-it-qa-fine-tune")
#model = AutoModelForCausalLM.from_pretrained("618AI/dictalm2-it-qa-fine-tune", )

# Load the non-quantized instruct model in fp16; low_cpu_mem_usage streams
# weights to reduce peak RAM during load. No device placement here, so the
# model stays on CPU.
tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-instruct")
model = AutoModelForCausalLM.from_pretrained("dicta-il/dictalm2.0-instruct", low_cpu_mem_usage=True, torch_dtype=torch.float16)

# Alternative path: AWQ-quantized checkpoint (kept for reference).
#tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-AWQ")
#model = AutoAWQForCausalLM.from_pretrained("dicta-il/dictalm2.0-AWQ", device_map="cpu", torch_dtype=torch.float16)
#model.to("cuda")

def parsetext(translated_text):
    """Extract the answer portion of a model response.

    Looks for the ``"Answer: "`` marker and returns everything after it,
    truncated at the first newline if one follows.

    Args:
        translated_text: Full decoded model output (prompt + completion).

    Returns:
        The text after ``"Answer: "`` up to the first newline, or the
        input unchanged when the marker is absent.
    """
    marker = "Answer: "
    start = translated_text.find(marker)
    if start == -1:
        # BUG FIX: previously find() == -1 fell through to slicing, which
        # kept only the last character and then stripped it — returning "".
        # Fall back to the unparsed text instead of mangling it.
        return translated_text
    answer = translated_text[start + len(marker):]
    newline = answer.find('\n')
    if newline != -1:
        answer = answer[:newline]
    return answer


def translate_eng_heb(source_text):
    """Translate English text to Hebrew with the DictaLM instruct model.

    Builds a translation prompt around ``source_text``, runs greedy
    generation on the module-level ``model``/``tokenizer``, and returns
    only the parsed answer text.

    Args:
        source_text: English text to translate.

    Returns:
        The Hebrew translation extracted from the model response.
    """
    prompt_eng_heb = f"""Forget the previous context. 
                Translate the following text from English to Hebrew. 
                Do NOT respond with any additional information other than the translation. Respond with the translated text ONLY. 
                If the english text contains multiple consecutive special characters (like !,@,$,# etc.), replace them with a 
                corresponding single special character for translation. Once the text is translated, restore the original group of special
                characters in their corresponding place. 
                For example, if the english text is 'Aahh!!', replace it with 'Aahh!' before translation. 
                After translation, replace the !! in the correct place in the hebrew text, אוי!! 
                The text to be translated is enclosed in <text> tags.
                """
    # BUG FIX: the span was closed with "<text>" instead of "</text>",
    # contradicting the prompt's own "enclosed in <text> tags" instruction.
    prompt = f"{prompt_eng_heb}<text>{source_text}</text>\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    #inputs = {k: v.to("cuda") for k, v in inputs.items()}

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    # BUG FIX: the parsed answer was computed and then discarded; the raw
    # response (prompt echo + completion) was returned to the UI instead.
    return parsetext(response)

#def translate_eng_heb(source_text):
#    prompt_eng_heb = f"""Forget the previous context. 
#                Translate the following text from English to Hebrew. 
#                Do NOT respond with any additional information other than the translation. Respond with the translated text ONLY. 
#                If the english text contains multiple consecutive special characters (like !,@,$,# etc.), replace them with a 
#                corresponding single special character for translation. Once the text is translated, restore the original group of special
#                characters in their corresponding place. 
#                For example, if the english text is 'Aahh!!', replace it with 'Aahh!' before translation. 
#                After translation, replace the !! in the correct place in the hebrew text, אוי!! 
#                
#                """
#    prompt = f"{prompt_eng_heb}{source_text}\nAnswer:"
    
#    response = llm(prompt)
#    translated_text = parsetext(response)
    
#    return translated_text

# Wire up the web UI: a multi-line input box feeding the translator,
# with plain text output.
with gr.Blocks() as demo:
    input_box = gr.Textbox(placeholder="Enter text block to translate", lines=4)
    gr.Interface(fn=translate_eng_heb, inputs=input_box, outputs="text")

# Start the local Gradio server.
demo.launch()