import gradio as gr
#from langchain.llms import OpenAI
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
#import pandas as pd
from tqdm import tqdm
import time
#from awq import AutoAWQForCausalLM  # only needed for the commented-out AWQ model below
#llm = OpenAI()
#tokenizer = AutoTokenizer.from_pretrained("618AI/dictalm2-it-qa-fine-tune")
#model = AutoModelForCausalLM.from_pretrained("618AI/dictalm2-it-qa-fine-tune")
# Load the DictaLM 2.0 instruct model in fp16; low_cpu_mem_usage keeps peak RAM
# down while the weights stream in on a CPU-only Space.
tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-instruct")
model = AutoModelForCausalLM.from_pretrained("dicta-il/dictalm2.0-instruct", low_cpu_mem_usage=True, torch_dtype=torch.float16)
#tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-AWQ")
#model = AutoAWQForCausalLM.from_pretrained("dicta-il/dictalm2.0-AWQ", device_map="cpu", torch_dtype=torch.float16)
#model.to("cuda")
def parsetext(translated_text):
    """Extract the text following the 'Answer: ' marker, up to the next newline."""
    marker = "Answer: "
    index = translated_text.find(marker)
    if index == -1:
        # Marker not found: return the raw model output unchanged.
        return translated_text
    truncated_text = translated_text[index + len(marker):]
    newline = truncated_text.find('\n')
    if newline != -1:
        truncated_text = truncated_text[:newline]
    return truncated_text
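# Minimal sketch of the expected behavior (hypothetical model outputs):
#   parsetext("<text>Hello</text>\nAnswer: שלום\nextra")  ->  "שלום"
#   parsetext("no marker present")                        ->  input returned unchanged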
def translate_eng_heb(source_text):
    """Translate a block of English text to Hebrew using the instruct model."""
    prompt_eng_heb = """Forget the previous context.
    Translate the following text from English to Hebrew.
    Do NOT respond with any additional information other than the translation. Respond with the translated text ONLY.
    If the English text contains multiple consecutive special characters (like !,@,$,# etc.), replace them with a
    corresponding single special character for translation. Once the text is translated, restore the original group of special
    characters in their corresponding place.
    For example, if the English text is 'Aahh!!', replace it with 'Aahh!' before translation.
    After translation, restore the !! in the correct place in the Hebrew text: אהה!!
    The text to be translated is enclosed in <text> tags.
    """
    prompt = f"{prompt_eng_heb}<text>{source_text}</text>\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    #inputs = {k: v.to("cuda") for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    translated_text = parsetext(response)
    return translated_text
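# Quick smoke test (hypothetical call; actual generations vary):
#   translate_eng_heb("Good morning")  ->  e.g. "בוקר טוב"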
#def translate_eng_heb(source_text):
#    prompt_eng_heb = f"""Forget the previous context.
#    Translate the following text from English to Hebrew.
#    Do NOT respond with any additional information other than the translation. Respond with the translated text ONLY.
#    If the english text contains multiple consecutive special characters (like !,@,$,# etc.), replace them with a
#    corresponding single special character for translation. Once the text is translated, restore the original group of special
#    characters in their corresponding place.
#    For example, if the english text is 'Aahh!!', replace it with 'Aahh!' before translation.
#    After translation, replace the !! in the correct place in the hebrew text, אהה!!
#
#    """
#    prompt = f"{prompt_eng_heb}{source_text}\nAnswer:"
#    response = llm(prompt)
#    translated_text = parsetext(response)
#    return translated_text
# Build the UI: a single input textbox wired to the translator through gr.Interface.
with gr.Blocks() as demo:
    textbox = gr.Textbox(placeholder="Enter text block to translate", lines=4)
    gr.Interface(fn=translate_eng_heb, inputs=textbox, outputs="text")

demo.launch()
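# Optional, assuming concurrent users should be served: Gradio's request queue
# can be enabled before launching, e.g.
#demo.queue().launch()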