import gradio as gr
#from langchain.llms import OpenAI
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
#import pandas as pd
#from tqdm import tqdm  # unused
#import time  # unused
#from awq import AutoAWQForCausalLM  # only needed for the commented-out AWQ variant below
#llm = OpenAI()
#tokenizer = AutoTokenizer.from_pretrained("618AI/dictalm2-it-qa-fine-tune")
#model = AutoModelForCausalLM.from_pretrained("618AI/dictalm2-it-qa-fine-tune")
# Load the DictaLM 2.0 instruct model on CPU in half precision.
tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-instruct")
model = AutoModelForCausalLM.from_pretrained("dicta-il/dictalm2.0-instruct", low_cpu_mem_usage=True, torch_dtype=torch.float16)
# Quantized AWQ variant, kept for reference:
#tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-AWQ")
#model = AutoAWQForCausalLM.from_pretrained("dicta-il/dictalm2.0-AWQ", device_map="cpu", torch_dtype=torch.float16)
#model.to("cuda")
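# Sketch (not in the original Space, which pins everything to CPU): pick the
# device at runtime and move the model there. `device` is an added name, used
# only if you also uncomment the matching `.to(device)` call inside
# translate_eng_heb below.
#device = "cuda" if torch.cuda.is_available() else "cpu"
#model.to(device)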
def parsetext(translated_text):
    """Extract the text after the 'Answer: ' marker, up to the first newline."""
    marker = "Answer: "
    index = translated_text.find(marker)
    if index == -1:
        # Marker not found: return the text as-is rather than a garbled slice.
        return translated_text.strip()
    truncated_text = translated_text[index + len(marker):]
    newline = truncated_text.find("\n")
    if newline != -1:
        truncated_text = truncated_text[:newline]
    return truncated_text
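# Illustrative example (hypothetical model output, not from the original file):
#   parsetext("<prompt text>\nAnswer: שלום עולם\nextra tokens")
# returns "שלום עולם".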
def translate_eng_heb(source_text):
    prompt_eng_heb = """Forget the previous context.
Translate the following text from English to Hebrew.
Do NOT respond with any additional information other than the translation. Respond with the translated text ONLY.
If the English text contains multiple consecutive special characters (like !, @, $, # etc.), replace them with a
corresponding single special character for translation. Once the text is translated, restore the original group of special
characters in their corresponding place.
For example, if the English text is 'Aahh!!', replace it with 'Aahh!' before translation.
After translation, restore the !! in the correct place in the Hebrew text: אוי!!
The text to be translated is enclosed in <text> tags.
"""
    # Close the tag properly (the original had an unclosed <text>...<text> pair).
    prompt = f"{prompt_eng_heb}<text>{source_text}</text>\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    #inputs = {k: v.to(device) for k, v in inputs.items()}  # uncomment together with the device sketch above
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    # Return the parsed answer, not the full prompt+response string.
    translated_text = parsetext(response)
    return translated_text
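# Quick local check (sketch, not in the original file); the exact Hebrew output
# depends on the model:
#   print(translate_eng_heb("Good morning!!"))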
# Earlier OpenAI-based variant of translate_eng_heb: it built the same prompt
# (without the <text> tags), called `llm(prompt)` instead of the local DictaLM
# model, and returned parsetext(response).
# A plain gr.Interface is enough here; the original nested an Interface and a
# separately created Textbox inside gr.Blocks, which is not the documented pattern.
demo = gr.Interface(
    fn=translate_eng_heb,
    inputs=gr.Textbox(placeholder="Enter text block to translate", lines=4),
    outputs="text",
)
demo.launch()