import gradio as gr
#from langchain.llms import OpenAI
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
#import pandas as pd
from tqdm import tqdm
import time
#from awq import AutoAWQForCausalLM  # only needed for the commented-out AWQ variant below
#llm = OpenAI()
#tokenizer = AutoTokenizer.from_pretrained("618AI/dictalm2-it-qa-fine-tune")
#model = AutoModelForCausalLM.from_pretrained("618AI/dictalm2-it-qa-fine-tune", )
tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-instruct")
model = AutoModelForCausalLM.from_pretrained("dicta-il/dictalm2.0-instruct", low_cpu_mem_usage=True, torch_dtype=torch.float16)
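# Note on the loading flags: torch.float16 roughly halves the weight memory versus float32,
# and low_cpu_mem_usage=True avoids materialising a second full copy of the weights while loading.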
#tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-AWQ")
#model = AutoAWQForCausalLM.from_pretrained("dicta-il/dictalm2.0-AWQ", device_map="cpu", torch_dtype=torch.float16)
#model.to("cuda")
def parsetext(translated_text):
    # Keep only the model's answer: the text that follows the "Answer:" marker
    # echoed back from the prompt, truncated at the first newline.
    test_text = translated_text
    index = test_text.find("Answer:")
    if index == -1:
        # Marker not found; fall back to the full decoded text.
        return test_text.strip()
    truncated_text = test_text[index + len("Answer:"):]
    newline_index = truncated_text.find('\n')
    if newline_index != -1:
        truncated_text = truncated_text[:newline_index]
    return truncated_text.strip()
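# Illustrative example (not executed): for a decoded output such as
# "...<text>Hello</text>\nAnswer: ืฉืœื•ื\n", parsetext returns "ืฉืœื•ื".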
def translate_eng_heb(source_text):
    prompt_eng_heb = """Forget the previous context.
Translate the following text from English to Hebrew.
Do NOT respond with any additional information other than the translation. Respond with the translated text ONLY.
If the English text contains multiple consecutive special characters (like !,@,$,# etc.), replace them with a
corresponding single special character for translation. Once the text is translated, restore the original group of special
characters in their corresponding place.
For example, if the English text is 'Aahh!!', replace it with 'Aahh!' before translation.
After translation, restore the !! in the correct place in the Hebrew text, ืื•ื™!!
The text to be translated is enclosed in <text> tags.
"""
    prompt = f"{prompt_eng_heb}<text>{source_text}</text>\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    #inputs = {k: v.to("cuda") for k, v in inputs.items()}
    with torch.no_grad():
        # max_new_tokens=50 keeps generation fast but may truncate long translations.
        outputs = model.generate(**inputs, max_new_tokens=50)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    # Strip the echoed prompt and keep only the text after "Answer:".
    translated_text = parsetext(response)
    return translated_text
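# Illustrative call (hypothetical input/output, not run at import time):
#   translate_eng_heb("Good morning!")  # -> the Hebrew translation only, e.g. "ื‘ื•ืงืจ ื˜ื•ื‘!"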
with gr.Blocks() as demo:
    textbox = gr.Textbox(placeholder="Enter text block to translate", lines=4)
    gr.Interface(fn=translate_eng_heb, inputs=textbox, outputs="text")

demo.launch()