import gradio as gr
#from langchain.llms import OpenAI
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
#import pandas as pd
from tqdm import tqdm
import time
#from awq import AutoAWQForCausalLM  # only needed for the commented-out AWQ model below
#llm = OpenAI()
#tokenizer = AutoTokenizer.from_pretrained("618AI/dictalm2-it-qa-fine-tune")
#model = AutoModelForCausalLM.from_pretrained("618AI/dictalm2-it-qa-fine-tune")
# Load the DictaLM 2.0 instruct model in fp16; low_cpu_mem_usage keeps peak RAM
# down while the weights stream in on a CPU-only Space.
tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-instruct")
model = AutoModelForCausalLM.from_pretrained("dicta-il/dictalm2.0-instruct", low_cpu_mem_usage=True, torch_dtype=torch.float16)
#tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-AWQ")
#model = AutoAWQForCausalLM.from_pretrained("dicta-il/dictalm2.0-AWQ", device_map="cpu", torch_dtype=torch.float16)
#model.to("cuda")
def parsetext(translated_text):
    """Extract the text following the 'Answer: ' marker, up to the next newline."""
    marker = "Answer: "
    index = translated_text.find(marker)
    if index == -1:
        # Marker not found: return the raw model output unchanged.
        return translated_text
    truncated_text = translated_text[index + len(marker):]
    newline = truncated_text.find('\n')
    if newline != -1:
        truncated_text = truncated_text[:newline]
    return truncated_text
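# Minimal sketch of the expected behavior (hypothetical model outputs):
#   parsetext("<text>Hello</text>\nAnswer: שלום\nextra")  ->  "שלום"
#   parsetext("no marker present")                        ->  input returned unchanged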
def translate_eng_heb(source_text):
    """Translate a block of English text to Hebrew using the instruct model."""
    prompt_eng_heb = """Forget the previous context.
    Translate the following text from English to Hebrew.
    Do NOT respond with any additional information other than the translation. Respond with the translated text ONLY.
    If the English text contains multiple consecutive special characters (like !,@,$,# etc.), replace them with a
    corresponding single special character for translation. Once the text is translated, restore the original group of special
    characters in their corresponding place.
    For example, if the English text is 'Aahh!!', replace it with 'Aahh!' before translation.
    After translation, restore the !! in the correct place in the Hebrew text: אהה!!
    The text to be translated is enclosed in <text> tags.
    """
    prompt = f"{prompt_eng_heb}<text>{source_text}</text>\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    #inputs = {k: v.to("cuda") for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    translated_text = parsetext(response)
    return translated_text
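# Quick smoke test (hypothetical call; actual generations vary):
#   translate_eng_heb("Good morning")  ->  e.g. "בוקר טוב"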
#def translate_eng_heb(source_text):
#    prompt_eng_heb = f"""Forget the previous context.
#    Translate the following text from English to Hebrew.
#    Do NOT respond with any additional information other than the translation. Respond with the translated text ONLY.
#    If the english text contains multiple consecutive special characters (like !,@,$,# etc.), replace them with a
#    corresponding single special character for translation. Once the text is translated, restore the original group of special
#    characters in their corresponding place.
#    For example, if the english text is 'Aahh!!', replace it with 'Aahh!' before translation.
#    After translation, replace the !! in the correct place in the hebrew text, אהה!!
#
#    """
#    prompt = f"{prompt_eng_heb}{source_text}\nAnswer:"
#    response = llm(prompt)
#    translated_text = parsetext(response)
#    return translated_text
# Build the UI: a single input textbox wired to the translator through gr.Interface.
with gr.Blocks() as demo:
    textbox = gr.Textbox(placeholder="Enter text block to translate", lines=4)
    gr.Interface(fn=translate_eng_heb, inputs=textbox, outputs="text")

demo.launch()
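# Optional, assuming concurrent users should be served: Gradio's request queue
# can be enabled before launching, e.g.
#demo.queue().launch()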