File size: 1,827 Bytes
2a79b90 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import onnxruntime as ort
from transformers import AutoTokenizer
import numpy as np
import os
# Smoke-test script: runs a VitisAI-quantized GPT-2 ONNX model on two prompts
# and prints the greedily-decoded per-position predictions.

# Fail fast (KeyError) if the Vitis AI firmware env var is not configured —
# the VitisAIExecutionProvider needs it before the session is created.
print("my env", os.environ["XLNX_VART_FIRMWARE"])

onnx_path = r"C:\Users\Felix\Olive\examples\gpt2\cache\models\1_VitisAIQuantization-1193226590a636c107851db60c66899c-ebec96f9d75c46bed8dc01c8240c6bad-cpu-cpu\output_model\model.onnx"
config_path = r"C:\Users\Felix\Downloads\voe-3.5-win_amd64\voe-3.5-win_amd64\vaip_config.json"

tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
# GPT-2 has no pad token; reuse EOS and left-pad so the real tokens sit at the end.
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"

session = ort.InferenceSession(
    onnx_path,
    providers=['VitisAIExecutionProvider'],
    provider_options=[{'config_file': config_path}],
)
print("after load")


def _greedy_decode(prompt, max_length=128):
    """Tokenize *prompt*, run one forward pass, and return the argmax tokens decoded to text.

    Returns a list of strings (one per batch row; batch size is 1 here).
    """
    enc = tokenizer(prompt, return_tensors="np", padding="max_length", max_length=max_length)
    feeds = {
        "input_ids": enc["input_ids"].astype(np.int32),
        "attention_mask": enc["attention_mask"].astype(np.int32),
        # Position ids 0..max_length-1 for the whole (padded) window —
        # NOTE(review): not offset for left padding; presumably what the
        # exported model expects — confirm against the export config.
        "position_ids": np.arange(enc["attention_mask"].shape[1], dtype=np.int32)[None, :],
    }
    logits = session.run(None, feeds)[0]
    # Greedy decode: per-position argmax over the vocab dimension.
    return tokenizer.batch_decode(np.argmax(logits, axis=-1))


print(_greedy_decode("Hey hey hey! This is me and"))
print(_greedy_decode("Hey hey hey! This is me and I love to"))