|
import onnxruntime as ort |
|
from transformers import AutoTokenizer |
|
import numpy as np |
|
import os |
|
|
|
# Debug: confirm the NPU firmware override is visible to this process.
# Use .get() so a missing variable is reported instead of crashing with KeyError.
print("my env", os.environ.get("XLNX_VART_FIRMWARE", "<XLNX_VART_FIRMWARE not set>"))

# NOTE(review): machine-specific absolute paths — the quantized GPT-2 model
# produced by Olive's VitisAIQuantization pass and the VOE provider config.
onnx_path = r"C:\Users\Felix\Olive\examples\gpt2\cache\models\1_VitisAIQuantization-1193226590a636c107851db60c66899c-ebec96f9d75c46bed8dc01c8240c6bad-cpu-cpu\output_model\model.onnx"

config_path = r"C:\Users\Felix\Downloads\voe-3.5-win_amd64\voe-3.5-win_amd64\vaip_config.json"


# Tiny test tokenizer; it ships without a pad token, so reuse EOS for padding.
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"

session = ort.InferenceSession(
    onnx_path,
    providers=['VitisAIExecutionProvider'],
    provider_options=[{'config_file': config_path}],
)
print("after load")


def _greedy_decode(prompt, max_length=128):
    """Tokenize *prompt*, run one forward pass through the ONNX session, and
    return the per-position argmax tokens decoded back to text.

    Parameters:
        prompt: input string to tokenize (left-padded to *max_length*).
        max_length: fixed sequence length the exported model expects.

    Returns:
        list[str] — one decoded string per batch row (batch size is 1 here).
    """
    enc = tokenizer(prompt, return_tensors="np", padding="max_length", max_length=max_length)
    feeds = {
        # The exported model takes int32 inputs; HF tokenizers emit int64.
        "input_ids": enc["input_ids"].astype(np.int32),
        "attention_mask": enc["attention_mask"].astype(np.int32),
        # Positions 0..max_length-1 for the whole (padded) window.
        # NOTE(review): with left padding the real tokens sit at the end of
        # the window, so they receive the high position ids — confirm this
        # matches how the model was exported.
        "position_ids": np.arange(enc["attention_mask"].shape[1], dtype=np.int32)[None, :],
    }
    logits = session.run(None, feeds)[0]
    # Greedy pick per position, then decode token ids back to text.
    return tokenizer.batch_decode(np.argmax(logits, axis=-1))


print(_greedy_decode("Hey hey hey! This is me and"))
print(_greedy_decode("Hey hey hey! This is me and I love to"))