Training data-efficient image transformers & distillation through attention
Paper: arXiv:2012.12877
The DeiT models proposed in this paper are image classification transformers that can be trained far more efficiently than the original ViT models, requiring much less data and far fewer computing resources. The original implementation is available in the facebookresearch/deit repository on GitHub. Below, a minimal PyTorch inference sketch is followed by an example of running an exported DeiT model with ONNX Runtime.
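As a quick reference, here is a minimal sketch of the standard PyTorch inference path. It is not part of this page's original example: it assumes the facebook/deit-base-distilled-patch16-224 checkpoint used below and the DeiTForImageClassificationWithTeacher class from transformers, which pairs with the distilled checkpoints by averaging the class-token and distillation-token predictions.
from transformers import DeiTFeatureExtractor, DeiTForImageClassificationWithTeacher
import torch
from PIL import Image
import requests

# Load an example image from the COCO validation set
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Preprocess the image and load the distilled model
feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
model = DeiTForImageClassificationWithTeacher.from_pretrained("facebook/deit-base-distilled-patch16-224")

inputs = feature_extractor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# The logits average the class-token and distillation-token heads (ImageNet-1k classes)
predicted_class_idx = outputs.logits.argmax(-1).item()
print(model.config.id2label[predicted_class_idx])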
# Inference with a DeiT model exported to ONNX, using ONNX Runtime
from onnxruntime import InferenceSession
from transformers import DeiTFeatureExtractor
from PIL import Image
import requests

# Load an example image from the COCO validation set
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Preprocess with the DeiT feature extractor;
# ONNX Runtime expects NumPy arrays as input
feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
inputs = feature_extractor(images=image, return_tensors="np")

# Run the previously exported model
session = InferenceSession("onnx/model.onnx")
outputs = session.run(output_names=["last_hidden_state"], input_feed=dict(inputs))
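Note that onnx/model.onnx is assumed to have been exported beforehand, for example with the transformers.onnx export utility (python -m transformers.onnx --model=facebook/deit-base-distilled-patch16-224 onnx/). With that default export the graph exposes a last_hidden_state output, which is why that output name is requested above; to obtain classification logits directly, the model would need to be exported with an image-classification head instead.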