yolox / tools /demo_api.py
tidalove's picture
try pad=0?
a757aaf verified
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import time
import os
import json
from loguru import logger
import cv2
import torch
from yolox.data.data_augment import ValTransform
from yolox.data.datasets import COCO_CLASSES
from yolox.exp import get_exp
from yolox.utils import fuse_model, get_model_info, postprocess, vis
# Lower-case file extensions that are treated as images.
IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]


def get_image_list(path):
    """Recursively collect the image files found under *path*.

    A file counts as an image when its extension, compared
    case-insensitively, is listed in ``IMAGE_EXT`` (so ``photo.JPG`` is
    picked up as well as ``photo.jpg``).

    Args:
        path: Root directory to walk.

    Returns:
        A list of file paths (each is ``os.path.join`` of the walked directory
        and the file name), in ``os.walk`` order. The list is empty when
        nothing matches or when *path* is not a directory.
    """
    image_names = []
    for maindir, _subdirs, file_name_list in os.walk(path):
        for filename in file_name_list:
            ext = os.path.splitext(filename)[1]
            # Cameras and some OSes emit upper-case extensions; normalise
            # before comparing against the lower-case whitelist.
            if ext.lower() in IMAGE_EXT:
                image_names.append(os.path.join(maindir, filename))
    return image_names
class Predictor(object):
    """Run a detection model on single images.

    The predictor stores the model together with the thresholds and input
    size taken from the experiment object, preprocesses each image with
    ``ValTransform``, runs the model and passes the raw output through
    ``postprocess``.
    """

    def __init__(
        self,
        model,
        exp,
        cls_names=COCO_CLASSES,
        trt_file=None,
        decoder=None,
        device="cpu",
        fp16=False,
        legacy=False,
    ):
        """Store the model and the inference settings.

        Args:
            model: The network to call on the preprocessed image.
            exp: Experiment object; ``num_classes``, ``test_conf``,
                ``nmsthre`` and ``test_size`` are read from it.
            cls_names: Class names kept on the instance as ``cls_names``.
            trt_file: Optional path to a serialized ``torch2trt`` module.
                When given, that module replaces ``model`` for inference.
            decoder: Optional callable applied to the raw model output
                before post-processing.
            device: ``"gpu"`` moves the input tensor to CUDA; any other
                value leaves it where it is.
            fp16: When true (and ``device == "gpu"``) the input is cast to
                half precision.
            legacy: Forwarded to ``ValTransform``.
        """
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = exp.test_conf
        self.nmsthre = exp.nmsthre
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform(legacy=legacy)
        if trt_file is not None:
            # Imported lazily so torch2trt is only required on this path.
            from torch2trt import TRTModule

            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trt_file))

            # The original model is called once on a dummy CUDA batch before
            # it is swapped out. NOTE(review): presumably a warm-up; confirm
            # it is still needed.
            x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
            self.model(x)
            self.model = model_trt

    def inference(self, img):
        """Detect objects in one image.

        Args:
            img: Either a path to an image file (``str``) or an already
                decoded image array exposing ``.shape``.

        Returns:
            A tuple ``(outputs, img_info)``. ``outputs`` is whatever
            ``postprocess`` returns. ``img_info`` is a dict with the keys
            ``id``, ``file_name``, ``height``, ``width``, ``raw_img``,
            ``pad`` (``(pad_w, pad_h)``) and ``ratio`` (the resize factor
            from the original image to the network input).

        Raises:
            ValueError: If *img* is a path that OpenCV cannot read.
        """
        img_info = {"id": 0}
        if isinstance(img, str):
            img_path = img
            img_info["file_name"] = os.path.basename(img_path)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with the offending path rather than
                # with an AttributeError on `.shape` below.
                raise ValueError(f"Could not read image: {img_path}")
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        # Uniform scale that fits the image inside the network input size.
        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
        # No offset is applied when mapping boxes back to the original image.
        # NOTE(review): this assumes the preprocessing anchors the resized
        # image at the top-left corner of the padded canvas - confirm against
        # ValTransform before changing it.
        img_info["pad"] = (0, 0)
        img_info["ratio"] = ratio

        img, _ = self.preproc(img, None, self.test_size)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.float()
        if self.device == "gpu":
            img = img.cuda()
            if self.fp16:
                img = img.half()  # to FP16

        with torch.no_grad():
            t0 = time.time()
            outputs = self.model(img)
            if self.decoder is not None:
                outputs = self.decoder(outputs, dtype=outputs.type())
            outputs = postprocess(
                outputs, self.num_classes, self.confthre,
                self.nmsthre, class_agnostic=True
            )
            logger.info("Infer time: {:.4f}s".format(time.time() - t0))
        return outputs, img_info
def build_predictor(
    exp_file, model_name, ckpt_path, device="cpu", fp16=False, fuse=False, trt=False, conf=0, nms=0, tsize=None
):
    """Create a ready-to-use ``Predictor`` from an experiment and a checkpoint.

    Args:
        exp_file: Experiment description file passed to ``get_exp``.
        model_name: Model name passed to ``get_exp``.
        ckpt_path: Checkpoint file; its ``"model"`` entry is loaded as the
            state dict.
        device: ``"gpu"`` moves the model to CUDA; anything else keeps it
            where ``exp.get_model()`` created it.
        fp16: Cast the model to half precision (only on the GPU path).
        fuse: Fuse the model with ``fuse_model`` after the weights are loaded.
        trt: Accepted for interface compatibility; TensorRT loading is not
            implemented here, so a warning is logged and the regular model
            is used.
        conf: Confidence threshold written to ``exp.test_conf`` unless None.
            Note the default is ``0``, not None, so the experiment's own
            value is overridden unless the caller passes None explicitly.
        nms: NMS threshold written to ``exp.nmsthre`` unless None (same
            caveat about the ``0`` default).
        tsize: Optional square input size; sets ``exp.test_size`` to
            ``(tsize, tsize)``.

    Returns:
        A ``Predictor`` wrapping the loaded model.
    """
    # load experiment
    exp = get_exp(exp_file, model_name)
    if conf is not None:
        exp.test_conf = conf
    if nms is not None:
        exp.nmsthre = nms
    if tsize is not None:
        exp.test_size = (tsize, tsize)

    if trt:
        # Previously this flag was dropped silently; make that visible.
        logger.warning("trt=True is not supported by build_predictor; using the regular model.")

    # create & initialize model
    model = exp.get_model()
    # Summarise before any device / precision change.
    # NOTE(review): get_model_info presumably profiles with an fp32 dummy
    # input, which would not match a half-precision model - confirm.
    logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))

    if device == "gpu":
        model.cuda()
        if fp16:
            model.half()
    model.eval()

    logger.info("loading checkpoint")
    # SECURITY: weights_only=False unpickles arbitrary objects, which can
    # execute code. Only load checkpoints from trusted sources.
    ckpt = torch.load(ckpt_path, map_location="cpu", weights_only=False)
    # load the model state dict
    model.load_state_dict(ckpt["model"])
    logger.info("loaded checkpoint done.")

    if fuse:
        # Fuse only after the trained weights are in place.
        logger.info("\tFusing model...")
        model = fuse_model(model)

    predictor = Predictor(
        model, exp, COCO_CLASSES,
        None, decoder=None,
        device=device, fp16=fp16, legacy=False
    )
    return predictor
def run_detection(predictor, path):
    """Run *predictor* on an image or a directory of images and save the results.

    Output layout (COCO-like)::

        {"images": [{"id": 0, "filename": "x.jpg"}, ...],
         "annotations": [{"id": 0, "image_id": 0,
                          "bbox": [x1, y1, x2, y2],
                          "cls": 1.0, "score": 0.35}, ...]}

    ``bbox`` holds corner coordinates mapped back to the original image with
    the ``ratio`` and ``pad`` values reported by the predictor. ``score`` is
    the product of detection columns 4 and 5 and ``cls`` is column 6.
    Annotation ids are unique across the whole run.

    Args:
        predictor: Object whose ``inference(image_path)`` returns
            ``(outputs, img_info)``; ``outputs[0]`` is either None or an
            iterable of detection rows, and ``img_info`` provides ``ratio``
            and ``pad``.
        path: An image file, or a directory searched recursively with
            ``get_image_list``.

    Returns:
        Path of the written ``results.json``. It is placed inside *path*
        when *path* is a directory, otherwise next to the image file.
    """
    if os.path.isdir(path):
        files = get_image_list(path)
        out_dir = path
    else:
        files = [path]
        # A file path cannot contain results.json; write beside the image.
        out_dir = os.path.dirname(path) or "."
    files.sort()

    img_list = []
    ann_list = []
    for img_id, image_name in enumerate(files):
        outputs, img_info = predictor.inference(image_name)
        ratio = img_info["ratio"]
        pad_w, pad_h = img_info["pad"]

        img_list.append({"id": img_id, "filename": image_name})

        detections = outputs[0]
        if detections is None:
            # No detections for this image.
            continue

        for output in detections:
            x1, y1, x2, y2 = output[:4]
            # Undo the preprocessing offset and scale.
            x1 = (x1 - pad_w) / ratio
            y1 = (y1 - pad_h) / ratio
            x2 = (x2 - pad_w) / ratio
            y2 = (y2 - pad_h) / ratio

            ann_list.append({
                # Running counter keeps ids unique across all images.
                "id": len(ann_list),
                "image_id": img_id,
                "bbox": [float(x1), float(y1), float(x2), float(y2)],
                "cls": output[6].item(),
                "score": (output[4] * output[5]).item(),
            })

    data_dict = {"images": img_list, "annotations": ann_list}

    results_path = f"{out_dir}/results.json"
    with open(results_path, "w") as f:
        json.dump(data_dict, f)
    return results_path