|
#include <vector> |
|
#include <string> |
|
#include <utility> |
|
|
|
#include <opencv2/opencv.hpp> |
|
#include "labelsimagenet1k.h" |
|
|
|
using namespace std; |
|
using namespace cv; |
|
using namespace dnn; |
|
|
|
// Supported (backend, target) combinations.
// The position of each entry is the value accepted by the "backend" command
// line option, so the order must stay in sync with the help text in `keys`.
vector< pair<dnn::Backend, dnn::Target> > backendTargetPairs = {
    { dnn::DNN_BACKEND_OPENCV, dnn::DNN_TARGET_CPU },        // 0
    { dnn::DNN_BACKEND_CUDA,   dnn::DNN_TARGET_CUDA },       // 1
    { dnn::DNN_BACKEND_CUDA,   dnn::DNN_TARGET_CUDA_FP16 },  // 2
    { dnn::DNN_BACKEND_TIMVX,  dnn::DNN_TARGET_NPU },        // 3
    { dnn::DNN_BACKEND_CANN,   dnn::DNN_TARGET_NPU }         // 4
};
|
|
|
|
|
// Command line option table handed to cv::CommandLineParser in main().
// Each "{ ... }" group describes one option as: names | default value | help text.
// Adjacent literals are concatenated by the compiler into a single string.
std::string keys(
    "{ help h | | Print help message. }"
    "{ model m | image_classification_mobilenetv1_2022apr.onnx | "
        "Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1) }"
    "{ input i | | "
        "Path to input image or video file. Skip this argument to capture frames from a camera.}"
    "{ initial_width | 0 | Preprocess input image by initial resizing to a specific width.}"
    "{ initial_height | 0 | Preprocess input image by initial resizing to a specific height.}"
    "{ rgb | true | swap R and B plane.}"
    "{ crop | false | Preprocess input image by center cropping.}"
    "{ vis v | true | Usage: Specify to open a new window to show results.}"
    // The numbering below mirrors the order of backendTargetPairs.
    "{ backend bt | 0 | "
        "Choose one of computation backends: "
        "0: (default) OpenCV implementation + CPU, "
        "1: CUDA + GPU (CUDA), "
        "2: CUDA + GPU (CUDA FP16), "
        "3: TIM-VX + NPU, "
        "4: CANN + NPU}");
|
|
|
|
|
int main(int argc, char** argv) |
|
{ |
|
CommandLineParser parser(argc, argv, keys); |
|
|
|
parser.about("Use this script to run classification deep learning networks in opencv Zoo using OpenCV."); |
|
if (parser.has("help")) |
|
{ |
|
parser.printMessage(); |
|
return 0; |
|
} |
|
|
|
int rszWidth = parser.get<int>("initial_width"); |
|
int rszHeight = parser.get<int>("initial_height"); |
|
bool swapRB = parser.get<bool>("rgb"); |
|
bool crop = parser.get<bool>("crop"); |
|
bool vis = parser.get<bool>("vis"); |
|
String model = parser.get<String>("model"); |
|
int backendTargetid = parser.get<int>("backend"); |
|
|
|
if (model.empty()) |
|
{ |
|
CV_Error(Error::StsError, "Model file " + model + " not found"); |
|
} |
|
vector<string> labels = getLabelsImagenet1k(); |
|
|
|
Net net = readNet(samples::findFile(model)); |
|
net.setPreferableBackend(backendTargetPairs[backendTargetid].first); |
|
net.setPreferableTarget(backendTargetPairs[backendTargetid].second); |
|
|
|
VideoCapture cap; |
|
if (parser.has("input")) |
|
cap.open(samples::findFile(parser.get<String>("input"))); |
|
else |
|
cap.open(0); |
|
if (!cap.isOpened()) |
|
CV_Error(Error::StsError, "Cannot open video or file"); |
|
Mat frame, blob; |
|
static const std::string kWinName = model; |
|
int nbInference = 0; |
|
while (waitKey(1) < 0) |
|
{ |
|
cap >> frame; |
|
if (frame.empty()) |
|
{ |
|
cout << "Frame is empty" << endl; |
|
waitKey(); |
|
break; |
|
} |
|
|
|
if (rszWidth != 0 && rszHeight != 0) |
|
{ |
|
resize(frame, frame, Size(rszWidth, rszHeight)); |
|
} |
|
Image2BlobParams paramMobilenet; |
|
paramMobilenet.datalayout = DNN_LAYOUT_NCHW; |
|
paramMobilenet.ddepth = CV_32F; |
|
paramMobilenet.mean = Scalar(123.675, 116.28, 103.53); |
|
paramMobilenet.scalefactor = Scalar(1 / (255. * 0.229), 1 / (255. * 0.224), 1 / (255. * 0.225)); |
|
paramMobilenet.size = Size(224, 224); |
|
paramMobilenet.swapRB = swapRB; |
|
if (crop) |
|
paramMobilenet.paddingmode = DNN_PMODE_CROP_CENTER; |
|
else |
|
paramMobilenet.paddingmode = DNN_PMODE_NULL; |
|
|
|
blobFromImageWithParams(frame, blob, paramMobilenet); |
|
|
|
|
|
net.setInput(blob); |
|
Mat prob = net.forward(); |
|
|
|
|
|
Point classIdPoint; |
|
double confidence; |
|
minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint); |
|
int classId = classIdPoint.x; |
|
std::string label = format("%s: %.4f", (labels.empty() ? format("Class #%d", classId).c_str() : |
|
labels[classId].c_str()), |
|
confidence); |
|
if (vis) |
|
{ |
|
putText(frame, label, Point(0, 55), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
|
imshow(kWinName, frame); |
|
} |
|
else |
|
{ |
|
cout << label << endl; |
|
nbInference++; |
|
if (nbInference > 100) |
|
{ |
|
cout << nbInference << " inference made. Demo existing" << endl; |
|
break; |
|
} |
|
} |
|
} |
|
return 0; |
|
} |
|
|