#include "opencv2/opencv.hpp"
#include <map>
#include <vector>
#include <string>
#include <iostream>
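// Map command-line backend/target names to OpenCV DNN enum values. The
// non-default backends (CUDA, TIM-VX, CANN) are only usable if OpenCV was
// built with the corresponding support.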
const std::map<std::string, int> str2backend{
    {"opencv", cv::dnn::DNN_BACKEND_OPENCV}, {"cuda", cv::dnn::DNN_BACKEND_CUDA},
    {"timvx", cv::dnn::DNN_BACKEND_TIMVX}, {"cann", cv::dnn::DNN_BACKEND_CANN}
};
const std::map<std::string, int> str2target{
    {"cpu", cv::dnn::DNN_TARGET_CPU}, {"cuda", cv::dnn::DNN_TARGET_CUDA},
    {"npu", cv::dnn::DNN_TARGET_NPU}, {"cuda_fp16", cv::dnn::DNN_TARGET_CUDA_FP16}
};
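// Thin wrapper around cv::FaceDetectorYN that keeps the creation parameters,
// so the input size can be updated to match each input image.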
class YuNet
{
public:
    YuNet(const std::string& model_path,
          const cv::Size& input_size = cv::Size(320, 320),
          float conf_threshold = 0.6f,
          float nms_threshold = 0.3f,
          int top_k = 5000,
          int backend_id = 0,
          int target_id = 0)
        : model_path_(model_path), input_size_(input_size),
          conf_threshold_(conf_threshold), nms_threshold_(nms_threshold),
          top_k_(top_k), backend_id_(backend_id), target_id_(target_id)
    {
        model = cv::FaceDetectorYN::create(model_path_, "", input_size_, conf_threshold_, nms_threshold_, top_k_, backend_id_, target_id_);
    }
    /* Overwrite the input size set at model creation. Size format: [Width, Height]. */
    void setInputSize(const cv::Size& input_size)
    {
        input_size_ = input_size;
        model->setInputSize(input_size_);
    }
    // Run detection on an image. Each row of the result is one face.
    cv::Mat infer(const cv::Mat& image)
    {
        cv::Mat res;
        model->detect(image, res);
        return res;
    }
private:
    cv::Ptr<cv::FaceDetectorYN> model;

    std::string model_path_;
    cv::Size input_size_;
    float conf_threshold_;
    float nms_threshold_;
    int top_k_;
    int backend_id_;
    int target_id_;
};
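// Each row of `faces` encodes one detection with 15 floats:
// [x, y, w, h] bounding box, five landmark (x, y) pairs in the order listed
// below, and the confidence score at index 14.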
cv::Mat visualize(const cv::Mat& image, const cv::Mat& faces, float fps = -1.f)
{
    static cv::Scalar box_color{0, 255, 0};
    static std::vector<cv::Scalar> landmark_color{
        cv::Scalar(255,   0,   0), // right eye
        cv::Scalar(  0,   0, 255), // left eye
        cv::Scalar(  0, 255,   0), // nose tip
        cv::Scalar(255,   0, 255), // right mouth corner
        cv::Scalar(  0, 255, 255)  // left mouth corner
    };
    static cv::Scalar text_color{0, 255, 0};

    auto output_image = image.clone();

    if (fps >= 0)
    {
        cv::putText(output_image, cv::format("FPS: %.2f", fps), cv::Point(0, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2);
    }
    for (int i = 0; i < faces.rows; ++i)
    {
        // Draw the bounding box
        int x1 = static_cast<int>(faces.at<float>(i, 0));
        int y1 = static_cast<int>(faces.at<float>(i, 1));
        int w = static_cast<int>(faces.at<float>(i, 2));
        int h = static_cast<int>(faces.at<float>(i, 3));
        cv::rectangle(output_image, cv::Rect(x1, y1, w, h), box_color, 2);
        // Confidence as text
        float conf = faces.at<float>(i, 14);
        cv::putText(output_image, cv::format("%.4f", conf), cv::Point(x1, y1 + 12), cv::FONT_HERSHEY_DUPLEX, 0.5, text_color);
        // Draw landmarks
        for (int j = 0; j < static_cast<int>(landmark_color.size()); ++j)
        {
            int x = static_cast<int>(faces.at<float>(i, 2 * j + 4));
            int y = static_cast<int>(faces.at<float>(i, 2 * j + 5));
            cv::circle(output_image, cv::Point(x, y), 2, landmark_color[j], 2);
        }
    }
    return output_image;
}
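// Parse command-line arguments, then run either single-image inference or a
// live camera loop, depending on whether an input path was given.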
int main(int argc, char** argv)
{
    cv::CommandLineParser parser(argc, argv,
        "{help h         |                                   | Print this message}"
        "{input i        |                                   | Set input to a certain image, omit if using camera}"
        "{model m        | face_detection_yunet_2023mar.onnx | Set path to the model}"
        "{backend b      | opencv                            | Set DNN backend}"
        "{target t       | cpu                               | Set DNN target}"
        "{save s         | false                             | Whether to save result image or not}"
        "{vis v          | false                             | Whether to visualize result image or not}"
        /* model params below */
        "{conf_threshold | 0.9                               | Set the minimum confidence for the model to identify a face. Faces with conf < conf_threshold are filtered out}"
        "{nms_threshold  | 0.3                               | Set the threshold to suppress overlapped boxes. Of two boxes with IoU >= nms_threshold, only the one with the higher score is kept}"
        "{top_k          | 5000                              | Keep top_k bounding boxes before NMS. A lower value may speed up postprocessing}"
    );
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }
    std::string input_path = parser.get<std::string>("input");
    std::string model_path = parser.get<std::string>("model");
    std::string backend = parser.get<std::string>("backend");
    std::string target = parser.get<std::string>("target");
    bool save_flag = parser.get<bool>("save");
    bool vis_flag = parser.get<bool>("vis");
    // model params
    float conf_threshold = parser.get<float>("conf_threshold");
    float nms_threshold = parser.get<float>("nms_threshold");
    int top_k = parser.get<int>("top_k");

    // .at() throws std::out_of_range for an unrecognized backend/target name
    const int backend_id = str2backend.at(backend);
    const int target_id = str2target.at(target);

    // Instantiate YuNet
    YuNet model(model_path, cv::Size(320, 320), conf_threshold, nms_threshold, top_k, backend_id, target_id);
    // If input is an image
    if (!input_path.empty())
    {
        auto image = cv::imread(input_path);
        if (image.empty())
        {
            std::cout << "Cannot read image: " << input_path << "\n";
            return -1;
        }
        // Inference
        model.setInputSize(image.size());
        auto faces = model.infer(image);
        // Print faces
        std::cout << cv::format("%d faces detected:\n", faces.rows);
        for (int i = 0; i < faces.rows; ++i)
        {
            int x1 = static_cast<int>(faces.at<float>(i, 0));
            int y1 = static_cast<int>(faces.at<float>(i, 1));
            int w = static_cast<int>(faces.at<float>(i, 2));
            int h = static_cast<int>(faces.at<float>(i, 3));
            float conf = faces.at<float>(i, 14);
            std::cout << cv::format("%d: x1=%d, y1=%d, w=%d, h=%d, conf=%.4f\n", i, x1, y1, w, h, conf);
        }
        // Draw results on the input image
        if (save_flag || vis_flag)
        {
            auto res_image = visualize(image, faces);
            if (save_flag)
            {
                std::cout << "Results are saved to result.jpg\n";
                cv::imwrite("result.jpg", res_image);
            }
            if (vis_flag)
            {
                cv::namedWindow(input_path, cv::WINDOW_AUTOSIZE);
                cv::imshow(input_path, res_image);
                cv::waitKey(0);
            }
        }
    }
    else // Call default camera
    {
        int device_id = 0;
        auto cap = cv::VideoCapture(device_id);
        if (!cap.isOpened())
        {
            std::cout << "Cannot open camera " << device_id << "\n";
            return -1;
        }
        int w = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
        int h = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
        model.setInputSize(cv::Size(w, h));

        auto tick_meter = cv::TickMeter();
        cv::Mat frame;
        while (cv::waitKey(1) < 0)
        {
            bool has_frame = cap.read(frame);
            if (!has_frame)
            {
                std::cout << "No frames grabbed! Exiting ...\n";
                break;
            }
            // Inference
            tick_meter.start();
            cv::Mat faces = model.infer(frame);
            tick_meter.stop();
            // Draw results on the input image
            auto res_image = visualize(frame, faces, static_cast<float>(tick_meter.getFPS()));
            // Visualize in a new window
            cv::imshow("YuNet Demo", res_image);
            tick_meter.reset();
        }
    }
    return 0;
}
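// Example invocations (the binary name `demo` is illustrative):
//   ./demo                                  (default camera; press any key to quit)
//   ./demo -i=/path/to/image.jpg -v=true    (single image, show the result)
//   ./demo -b=cuda -t=cuda                  (requires a CUDA-enabled OpenCV build)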