JeVois  1.21
JeVois Smart Embedded Machine Vision Toolkit
PostProcessorDetect.C
Go to the documentation of this file.
1// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2//
3// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2021 by Laurent Itti, the University of Southern
4// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5//
6// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10// License for more details. You should have received a copy of the GNU General Public License along with this program;
11// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12//
13// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16/*! \file */
17
21#include <jevois/DNN/Utils.H>
22#include <jevois/Util/Utils.H>
24#include <jevois/Core/Engine.H>
25#include <jevois/Core/Module.H>
27
28#include <opencv2/dnn.hpp>
29
30// ####################################################################################################
31jevois::dnn::PostProcessorDetect::~PostProcessorDetect()
32{ }
33
34// ####################################################################################################
35void jevois::dnn::PostProcessorDetect::freeze(bool doit)
36{
37 classes::freeze(doit);
38 detecttype::freeze(doit);
39 if (itsYOLO) itsYOLO->freeze(doit);
40}
41
42// ####################################################################################################
43void jevois::dnn::PostProcessorDetect::onParamChange(postprocessor::classes const &, std::string const & val)
44{
45 if (val.empty()) { itsLabels.clear(); return; }
46
47 // Get the dataroot of our network. We assume that there is a sub-component named "network" that is a sibling of us:
48 std::vector<std::string> dd = jevois::split(Component::descriptor(), ":");
49 dd.back() = "network"; dd.emplace_back("dataroot");
50 std::string const dataroot = engine()->getParamStringUnique(jevois::join(dd, ":"));
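 // For example, with a hypothetical descriptor "pipeline:postproc", the string queried above would be
 // "pipeline:network:dataroot", i.e., the dataroot parameter of our sibling "network" sub-component.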
51
52 itsLabels = jevois::dnn::readLabelsFile(jevois::absolutePath(dataroot, val));
53}
54// ####################################################################################################
55void jevois::dnn::PostProcessorDetect::onParamChange(postprocessor::detecttype const &,
56 postprocessor::DetectType const & val)
57{
58 if (val == postprocessor::DetectType::RAWYOLO)
59 itsYOLO = addSubComponent<jevois::dnn::PostProcessorDetectYOLO>("yolo");
60 else
61 {
62 itsYOLO.reset();
63 removeSubComponent("yolo", false);
64 }
65}
66
67// ####################################################################################################
68void jevois::dnn::PostProcessorDetect::process(std::vector<cv::Mat> const & outs, jevois::dnn::PreProcessor * preproc)
69{
70 if (outs.empty()) LFATAL("No outputs received, we need at least one.");
71 cv::Mat const & out = outs[0]; cv::MatSize const & msiz = out.size;
72
73 float const confThreshold = cthresh::get() * 0.01F;
74 float const boxThreshold = dthresh::get() * 0.01F;
75 float const nmsThreshold = nms::get() * 0.01F;
76 int const fudge = classoffset::get();
77 itsImageSize = preproc->imagesize();
78
79 // To draw boxes, we will need to:
80 // - scale from [0..1]x[0..1] to blobw x blobh
81 // - scale and center from blobw x blobh to input image w x h, provided by PreProcessor::b2i()
82 // - when using the GUI, we further scale and translate to OpenGL display coordinates using GUIhelper::i2d()
83 // Here we assume that the first blob sets the input size.
84 cv::Size const bsiz = preproc->blobsize(0);
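 // As a minimal sketch of that mapping (using a hypothetical point bx,by in blob coordinates):
 //   float bx = 0.5F * bsiz.width, by = 0.5F * bsiz.height;
 //   preproc->b2i(bx, by); // bx,by are now in input image coordinates
 // and, when the GUI is used, GUIhelper::i2d() would further convert them to display coordinates.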
85
86 // We keep 3 parallel vectors here instead of creating a class to hold all of the data, because OpenCV
87 // needs them as separate lists for non-maximum suppression:
88 std::vector<int> classIds;
89 std::vector<float> confidences;
90 std::vector<cv::Rect> boxes;
91 size_t const maxbox = maxnbox::get();
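 // These vectors map directly onto the box and score arguments of cv::dnn::NMSBoxes() called at the end of
 // process(); classIds is kept alongside so each index surviving suppression still maps back to its class.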
92
93 // Here we just scale the coords from [0..1]x[0..1] to blobw x blobh:
94 try
95 {
96 switch(detecttype::get())
97 {
98 // ----------------------------------------------------------------------------------------------------
99 case jevois::dnn::postprocessor::DetectType::FasterRCNN:
100 {
101 // Network produces an output blob with shape 1x1xNx7, where N is the number of detections and every
102 // detection is a vector of values [batchId, classId, confidence, left, top, right, bottom]
103 if (outs.size() != 1 || msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7)
104 LTHROW("Expected 1 output blob with shape 1x1xNx7 for N detections with values "
105 "[batchId, classId, confidence, left, top, right, bottom]");
106
107 float const * data = (float const *)out.data;
108 for (size_t i = 0; i < out.total(); i += 7)
109 {
110 float confidence = data[i + 2];
111 if (confidence > confThreshold)
112 {
113 int left = (int)data[i + 3];
114 int top = (int)data[i + 4];
115 int right = (int)data[i + 5];
116 int bottom = (int)data[i + 6];
117 int width = right - left + 1;
118 int height = bottom - top + 1;
119 classIds.push_back((int)(data[i + 1]) + fudge); // Skip 0th background class id.
120 boxes.push_back(cv::Rect(left, top, width, height));
121 confidences.push_back(confidence);
122 if (classIds.size() > maxbox) break; // Stop if too many boxes
123 }
124 }
125 }
126 break;
127
128 // ----------------------------------------------------------------------------------------------------
129 case jevois::dnn::postprocessor::DetectType::SSD:
130 {
131 // Network produces an output blob with shape 1x1xNx7, where N is the number of detections and every
132 // detection is a vector of values [batchId, classId, confidence, left, top, right, bottom]
133 if (outs.size() != 1 || msiz.dims() != 4 || msiz[0] != 1 || msiz[1] != 1 || msiz[3] != 7)
134 LTHROW("Expected 1 output blob with shape 1x1xNx7 for N detections with values "
135 "[batchId, classId, confidence, left, top, right, bottom]");
136
137 float const * data = (float const *)out.data;
138 for (size_t i = 0; i < out.total(); i += 7)
139 {
140 float confidence = data[i + 2];
141 if (confidence > confThreshold)
142 {
143 int left = (int)(data[i + 3] * bsiz.width);
144 int top = (int)(data[i + 4] * bsiz.height);
145 int right = (int)(data[i + 5] * bsiz.width);
146 int bottom = (int)(data[i + 6] * bsiz.height);
147 int width = right - left + 1;
148 int height = bottom - top + 1;
149 classIds.push_back((int)(data[i + 1]) + fudge); // Skip 0th background class id.
150 boxes.push_back(cv::Rect(left, top, width, height));
151 confidences.push_back(confidence);
152 if (classIds.size() > maxbox) break; // Stop if too many boxes
153 }
154 }
155 }
156 break;
157
158 // ----------------------------------------------------------------------------------------------------
159 case jevois::dnn::postprocessor::DetectType::TPUSSD:
160 {
161 // Network produces 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count
162 // (see GetDetectionResults in detection/adapter.cc of libcoral):
163 if (outs.size() != 4)
164 LTHROW("Expected 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count");
165 cv::Mat const & bboxes = outs[0];
166 cv::Mat const & ids = outs[1];
167 cv::Mat const & scores = outs[2];
168 cv::Mat const & count = outs[3];
169 if (bboxes.total() != 4 * ids.total() || bboxes.total() != 4 * scores.total() || count.total() != 1)
170 LTHROW("Expected 4 output blobs with shapes 4xN for boxes, N for IDs, N for scores, and 1x1 for count");
171
172 size_t num = count.at<float>(0);
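 // Note: the count tensor stores the number of valid detections as a single float, hence the cast above.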
173 if (num > ids.total()) LTHROW("Too many detections: " << num << " for only " << ids.total() << " ids");
174 float const * bb = (float const *)bboxes.data;
175
176 for (size_t i = 0; i < num; ++i)
177 {
178 if (scores.at<float>(i) < confThreshold) continue;
179
180 int top = (int)(bb[4 * i] * bsiz.height);
181 int left = (int)(bb[4 * i + 1] * bsiz.width);
182 int bottom = (int)(bb[4 * i + 2] * bsiz.height);
183 int right = (int)(bb[4 * i + 3] * bsiz.width);
184 int width = right - left + 1;
185 int height = bottom - top + 1;
186 classIds.push_back((int)(ids.at<float>(i)) + fudge); // Skip 0th background class id.
187 boxes.push_back(cv::Rect(left, top, width, height));
188 confidences.push_back(scores.at<float>(i));
189 if (classIds.size() > maxbox) break; // Stop if too many boxes
190 }
191 }
192 break;
193
194 // ----------------------------------------------------------------------------------------------------
195 case jevois::dnn::postprocessor::DetectType::YOLO:
196 {
197 for (size_t i = 0; i < outs.size(); ++i)
198 {
199 // Network produces output blob(s) with shape Nx(5+C), where N is the number of detected objects and C is
200 // the number of classes; the first 5 values of each row are [center_x, center_y, width, height, box score].
201 cv::Mat const & out = outs[i];
202 cv::MatSize const & ms = out.size; int const nd = ms.dims();
203 int nbox = -1, ndata = -1;
204
205 if (nd >= 2)
206 {
207 nbox = ms[nd-2];
208 ndata = ms[nd-1];
209 for (int i = 0; i < nd-2; ++i) if (ms[i] != 1) nbox = -1; // reject if more than 2 effective dims
210 }
211
212 if (nbox < 0 || ndata < 5)
213 LTHROW("Expected 1 or more output blobs with shape Nx(5+C) where N is the number of "
214 "detected objects, C is the number of classes, and the first 5 columns are "
215 "[center_x, center_y, width, height, box score]. // "
216 "Incorrect size " << jevois::dnn::shapestr(out) << " for output " << i <<
217 ": need Nx(5+C) or 1xNx(5+C)");
218
219 // Some networks, like YOLOv5 or YOLOv7, output 3D 1xNx(5+C), so here we slice off the last 2 dims:
220 int sz2[] = { nbox, ndata };
221 cv::Mat const out2(2, sz2, out.type(), out.data);
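 // Note: this cv::Mat constructor only wraps the existing out.data pointer (no copy), so out2 is a 2D
 // nbox x ndata view of the same buffer with the leading singleton dimensions dropped.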
222
223 float const * data = (float const *)out2.data;
224 for (int j = 0; j < nbox; ++j, data += ndata)
225 {
226 if (data[4] < boxThreshold) continue; // skip if box score is too low
227
228 cv::Mat scores = out2.row(j).colRange(5, ndata);
229 cv::Point classIdPoint; double confidence;
230 cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
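 // minMaxLoc() gives the best per-class score (confidence) and its column index (classIdPoint.x = class id).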
231
232 if (confidence < confThreshold) continue; // skip if class score too low
233
234 // YOLO versions before v5 produce boxes in [0..1[x[0..1[ and a 2D output blob:
235 int centerX, centerY, width, height;
236 if (nd == 2)
237 {
238 centerX = (int)(data[0] * bsiz.width);
239 centerY = (int)(data[1] * bsiz.height);
240 width = (int)(data[2] * bsiz.width);
241 height = (int)(data[3] * bsiz.height);
242 }
243 else
244 {
245 // YOLOv5, YOLOv7 produce boxes already scaled by the input blob size, and a 3D output blob:
246 centerX = (int)(data[0]);
247 centerY = (int)(data[1]);
248 width = (int)(data[2]);
249 height = (int)(data[3]);
250 }
251
252 int left = centerX - width / 2;
253 int top = centerY - height / 2;
254 boxes.push_back(cv::Rect(left, top, width, height));
255 classIds.push_back(classIdPoint.x);
256 confidences.push_back((float)confidence);
257 if (classIds.size() > maxbox) break; // Stop if too many boxes
258 }
259 }
260 }
261 break;
262
263 // ----------------------------------------------------------------------------------------------------
264 case jevois::dnn::postprocessor::DetectType::YOLOv10:
265 {
266 for (size_t i = 0; i < outs.size(); ++i)
267 {
268 // Network produces output blob(s) with shape Nx(4+C), where N is the number of detected objects and C is
269 // the number of classes; the first 4 values of each row are [center_x, center_y, width, height]. There is
270 // no box score, only per-class scores for each detection.
271 cv::Mat const & out = outs[i];
272 cv::MatSize const & ms = out.size; int const nd = ms.dims();
273 int nbox = -1, ndata = -1;
274
275 if (nd >= 2)
276 {
277 nbox = ms[nd-2];
278 ndata = ms[nd-1];
279 for (int i = 0; i < nd-2; ++i) if (ms[i] != 1) nbox = -1; // reject if more than 2 effective dims
280 }
281
282 if (nbox < 0 || ndata < 4)
283 LTHROW("Expected 1 or more output blobs with shape Nx(4+C) where N is the number of "
284 "detected objects, C is the number of classes, and the first 4 columns are "
285 "[center_x, center_y, width, height]. // "
286 "Incorrect size " << jevois::dnn::shapestr(out) << " for output " << i <<
287 ": need Nx(4+C) or 1xNx(4+C)");
288
289 // Some networks may output 3D 1xNx(4+C), so here we slice off the last 2 dims:
290 int sz2[] = { nbox, ndata };
291 cv::Mat const out2(2, sz2, out.type(), out.data);
292
293 float const * data = (float const *)out2.data;
294 for (int j = 0; j < nbox; ++j, data += ndata)
295 {
296 cv::Mat scores = out2.row(j).colRange(4, ndata);
297 cv::Point classIdPoint; double confidence;
298 cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
299
300 if (confidence < confThreshold) continue; // skip if class score too low
301
302 // Boxes are already scaled by input blob size, and are x1, y1, x2, y2:
303 boxes.push_back(cv::Rect(data[0], data[1], data[2]-data[0]+1, data[3]-data[1]+1));
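 // (cv::Rect is x, y, width, height; the +1 converts the inclusive x2/y2 corners into box sizes.)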
304 classIds.push_back(classIdPoint.x);
305 confidences.push_back((float)confidence);
306 if (classIds.size() > maxbox) break; // Stop if too many boxes
307 }
308 }
309 }
310 break;
311
312 // ----------------------------------------------------------------------------------------------------
313 case jevois::dnn::postprocessor::DetectType::YOLOv10pp:
314 {
315 // Network produces an output blob with shape 1xNx6, where N is the number of detections and every
316 // detection is a vector of values [left, top, right, bottom, confidence, classId]
317 if (outs.size() != 1 || msiz.dims() != 3 || msiz[0] != 1 || msiz[2] != 6)
318 LTHROW("Expected 1 output blob with shape 1xNx6 for N detections with values "
319 "[left, top, right, bottom, confidence, classId]");
320
321 float const * data = (float const *)out.data;
322 for (size_t i = 0; i < out.total(); i += 6)
323 {
324 float confidence = data[i + 4];
325 if (confidence > confThreshold)
326 {
327 // Boxes are already scaled by input blob size, and are x1, y1, x2, y2:
328 int left = (int)data[i + 0];
329 int top = (int)data[i + 1];
330 int right = (int)data[i + 2];
331 int bottom = (int)data[i + 3];
332 int width = right - left + 1;
333 int height = bottom - top + 1;
334 classIds.push_back((int)(data[i + 5]) + fudge); // Skip 0th background class id.
335 boxes.push_back(cv::Rect(left, top, width, height));
336 confidences.push_back(confidence);
337 if (classIds.size() > maxbox) break; // Stop if too many boxes
338 }
339 }
340 }
341 break;
342
343 // ----------------------------------------------------------------------------------------------------
344 case jevois::dnn::postprocessor::DetectType::RAWYOLO:
345 {
346 if (itsYOLO) itsYOLO->yolo(outs, classIds, confidences, boxes, itsLabels.size(), boxThreshold, confThreshold,
347 bsiz, fudge, maxbox);
348 else LFATAL("Internal error -- no YOLO subcomponent");
349 }
350 break;
351
352 default:
353 // Do not use strget() here as it will throw!
354 LTHROW("Unsupported Post-processor detecttype " << int(detecttype::get()));
355 }
356 }
357 // Abort here if the received outputs were malformed:
358 catch (std::exception const & e)
359 {
360 std::string err = "Selected detecttype is " + detecttype::strget() + " and network produced:\n\n";
361 for (cv::Mat const & m : outs) err += "- " + jevois::dnn::shapestr(m) + "\n";
362 err += "\nFATAL ERROR(s):\n\n";
363 err += e.what();
364 LFATAL(err);
365 }
366
367 // Cleanup overlapping boxes:
368 std::vector<int> indices;
369 cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
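 // indices now holds the positions (into boxes, confidences, and classIds) of the detections kept by NMS.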
370
371 // Now clamp boxes to be within blob, and adjust the boxes from blob size to input image size:
372 for (cv::Rect & b : boxes)
373 {
374 jevois::dnn::clamp(b, bsiz.width, bsiz.height);
375
376 cv::Point2f tl = b.tl(); preproc->b2i(tl.x, tl.y);
377 cv::Point2f br = b.br(); preproc->b2i(br.x, br.y);
378 b.x = tl.x; b.y = tl.y; b.width = br.x - tl.x; b.height = br.y - tl.y;
379 }
380
381 // Store results:
382 itsDetections.clear();
383 for (size_t i = 0; i < indices.size(); ++i)
384 {
385 int idx = indices[i];
386 cv::Rect const & box = boxes[idx];
387 jevois::ObjReco o {confidences[idx] * 100.0f, jevois::dnn::getLabel(itsLabels, classIds[idx]) };
388 std::vector<jevois::ObjReco> ov;
389 ov.emplace_back(o);
390 jevois::ObjDetect od { box.x, box.y, box.x + box.width, box.y + box.height, ov };
391 itsDetections.emplace_back(od);
392 }
393}
394
395// ####################################################################################################
396void jevois::dnn::PostProcessorDetect::report(jevois::StdModule * mod, jevois::RawImage * outimg,
397 jevois::OptGUIhelper * helper, bool overlay,
398 bool /*idle*/)
399{
400 for (jevois::ObjDetect const & o : itsDetections)
401 {
402 std::string categ, label;
403
404 if (o.reco.empty())
405 {
406 categ = "unknown";
407 label = "unknown";
408 }
409 else
410 {
411 categ = o.reco[0].category;
412 label = jevois::sformat("%s: %.2f", categ.c_str(), o.reco[0].score);
413 }
414
415 // If desired, draw boxes in output image:
416 if (outimg && overlay)
417 {
418 jevois::rawimage::drawRect(*outimg, o.tlx, o.tly, o.brx - o.tlx, o.bry - o.tly, 2, jevois::yuyv::LightGreen);
419 jevois::rawimage::writeText(*outimg, label, o.tlx + 6, o.tly + 2, jevois::yuyv::LightGreen,
420 jevois::rawimage::Font10x20);
421 }
422
423#ifdef JEVOIS_PRO
424 // If desired, draw results on GUI:
425 if (helper)
426 {
427 int col = jevois::dnn::stringToRGBA(categ, 0xff);
428 helper->drawRect(o.tlx, o.tly, o.brx, o.bry, col, true);
429 helper->drawText(o.tlx + 3.0f, o.tly + 3.0f, label.c_str(), col);
430 }
431#else
432 (void)helper; // keep compiler happy
433#endif
434
435 // If desired, send results to serial port:
436 if (mod) mod->sendSerialObjDetImg2D(itsImageSize.width, itsImageSize.height, o);
437 }
438}
#define LTHROW(msg)
Definition Log.H:251
std::string descriptor() const
Get our full descriptor (including all parents) as [Instancename]:[...]:[...].
Definition Component.C:276
std::string getParamStringUnique(std::string const &paramdescriptor) const
Get a parameter value by string, simple version assuming only one parameter match.
Definition Component.C:405
Helper class to assist modules in creating graphical and GUI elements.
Definition GUIhelper.H:133
void drawText(float x, float y, char const *txt, ImU32 col=IM_COL32(128, 255, 128, 255))
Draw text over an image.
Definition GUIhelper.C:611
void drawRect(float x1, float y1, float x2, float y2, ImU32 col=IM_COL32(128, 255, 128, 255), bool filled=true)
Draw rectangular box over an image.
Definition GUIhelper.C:480
A raw image as coming from a V4L2 Camera and/or being sent out to a USB Gadget.
Definition RawImage.H:111
Base class for a module that supports standardized serial messages.
Definition Module.H:234
void sendSerialObjDetImg2D(unsigned int camw, unsigned int camh, float x, float y, float w, float h, std::vector< ObjReco > const &res)
Send a standardized object detection + recognition message.
Definition Module.C:572
void onParamChange(postprocessor::detecttype const &param, postprocessor::DetectType const &val) override
void report(jevois::StdModule *mod, jevois::RawImage *outimg=nullptr, jevois::OptGUIhelper *helper=nullptr, bool overlay=true, bool idle=false) override
Report what happened in last process() to console/output video/GUI.
void process(std::vector< cv::Mat > const &outs, PreProcessor *preproc) override
Process outputs and draw/send some results.
void freeze(bool doit) override
Freeze/unfreeze parameters that users should not change while running.
virtual ~PostProcessorDetect()
Destructor.
Pre-Processor for neural network pipeline.
cv::Size const & imagesize() const
Access the last processed image size.
void b2i(float &x, float &y, size_t blobnum=0)
Convert coordinates from blob back to original image.
cv::Size blobsize(size_t num) const
Access the width and height of a given blob, accounting for NCHW or NHWC.
#define LFATAL(msg)
Convenience macro for users to print out console or syslog messages, FATAL level.
Definition Log.H:230
std::map< int, std::string > readLabelsFile(std::string const &fname)
Read a label file.
Definition Utils.C:25
void clamp(cv::Rect &r, int width, int height)
Clamp a rectangle to within given image width and height.
Definition Utils.C:367
int stringToRGBA(std::string const &label, unsigned char alpha=128)
Compute a color from a label name.
Definition Utils.C:76
std::string getLabel(std::map< int, std::string > const &labels, int id)
Get a label from an id.
Definition Utils.C:68
std::string shapestr(cv::Mat const &m)
Get a string of the form: "nD AxBxC... TYPE" from an n-dimensional cv::Mat with data type TYPE.
Definition Utils.C:105
void writeText(RawImage &img, std::string const &txt, int x, int y, unsigned int col, Font font=Font6x10)
Write some text in an image.
void drawRect(RawImage &img, int x, int y, unsigned int w, unsigned int h, unsigned int thick, unsigned int col)
Draw a rectangle in a YUYV image.
std::string join(std::vector< std::string > const &strings, std::string const &delimiter)
Concatenate a vector of tokens into a string.
Definition Utils.C:280
std::string sformat(char const *fmt,...) __attribute__((format(__printf__, 1, 2)))
Create a string using printf style arguments.
Definition Utils.C:439
std::filesystem::path absolutePath(std::filesystem::path const &root, std::filesystem::path const &path)
Compute an absolute path from two paths.
Definition Utils.C:385
std::vector< std::string > split(std::string const &input, std::string const &regex="\\s+")
Split string into vector of tokens using a regex to specify what to split on; default regex splits by...
Definition Utils.C:270
unsigned short constexpr LightGreen
YUYV color value.
Definition RawImage.H:63
A trivial struct to store object detection results.
Definition ObjDetect.H:27
std::vector< ObjReco > reco
The recognized classes with their scores.
Definition ObjDetect.H:29
A trivial struct to store object recognition results.
Definition ObjReco.H:24