JeVoisBase  1.21
JeVois Smart Embedded Machine Vision Toolkit Base Modules
Share this page:
Loading...
Searching...
No Matches
DemoNeon.C
Go to the documentation of this file.
1// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2//
3// JeVois Smart Embedded Machine Vision Toolkit - Copyright (C) 2016 by Laurent Itti, the University of Southern
4// California (USC), and iLab at USC. See http://iLab.usc.edu and http://jevois.org for information about this project.
5//
6// This file is part of the JeVois Smart Embedded Machine Vision Toolkit. This program is free software; you can
7// redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
8// Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
10// License for more details. You should have received a copy of the GNU General Public License along with this program;
11// if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
12//
13// Contact information: Laurent Itti - 3641 Watt Way, HNB-07A - Los Angeles, CA 90089-2520 - USA.
14// Tel: +1 213 740 3527 - itti@pollux.usc.edu - http://iLab.usc.edu - http://jevois.org
15// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16/*! \file */
17
18#include <jevois/Core/Module.H>
19#include <jevois/Debug/Log.H>
20#include <jevois/Util/Utils.H>
22#include <jevois/Debug/Timer.H>
23
24#include <linux/videodev2.h>
25#include <opencv2/core/core.hpp>
26#include <opencv2/imgproc/imgproc.hpp>
27#include <string.h>
28
29// Neon-related:
30#include <NE10_imgproc.h>
31
32// icon by Madebyoliver in technology at flaticon
33
34namespace
35{
36 // OpenCV's cvtColor() cannot convert from RGBA to YUYV. Found this code here and cleaned it up a bit:
37 // http://study.marearts.com/2014/12/yuyv-to-rgb-and-rgb-to-yuyv-using.html
38 class Parallel_process : public cv::ParallelLoopBody
39 {
40 private:
41 cv::Mat const & inImg;
42 unsigned char * outImg;
43 int widhStep;
44 int m_stride;
45
46 public:
47 Parallel_process(cv::Mat const & inputImgage, unsigned char* outImage, size_t outw) :
48 inImg(inputImgage), outImg(outImage)
49 {
50 widhStep = inputImgage.size().width * 4; // 4bpp for RGBA
51 m_stride = outw * 2; // 2bpp for YUYV
52 }
53
54 virtual void operator()(const cv::Range & range) const
55 {
56 for (int i = range.start; i < range.end; ++i)
57 {
58 int const s1 = i * widhStep;
59
60 for (int iw = 0; iw < inImg.size().width; iw += 2)
61 {
62 int const s2 = iw * 4; int mc = s1 + s2;
63 float const R1 = inImg.data[mc + 0];
64 float const G1 = inImg.data[mc + 1];
65 float const B1 = inImg.data[mc + 2];
66 // skip A
67 float const R2 = inImg.data[mc + 4];
68 float const G2 = inImg.data[mc + 5];
69 float const B2 = inImg.data[mc + 6];
70 // skip A
71
72 int Y = (0.257F * R1) + (0.504F * G1) + (0.098F * B1) + 16;
73 int U = -(0.148F * R1) - (0.291F * G1) + (0.439F * B1) + 128;
74 int V = (0.439F * R1 ) - (0.368F * G1) - (0.071F * B1) + 128;
75 int Y2 = (0.257F * R2) + (0.504F * G2) + (0.098F * B2) + 16;
76
77 if (Y > 255) Y = 255; else if (Y < 0) Y = 0;
78 if (U > 255) U = 255; else if (U < 0) U = 0;
79 if (V > 255) V = 255; else if (V < 0) V = 0;
80 if (Y2 > 255) Y2 = 255; else if (Y2 < 0) Y2 = 0;
81
82 mc = i * m_stride + iw * 2;
83 outImg[mc + 0] = Y; outImg[mc + 1] = U; outImg[mc + 2] = Y2; outImg[mc + 3] = V;
84 }
85 }
86 }
87 };
88
89 void rgba2yuyv(cv::Mat const & src, unsigned char * dst, size_t dstw)
90 { cv::parallel_for_(cv::Range(0, src.rows), Parallel_process(src, dst, dstw)); }
91
92} // anonymous namespace
93
94// Module parameters: allow user to play with filter kernel size
// Both parameters below are placed in this category, and both are read through their get() accessors each time
// DemoNeon::process() builds the kernel size handed to the NE10 box filter.
95static jevois::ParameterCategory const ParamCateg("Neon Demo Options");
96
97//! Parameter \relates DemoNeon
//! Box filter kernel width in pixels, declared as unsigned int
98JEVOIS_DECLARE_PARAMETER(kernelw, unsigned int, "Kernel width (pixels)", 5, ParamCateg);
99
100//! Parameter \relates DemoNeon
//! Box filter kernel height in pixels, declared as unsigned int
101JEVOIS_DECLARE_PARAMETER(kernelh, unsigned int, "Kernel height (pixels)", 5, ParamCateg);
102
103//! Simple demo of ARM Neon (SIMD) extensions, comparing a box filter (blur) between CPU and Neon
104/*! NEON are specialized ARM processor instructions that can handle several operations at once, for example, 8 additions
105 of 8 bytes with 8 other bytes. NEON is the counterpart for ARM architectures of SSE for Intel architectures.
106
107 They are very useful for image processing. NEON instructions are supported both by the JeVois hardware platform and
108 by the JeVois programming framework.
109
110 In fact, one can directly call NEON instructions using C-like function calls and specialized C data types to
111 represent small vectors of numbers (like 8 bytes).
112
113 This demo uses a blur filter from the open-source NE10 library. It compares processing time to apply the same filter
114 to the input video stream, either using conventional C code, or using NEON-accelerated code. The NEON-accelerated
115 code is about 6x faster.
116
117 For more examples of use of NEON on JeVois, see modules \jvmod{DarknetSingle}, \jvmod{DarknetYOLO}, and
118 \jvmod{DarknetSaliency} which use NEON to accelerate the deep neural networks implemented in these modules.
119
120
121 @author Laurent Itti
122
123 @displayname Demo NEON
124 @videomapping YUYV 960 240 30.0 YUYV 320 240 30.0 JeVois DemoNeon
125 @email itti\@usc.edu
126 @address University of Southern California, HNB-07A, 3641 Watt Way, Los Angeles, CA 90089-2520, USA
127 @copyright Copyright (C) 2016 by Laurent Itti, iLab and the University of Southern California
128 @mainurl http://jevois.org
129 @supporturl http://jevois.org/doc
130 @otherurl http://iLab.usc.edu
131 @license GPL v3
132 @distribution Unrestricted
133 @restrictions None
134 \ingroup modules */
136 public jevois::Parameter<kernelw, kernelh>
137{
138 public:
139 //! Default base class constructor ok
141
142 //! Virtual destructor for safe inheritance
143 virtual ~DemoNeon() { }
144
145 //! Processing function
146 virtual void process(jevois::InputFrame && inframe, jevois::OutputFrame && outframe) override
147 {
148 static jevois::Timer cputim("CPU time");
149 static jevois::Timer neontim("Neon time");
150
151 // Wait for next available camera image:
152 jevois::RawImage inimg = inframe.get();
153 unsigned int const w = inimg.width, h = inimg.height;
154 inimg.require("input", w, h, V4L2_PIX_FMT_YUYV); // any image size but require YUYV pixels
155
156 // While we convert it, start a thread to wait for out frame and paste the input into it:
157 jevois::RawImage outimg;
158 auto paste_fut = jevois::async([&]() {
159 outimg = outframe.get();
160 outimg.require("output", w * 3, h, inimg.fmt);
161 jevois::rawimage::paste(inimg, outimg, 0, 0);
162 jevois::rawimage::writeText(outimg, "JeVois NEON Demo", 3, 3, jevois::yuyv::White);
163 });
164
165 // Convert input frame to RGBA:
166 cv::Mat imgrgba = jevois::rawimage::convertToCvRGBA(inimg);
167
168 // Wait for paste to finish up:
169 paste_fut.get();
170
171 // Let camera know we are done processing the input image:
172 inframe.done();
173
174 // First, apply blur filter using CPU:
175 ne10_size_t src_size { w, h }, kernel_size { kernelw::get(), kernelh::get() };
176
177 cv::Mat cpuresult(h, w, CV_8UC4);
178 cputim.start();
179 ne10_img_boxfilter_rgba8888_c(imgrgba.data, cpuresult.data, src_size, w * 4, w * 4, kernel_size);
180 std::string const & cpufps = cputim.stop();
181
182 // Then apply it using neon:
183 cv::Mat neonresult(h, w, CV_8UC4);
184 neontim.start();
185
186#ifdef __ARM_NEON__
187 // Neon version:
188 ne10_img_boxfilter_rgba8888_neon(imgrgba.data, neonresult.data, src_size, w * 4, w * 4, kernel_size);
189#else
190 // On non-ARM/NEON host, revert to CPU version again:
191 ne10_img_boxfilter_rgba8888_c(imgrgba.data, neonresult.data, src_size, w * 4, w * 4, kernel_size);
192#endif
193
194 std::string const & neonfps = neontim.stop();
195
196 // Convert both results back to YUYV for display:
197 rgba2yuyv(cpuresult, outimg.pixelsw<unsigned char>() + w * 2, w * 3);
198 jevois::rawimage::writeText(outimg, "Box filter - CPU", w + 3, 3, jevois::yuyv::White);
199 rgba2yuyv(neonresult, outimg.pixelsw<unsigned char>() + w * 4, w * 3);
200 jevois::rawimage::writeText(outimg, "Box filter - NEON", w * 2 + 3, 3, jevois::yuyv::White);
201
202 // Show processing fps:
203 jevois::rawimage::writeText(outimg, cpufps, w + 3, h - 13, jevois::yuyv::White);
204 jevois::rawimage::writeText(outimg, neonfps, w * 2 + 3, h - 13, jevois::yuyv::White);
205
206 // Send the output image with our processing results to the host over USB:
207 outframe.send();
208 }
209};
210
211// Allow the module to be loaded as a shared object (.so) file:
// The registered name must be the module class implemented in this file, DemoNeon:
JEVOIS_REGISTER_MODULE(DemoNeon);
int h
ImVec2 V
#define B2
Definition Surprise.C:82
#define B1
Definition Surprise.C:81
Simple demo of ARM Neon (SIMD) extensions, comparing a box filter (blur) between CPU and Neon.
Definition DemoNeon.C:137
virtual ~DemoNeon()
Virtual destructor for safe inheritance.
Definition DemoNeon.C:143
virtual void process(jevois::InputFrame &&inframe, jevois::OutputFrame &&outframe) override
Processing function.
Definition DemoNeon.C:146
JEVOIS_DECLARE_PARAMETER(kernelw, unsigned int, "Kernel width (pixels)", 5, ParamCateg)
Parameter.
JEVOIS_DECLARE_PARAMETER(kernelh, unsigned int, "Kernel height (pixels)", 5, ParamCateg)
Parameter.
friend friend class Module
unsigned int fmt
unsigned int width
unsigned int height
void require(char const *info, unsigned int w, unsigned int h, unsigned int f) const
std::string const & stop(double *seconds)
cv::Mat convertToCvRGBA(RawImage const &src)
void paste(RawImage const &src, RawImage &dest, int dx, int dy)
void writeText(RawImage &img, std::string const &txt, int x, int y, unsigned int col, Font font=Font6x10)
std::future< std::invoke_result_t< std::decay_t< Function >, std::decay_t< Args >... > > async(Function &&f, Args &&... args)
unsigned short constexpr White