Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_resize.c
1/*
2 * Copyright 2013-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/* license of OpenCV */
29/*M///////////////////////////////////////////////////////////////////////////////////////
30//
31// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
32//
33// By downloading, copying, installing or using the software you agree to this license.
34// If you do not agree to this license, do not download, install,
35// copy or use the software.
36//
37//
38// License Agreement
39// For Open Source Computer Vision Library
40//
41// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
42// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
43// Third party copyrights are property of their respective owners.
44//
45// Redistribution and use in source and binary forms, with or without modification,
46// are permitted provided that the following conditions are met:
47//
48// * Redistribution's of source code must retain the above copyright notice,
49// this list of conditions and the following disclaimer.
50//
51// * Redistribution's in binary form must reproduce the above copyright notice,
52// this list of conditions and the following disclaimer in the documentation
53// and/or other materials provided with the distribution.
54//
55// * The name of the copyright holders may not be used to endorse or promote products
56// derived from this software without specific prior written permission.
57//
58// This software is provided by the copyright holders and contributors "as is" and
59// any express or implied warranties, including, but not limited to, the implied
60// warranties of merchantability and fitness for a particular purpose are disclaimed.
61// In no event shall the Intel Corporation or contributors be liable for any direct,
62// indirect, incidental, special, exemplary, or consequential damages
63// (including, but not limited to, procurement of substitute goods or services;
64// loss of use, data, or profits; or business interruption) however caused
65// and on any theory of liability, whether in contract, strict liability,
66// or tort (including negligence or otherwise) arising in any way out of
67// the use of this software, even if advised of the possibility of such damage.
68//
69//M*/
70
71/*
72 * NE10 Library : imgproc/NE10_resize.c
73 */
74
75#include "NE10.h"
76
/* Fixed-point precision of the interpolation weights: a weight of 1.0 is
 * stored as INTER_RESIZE_COEF_SCALE, i.e. 2^INTER_RESIZE_COEF_BITS. The scale
 * is derived from the bit count so the two can never disagree. */
#define INTER_RESIZE_COEF_BITS 11
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
/* Capacity of the per-tap scratch arrays used by the resize routines. */
#define NE10_MAX_ESIZE 16
93
94static inline ne10_uint32_t ne10_align_size (ne10_int32_t sz, ne10_int32_t n)
95{
96 return (sz + n - 1) & -n;
97}
98
99static inline ne10_int32_t ne10_floor (ne10_float32_t a)
100{
101 return ( ( (a) >= 0) ? ( (ne10_int32_t) a) : ( (ne10_int32_t) a - 1));
102}
103
104static inline ne10_int32_t ne10_clip (ne10_int32_t x, ne10_int32_t a, ne10_int32_t b)
105{
106 return (x >= a ? (x < b ? x : b - 1) : a);
107}
108
109static inline ne10_uint8_t ne10_cast_op (ne10_int32_t val)
110{
111 ne10_int32_t bits = INTER_RESIZE_COEF_BITS * 2;
112 ne10_int32_t SHIFT = bits;
113 ne10_int32_t DELTA = 1 << (bits - 1) ;
114 ne10_int32_t temp = NE10_MIN (255, NE10_MAX (0, (val + DELTA) >> SHIFT));
115 return (ne10_uint8_t) (temp);
116};
117
118static void ne10_img_hresize_linear_c (const ne10_uint8_t** src,
119 ne10_int32_t** dst,
120 ne10_int32_t count,
121 const ne10_int32_t* xofs,
122 const ne10_int16_t* alpha,
123 ne10_int32_t swidth,
124 ne10_int32_t dwidth,
125 ne10_int32_t cn,
126 ne10_int32_t xmin,
127 ne10_int32_t xmax)
128{
129 ne10_int32_t dx, k;
130
131 ne10_int32_t dx0 = 0;
132
133 //for (k = 0; k <= count - 2; k++)
134 if (count == 2)
135 {
136 k = 0;
137 const ne10_uint8_t *S0 = src[k], *S1 = src[k + 1];
138 ne10_int32_t *D0 = dst[k], *D1 = dst[k + 1];
139 for (dx = dx0; dx < xmax; dx++)
140 {
141 ne10_int32_t sx = xofs[dx];
142 ne10_int32_t a0 = alpha[dx * 2], a1 = alpha[dx * 2 + 1];
143 ne10_int32_t t0 = S0[sx] * a0 + S0[sx + cn] * a1;
144 ne10_int32_t t1 = S1[sx] * a0 + S1[sx + cn] * a1;
145 D0[dx] = t0;
146 D1[dx] = t1;
147 }
148
149 for (; dx < dwidth; dx++)
150 {
151 ne10_int32_t sx = xofs[dx];
152 D0[dx] = (ne10_int32_t) S0[sx] * INTER_RESIZE_COEF_SCALE;
153 D1[dx] = (ne10_int32_t) S1[sx] * INTER_RESIZE_COEF_SCALE;
154 }
155 }
156
157 //for (; k < count; k++)
158 if (count == 1)
159 {
160 k = 0;
161 const ne10_uint8_t *S = src[k];
162 ne10_int32_t *D = dst[k];
163 for (dx = 0; dx < xmax; dx++)
164 {
165 ne10_int32_t sx = xofs[dx];
166 D[dx] = S[sx] * alpha[dx * 2] + S[sx + cn] * alpha[dx * 2 + 1];
167 }
168
169 for (; dx < dwidth; dx++)
170 D[dx] = (ne10_int32_t) S[xofs[dx]] * INTER_RESIZE_COEF_SCALE;
171 }
172}
173
174
175static void ne10_img_vresize_linear_c (const ne10_int32_t** src, ne10_uint8_t* dst, const ne10_int16_t* beta, ne10_int32_t width)
176{
177 ne10_int32_t b0 = beta[0], b1 = beta[1];
178 const ne10_int32_t *S0 = src[0], *S1 = src[1];
179
180 ne10_int32_t x = 0;
181 for (; x <= width - 4; x += 4)
182 {
183 ne10_int32_t t0, t1;
184 t0 = S0[x] * b0 + S1[x] * b1;
185 t1 = S0[x + 1] * b0 + S1[x + 1] * b1;
186 dst[x] = ne10_cast_op (t0);
187 dst[x + 1] = ne10_cast_op (t1);
188 t0 = S0[x + 2] * b0 + S1[x + 2] * b1;
189 t1 = S0[x + 3] * b0 + S1[x + 3] * b1;
190 dst[x + 2] = ne10_cast_op (t0);
191 dst[x + 3] = ne10_cast_op (t1);
192 }
193
194 for (; x < width; x++)
195 dst[x] = ne10_cast_op (S0[x] * b0 + S1[x] * b1);
196}
197
198static void ne10_img_resize_generic_linear_c (ne10_uint8_t* src,
199 ne10_uint8_t* dst,
200 const ne10_int32_t* xofs,
201 const ne10_int16_t* _alpha,
202 const ne10_int32_t* yofs,
203 const ne10_int16_t* _beta,
204 ne10_int32_t xmin,
205 ne10_int32_t xmax,
206 ne10_int32_t ksize,
207 ne10_int32_t srcw,
208 ne10_int32_t srch,
209 ne10_int32_t srcstep,
210 ne10_int32_t dstw,
211 ne10_int32_t dsth,
212 ne10_int32_t channels)
213{
214
215 const ne10_int16_t* alpha = _alpha;
216 const ne10_int16_t* beta = _beta;
217 ne10_int32_t cn = channels;
218 srcw *= cn;
219 dstw *= cn;
220
221 ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16);
222 ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4);
223
224
225 ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize * sizeof (ne10_int32_t));
226
227 const ne10_uint8_t* srows[NE10_MAX_ESIZE];
228 ne10_int32_t* rows[NE10_MAX_ESIZE];
229 ne10_int32_t prev_sy[NE10_MAX_ESIZE];
230 ne10_int32_t k, dy;
231 xmin *= cn;
232 xmax *= cn;
233
234 for (k = 0; k < ksize; k++)
235 {
236 prev_sy[k] = -1;
237 rows[k] = (ne10_int32_t*) buffer_ + bufstep * k;
238 }
239
240 // image resize is a separable operation. In case of not too strong
241 for (dy = 0; dy < dsth; dy++, beta += ksize)
242 {
243 ne10_int32_t sy0 = yofs[dy], k, k0 = ksize, k1 = 0, ksize2 = ksize / 2;
244
245 for (k = 0; k < ksize; k++)
246 {
247 ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch);
248 for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++)
249 {
250 if (sy == prev_sy[k1]) // if the sy-th row has been computed already, reuse it.
251 {
252 if (k1 > k)
253 memcpy (rows[k], rows[k1], bufstep * sizeof (rows[0][0]));
254 break;
255 }
256 }
257 if (k1 == ksize)
258 k0 = NE10_MIN (k0, k); // remember the first row that needs to be computed
259 srows[k] = (const ne10_uint8_t*) (src + srcstep * sy);
260 prev_sy[k] = sy;
261 }
262
263 if (k0 < ksize)
264 ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha,
265 srcw, dstw, cn, xmin, xmax);
266
267 ne10_img_vresize_linear_c ( (const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw);
268 }
269
270 NE10_FREE (buffer_);
271}
272
273static void ne10_img_resize_cal_offset_linear (ne10_int32_t* xofs,
274 ne10_int16_t* ialpha,
275 ne10_int32_t* yofs,
276 ne10_int16_t* ibeta,
277 ne10_int32_t *xmin,
278 ne10_int32_t *xmax,
279 ne10_int32_t ksize,
280 ne10_int32_t ksize2,
281 ne10_int32_t srcw,
282 ne10_int32_t srch,
283 ne10_int32_t dstw,
284 ne10_int32_t dsth,
285 ne10_int32_t channels)
286{
287 ne10_float32_t inv_scale_x = (ne10_float32_t) dstw / srcw;
288 ne10_float32_t inv_scale_y = (ne10_float32_t) dsth / srch;
289
290 ne10_int32_t cn = channels;
291 ne10_float32_t scale_x = 1. / inv_scale_x;
292 ne10_float32_t scale_y = 1. / inv_scale_y;
293 ne10_int32_t k, sx, sy, dx, dy;
294
295
296 ne10_float32_t fx, fy;
297
298 ne10_float32_t cbuf[NE10_MAX_ESIZE];
299
300 for (dx = 0; dx < dstw; dx++)
301 {
302 fx = (ne10_float32_t) ( (dx + 0.5) * scale_x - 0.5);
303 sx = ne10_floor (fx);
304 fx -= sx;
305
306 if (sx < ksize2 - 1)
307 {
308 *xmin = dx + 1;
309 if (sx < 0)
310 fx = 0, sx = 0;
311 }
312
313 if (sx + ksize2 >= srcw)
314 {
315 *xmax = NE10_MIN (*xmax, dx);
316 if (sx >= srcw - 1)
317 fx = 0, sx = srcw - 1;
318 }
319
320 for (k = 0, sx *= cn; k < cn; k++)
321 xofs[dx * cn + k] = sx + k;
322
323 cbuf[0] = 1.f - fx;
324 cbuf[1] = fx;
325
326 for (k = 0; k < ksize; k++)
327 ialpha[dx * cn * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE);
328 for (; k < cn * ksize; k++)
329 ialpha[dx * cn * ksize + k] = ialpha[dx * cn * ksize + k - ksize];
330 }
331
332 for (dy = 0; dy < dsth; dy++)
333 {
334 fy = (ne10_float32_t) ( (dy + 0.5) * scale_y - 0.5);
335 sy = ne10_floor (fy);
336 fy -= sy;
337
338 yofs[dy] = sy;
339
340 cbuf[0] = 1.f - fy;
341 cbuf[1] = fy;
342
343 for (k = 0; k < ksize; k++)
344 ibeta[dy * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE);
345
346 }
347
348}
349
367void ne10_img_resize_bilinear_rgba_c (ne10_uint8_t* dst,
368 ne10_uint32_t dst_width,
369 ne10_uint32_t dst_height,
370 ne10_uint8_t* src,
371 ne10_uint32_t src_width,
372 ne10_uint32_t src_height,
373 ne10_uint32_t src_stride)
374{
375 ne10_int32_t dstw = dst_width;
376 ne10_int32_t dsth = dst_height;
377 ne10_int32_t srcw = src_width;
378 ne10_int32_t srch = src_height;
379
380 ne10_int32_t cn = 4;
381
382
383 ne10_int32_t xmin = 0;
384 ne10_int32_t xmax = dstw;
385 ne10_int32_t width = dstw * cn;
386
387 ne10_int32_t ksize = 0, ksize2;
388 ksize = 2;
389 ksize2 = ksize / 2;
390
391 ne10_uint8_t *buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (sizeof (ne10_int32_t) + sizeof (ne10_float32_t) * ksize));
392
393 ne10_int32_t* xofs = (ne10_int32_t*) buffer_;
394 ne10_int32_t* yofs = xofs + width;
395 ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth);
396 ne10_int16_t* ibeta = ialpha + width * ksize;
397
398 ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn);
399
400 ne10_img_resize_generic_linear_c (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn);
401 NE10_FREE (buffer_);
402}
403
404extern void ne10_img_hresize_4channels_linear_neon (const ne10_uint8_t** src,
405 ne10_int32_t** dst,
406 ne10_int32_t count,
407 const ne10_int32_t* xofs,
408 const ne10_int16_t* alpha,
409 ne10_int32_t swidth,
410 ne10_int32_t dwidth,
411 ne10_int32_t cn,
412 ne10_int32_t xmin,
413 ne10_int32_t xmax);
414extern void ne10_img_vresize_linear_neon (const ne10_int32_t** src, ne10_uint8_t* dst, const ne10_int16_t* beta, ne10_int32_t width);
415
416static void ne10_img_resize_generic_linear_neon (ne10_uint8_t* src,
417 ne10_uint8_t* dst,
418 const ne10_int32_t* xofs,
419 const ne10_int16_t* _alpha,
420 const ne10_int32_t* yofs,
421 const ne10_int16_t* _beta,
422 ne10_int32_t xmin,
423 ne10_int32_t xmax,
424 ne10_int32_t ksize,
425 ne10_int32_t srcw,
426 ne10_int32_t srch,
427 ne10_int32_t srcstep,
428 ne10_int32_t dstw,
429 ne10_int32_t dsth,
430 ne10_int32_t channels)
431{
432
433 const ne10_int16_t* alpha = _alpha;
434 const ne10_int16_t* beta = _beta;
435 ne10_int32_t cn = channels;
436 srcw *= cn;
437 dstw *= cn;
438
439 ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16);
440 ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4);
441
442
443 ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize * sizeof (ne10_int32_t));
444
445 const ne10_uint8_t* srows[NE10_MAX_ESIZE];
446 ne10_int32_t* rows[NE10_MAX_ESIZE];
447 ne10_int32_t prev_sy[NE10_MAX_ESIZE];
448 ne10_int32_t k, dy;
449 xmin *= cn;
450 xmax *= cn;
451
452 for (k = 0; k < ksize; k++)
453 {
454 prev_sy[k] = -1;
455 rows[k] = (ne10_int32_t*) buffer_ + bufstep * k;
456 }
457
458 // image resize is a separable operation. In case of not too strong
459 for (dy = 0; dy < dsth; dy++, beta += ksize)
460 {
461 ne10_int32_t sy0 = yofs[dy], k, k0 = ksize, k1 = 0, ksize2 = ksize / 2;
462
463 for (k = 0; k < ksize; k++)
464 {
465 ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch);
466 for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++)
467 {
468 if (sy == prev_sy[k1]) // if the sy-th row has been computed already, reuse it.
469 {
470 if (k1 > k)
471 memcpy (rows[k], rows[k1], bufstep * sizeof (rows[0][0]));
472 break;
473 }
474 }
475 if (k1 == ksize)
476 k0 = NE10_MIN (k0, k); // remember the first row that needs to be computed
477 srows[k] = (const ne10_uint8_t*) (src + srcstep * sy);
478 prev_sy[k] = sy;
479 }
480
481 if (k0 < ksize)
482 {
483 if (cn == 4)
484 ne10_img_hresize_4channels_linear_neon (srows + k0, rows + k0, ksize - k0, xofs, alpha,
485 srcw, dstw, cn, xmin, xmax);
486 else
487 ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha,
488 srcw, dstw, cn, xmin, xmax);
489 }
490 ne10_img_vresize_linear_neon ( (const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw);
491 }
492
493 NE10_FREE (buffer_);
494}
495
509 ne10_uint32_t dst_width,
510 ne10_uint32_t dst_height,
511 ne10_uint8_t* src,
512 ne10_uint32_t src_width,
513 ne10_uint32_t src_height,
514 ne10_uint32_t src_stride)
515{
516 ne10_int32_t dstw = dst_width;
517 ne10_int32_t dsth = dst_height;
518 ne10_int32_t srcw = src_width;
519 ne10_int32_t srch = src_height;
520
521 ne10_int32_t cn = 4;
522
523
524 ne10_int32_t xmin = 0;
525 ne10_int32_t xmax = dstw;
526 ne10_int32_t width = dstw * cn;
527
528 ne10_int32_t ksize = 0, ksize2;
529 ksize = 2;
530 ksize2 = ksize / 2;
531
532 ne10_uint8_t *buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (sizeof (ne10_int32_t) + sizeof (ne10_float32_t) * ksize));
533
534 ne10_int32_t* xofs = (ne10_int32_t*) buffer_;
535 ne10_int32_t* yofs = xofs + width;
536 ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth);
537 ne10_int16_t* ibeta = ialpha + width * ksize;
538
539 ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn);
540
541 ne10_img_resize_generic_linear_neon (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn);
542 NE10_FREE (buffer_);
543}
544
void ne10_img_resize_bilinear_rgba_neon(ne10_uint8_t *dst, ne10_uint32_t dst_width, ne10_uint32_t dst_height, ne10_uint8_t *src, ne10_uint32_t src_width, ne10_uint32_t src_height, ne10_uint32_t src_stride)
Resizes an image of 8-bit RGBA data using bilinear interpolation (NEON implementation).
void ne10_img_resize_bilinear_rgba_c(ne10_uint8_t *dst, ne10_uint32_t dst_width, ne10_uint32_t dst_height, ne10_uint8_t *src, ne10_uint32_t src_width, ne10_uint32_t src_height, ne10_uint32_t src_stride)
Resizes an image of 8-bit RGBA data using bilinear interpolation (C implementation).