1 /*
2 *
3 * This file is part of FFmpeg.
4 *
5 * FFmpeg is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * FFmpeg is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <string.h>
21
26
30
32
/*
 * Fill a buffer with pseudo-random data.
 *
 * Starting at offset 0 and advancing 4 bytes per step while the offset is
 * below size, one rnd() value is stored at buf + offset through AV_WN32.
 * When size is not a multiple of 4 the final store therefore begins below
 * size but may extend past it, so the buffer must have room for that.
 *
 * Both parameters are parenthesized in the expansion so that arguments
 * such as `w | 4` or `cond ? a : b` keep their intended precedence.
 * Note that size is re-evaluated on every iteration.
 */
#define randomize_buffers(buf, size)            \
    do {                                        \
        for (int j = 0; j < (size); j += 4)     \
            AV_WN32((buf) + j, rnd());          \
    } while (0)
39
/* Table of six widths, listed in ascending order from 12 to 128. */
static const uint8_t width[] = { 12, 16, 20, 32, 36, 128 };
42 {12,16,12}, {16,16,16}, {20,23,25}, {32,18,48}, {8,128,16}, {128,128,128}
43 };
44
45 #define MAX_STRIDE 128
46 #define MAX_HEIGHT 128
47
49 {
55
57
62
64 for (
i = 0;
i < 6;
i ++) {
69 }
71 }
72 }
73
75 {
77
86
87 declare_func(
void, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
89 int lumStride, int chromStride, int srcStride);
90
93
95 for (
i = 0;
i < 6;
i ++) {
102
111 }
114 }
115 }
116
118 #define MAX_LINE_SIZE 1920
119 #define BUFSIZE (NUM_LINES * MAX_LINE_SIZE)
120
122 {
123 for (
size_t i = 0;
i < n;
i++) {
125 return 1;
126 }
127 return 0;
128 }
129
131 {
134
142
144 uint8_t *vdst,
int width,
int height,
int lumStride,
146
148
151 int negstride = input_size < 0;
152 const char *negstride_str = negstride ? "_negstride" : "";
154 int linesize =
width + 32;
155 /* calculate height based on specified width to use the entire buffer. */
159 uint8_t *dst_y_0 = buf_y_0;
160 uint8_t *dst_y_1 = buf_y_1;
161 uint8_t *dst_u_0 = buf_u_0;
162 uint8_t *dst_u_1 = buf_u_1;
163 uint8_t *dst_v_0 = buf_v_0;
164 uint8_t *dst_v_1 = buf_v_1;
165
166 if (negstride) {
169 dst_y_0 += (
height - 1) * linesize;
170 dst_y_1 += (
height - 1) * linesize;
171 dst_u_0 += ((
height / 2) - 1) * (linesize / 2);
172 dst_u_1 += ((
height / 2) - 1) * (linesize / 2);
173 dst_v_0 += ((
height / 2) - 1) * (linesize / 2);
174 dst_v_1 += ((
height / 2) - 1) * (linesize / 2);
175 linesize *= -1;
176 }
177
179 memset(buf_y_0, 0xFF,
BUFSIZE);
180 memset(buf_y_1, 0xFF,
BUFSIZE);
181 memset(buf_u_0, 0xFF,
BUFSIZE / 4);
182 memset(buf_u_1, 0xFF,
BUFSIZE / 4);
183 memset(buf_v_0, 0xFF,
BUFSIZE / 4);
184 memset(buf_v_1, 0xFF,
BUFSIZE / 4);
185
187 linesize, linesize / 2, linesize * 3,
ctx->input_rgb2yuv_table);
189 linesize, linesize / 2, linesize * 3,
ctx->input_rgb2yuv_table);
195 linesize, linesize / 2, linesize * 3,
ctx->input_rgb2yuv_table);
196 }
197 }
198 }
199
200 #undef NUM_LINES
201 #undef MAX_LINE_SIZE
202 #undef BUFSIZE
203
205 {
210 // Intentionally using unaligned buffers, as this function doesn't have
211 // any alignment requirements.
212 uint8_t *
src0 = src0_buf + 1;
213 uint8_t *
src1 = src1_buf + 1;
214 uint8_t *dst0 = dst0_buf + 2;
215 uint8_t *dst1 = dst1_buf + 2;
216
218 uint8_t *, int, int, int, int, int);
219
222
224 for (
int i = 0;
i <= 16;
i++) {
225 // Try all widths [1,16], and try one random width.
226
229
230 int src0_offset = 0, src0_stride =
MAX_STRIDE;
231 int src1_offset = 0, src1_stride =
MAX_STRIDE;
232 int dst_offset = 0, dst_stride = 2 *
MAX_STRIDE;
233
236
237 // Try different combinations of negative strides
239 src0_offset = (
h-1)*src0_stride;
240 src0_stride = -src0_stride;
241 }
243 src1_offset = (
h-1)*src1_stride;
244 src1_stride = -src1_stride;
245 }
247 dst_offset = (
h-1)*dst_stride;
248 dst_stride = -dst_stride;
249 }
250
252 w,
h, src0_stride, src1_stride, dst_stride);
254 w,
h, src0_stride, src1_stride, dst_stride);
255 // Check a one pixel-pair edge around the destination area,
256 // to catch overwrites past the end.
258 2 *
w + 2,
h + 1,
"dst");
259 }
260
263 }
265 // Bench the function in a more typical case, with aligned
266 // buffers and widths.
269 }
270 }
271
273 {
279 // Intentionally using unaligned buffers, as this function doesn't have
280 // any alignment requirements.
281 uint8_t *
src = src_buf + 2;
282 uint8_t *dst0_u = dst0_u_buf + 1;
283 uint8_t *dst0_v = dst0_v_buf + 1;
284 uint8_t *dst1_u = dst1_u_buf + 1;
285 uint8_t *dst1_v = dst1_v_buf + 1;
286
289 int dst1Stride, int dst2Stride);
290
292
294 for (
int i = 0;
i <= 16;
i++) {
295 // Try all widths [1,16], and try one random width.
296
299
300 int src_offset = 0, src_stride = 2 *
MAX_STRIDE;
301 int dst_u_offset = 0, dst_u_stride =
MAX_STRIDE;
302 int dst_v_offset = 0, dst_v_stride =
MAX_STRIDE;
303
308
309 // Try different combinations of negative strides
311 src_offset = (
h-1)*src_stride;
312 src_stride = -src_stride;
313 }
315 dst_u_offset = (
h-1)*dst_u_stride;
316 dst_u_stride = -dst_u_stride;
317 }
319 dst_v_offset = (
h-1)*dst_v_stride;
320 dst_v_stride = -dst_v_stride;
321 }
322
323 call_ref(
src + src_offset, dst0_u + dst_u_offset, dst0_v + dst_v_offset,
324 w,
h, src_stride, dst_u_stride, dst_v_stride);
325 call_new(
src + src_offset, dst1_u + dst_u_offset, dst1_v + dst_v_offset,
326 w,
h, src_stride, dst_u_stride, dst_v_stride);
327 // Check a one pixel-pair edge around the destination area,
328 // to catch overwrites past the end.
330 w + 1,
h + 1,
"dst_u");
332 w + 1,
h + 1,
"dst_v");
333 }
334
337 }
339 // Bench the function in a more typical case, with aligned
340 // buffers and widths.
343 }
344 }
345
346 #define MAX_LINE_SIZE 1920
355 };
356
358 {
360
365
367 const uint8_t *unused1,
const uint8_t *unused2,
int width,
369
372
375
378
381
383 const uint8_t *
src =
desc->nb_components == 3 ? src24 : src32;
386
389
390 if (memcmp(dst0_y, dst1_y,
w * 2))
392
393 if (
desc->nb_components == 3 ||
394 // only bench native endian formats
397 }
398 }
399 }
400 }
401
403 {
405
412
414 const uint8_t *
src1,
const uint8_t *
src2,
const uint8_t *src3,
415 int width, uint32_t *pal,
void *opq);
416
419
423
424 ctx->chrSrcHSubSample = (
i % 2) ? 0 : 1;
428
431
433 ctx->chrSrcHSubSample ?
"_half" :
"",
435 const uint8_t *
src =
desc->nb_components == 3 ? src24 : src32;
440
443
444 if (memcmp(dst0_u, dst1_u,
w * 2) || memcmp(dst0_v, dst1_v,
w * 2))
446
447 if (
desc->nb_components == 3 ||
448 // only bench native endian formats
451 }
452 }
453 }
454 }
455
457 {
459
463
465 const uint8_t *
src2,
const uint8_t *src3,
int width,
467
469
472 if (
desc->nb_components < 4)
473 continue;
474
477
480
484
487
488 if (memcmp(dst0_y, dst1_y,
w * 2))
490
491 // only bench native endian formats
494 }
495 }
496 }
497 }
498
499
533 };
534
535 #define INPUT_SIZE 512
536
538 {
539 static const int alpha_values[] = {0, 2048, 4096};
540
543 const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
544 const int16_t *alpSrc, uint8_t *dest,
545 int dstW, int uvalpha, int y);
546
547 const int16_t *luma;
548 const int16_t *chru[2];
549 const int16_t *chrv[2];
550 const int16_t *
alpha;
551
556
559
564
565 /* Limit to 14 bit input range */
567 src_y[
i] &= 0x3FFF3FFF;
568 src_a[
i] &= 0x3FFF3FFF;
569 src_u[
i] &= 0x3FFF3FFF;
570 src_v[
i] &= 0x3FFF3FFF;
571 }
572
573 luma = (int16_t *)src_y;
574 alpha = (int16_t *)src_a;
575 for (
int i = 0;
i < 2;
i++) {
578 }
579
585
588
594
596
598 const int chr_alpha = alpha_values[ai];
602
605
606 if (memcmp(dst0, dst1, line_size))
608
610 }
611 }
612
614 }
615 }
616
618 {
619 static const int alpha_values[] = {0, 2048, 4096};
620
623 const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
624 const int16_t *alpSrc[2], uint8_t *dest,
625 int dstW, int yalpha, int uvalpha, int y);
626
627 const int16_t *luma[2];
628 const int16_t *chru[2];
629 const int16_t *chrv[2];
630 const int16_t *
alpha[2];
631
636
639
644
645 /* Limit to 14 bit input range */
647 src_y[
i] &= 0x3FFF3FFF;
648 src_u[
i] &= 0x3FFF3FFF;
649 src_v[
i] &= 0x3FFF3FFF;
650 src_a[
i] &= 0x3FFF3FFF;
651 }
652
653 for (
int i = 0;
i < 2;
i++) {
658 }
659
665
668
674
676
678 const int lum_alpha = alpha_values[ai];
679 const int chr_alpha = alpha_values[ai];
683
686
687 if (memcmp(dst0, dst1, line_size))
689
691 }
692 }
693
695 }
696 }
697
699 {
700 #define LARGEST_FILTER 16
701 static const int filter_sizes[] = {2, 16};
702
705 const int16_t **lumSrcx, int lumFilterSize,
706 const int16_t *chrFilter, const int16_t **chrUSrcx,
707 const int16_t **chrVSrcx, int chrFilterSize,
708 const int16_t **alpSrcx, uint8_t *dest,
709 int dstW, int y);
710
715
718
723
726
731
732 /* Limit to 14 bit input range */
734 src_y[
i] &= 0x3FFF3FFF;
735 src_u[
i] &= 0x3FFF3FFF;
736 src_v[
i] &= 0x3FFF3FFF;
737 src_a[
i] &= 0x3FFF3FFF;
738 }
739
745 }
746
752
755
761
763
765 const int luma_filter_size = filter_sizes[fsi];
766 const int chr_filter_size = filter_sizes[fsi];
767
768 for (
int i = 0;
i < luma_filter_size;
i++)
769 luma_filter[
i] = -((1 << 12) / (luma_filter_size - 1));
770 luma_filter[
rnd() % luma_filter_size] = (1 << 13) - 1;
771
772 for (
int i = 0;
i < chr_filter_size;
i++)
773 chr_filter[
i] = -((1 << 12) / (chr_filter_size - 1));
774 chr_filter[
rnd() % chr_filter_size] = (1 << 13) - 1;
775
779
780 call_ref(
c, luma_filter, luma, luma_filter_size,
781 chr_filter, chru, chrv, chr_filter_size,
783
784 call_new(
c, luma_filter, luma, luma_filter_size,
785 chr_filter, chru, chrv, chr_filter_size,
787
788 if (memcmp(dst0, dst1, line_size))
790
791 bench_new(
c, luma_filter, luma, luma_filter_size,
792 chr_filter, chru, chrv, chr_filter_size,
794 }
795 }
796
798 }
799 }
800
801 #undef INPUT_SIZE
802 #undef LARGEST_FILTER
803
805 {
807
809
811 report(
"shuffle_bytes_2103");
812
814 report(
"shuffle_bytes_0321");
815
817 report(
"shuffle_bytes_1230");
818
820 report(
"shuffle_bytes_3012");
821
823 report(
"shuffle_bytes_3210");
824
826 report(
"shuffle_bytes_3102");
827
829 report(
"shuffle_bytes_2013");
830
832 report(
"shuffle_bytes_1203");
833
835 report(
"shuffle_bytes_2130");
836
839
841 report(
"interleave_bytes");
842
844 report(
"deinterleave_bytes");
845
851
854
857
860
863
865
868
871
874 }