1 /*
2 * Assembly testing and benchmarking tool
3 * Copyright (c) 2015 Henrik Gramner
4 * Copyright (c) 2008 Loren Merritt
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "config.h"
24 #include "config_components.h"
25
26 #ifndef _GNU_SOURCE
27 # define _GNU_SOURCE // for syscall (performance monitoring API), strsignal()
28 #endif
29
30 #include <signal.h>
31 #include <stdarg.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
40
41 #if HAVE_IO_H
42 #include <io.h>
43 #endif
44
45 #if defined(_WIN32) && !defined(SIGBUS)
46 /* non-standard, use the same value as mingw-w64 */
47 #define SIGBUS 10
48 #endif
49
50 #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
51 #include <windows.h>
52 #define COLOR_RED FOREGROUND_RED
53 #define COLOR_GREEN FOREGROUND_GREEN
54 #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
55 #else
58 #define COLOR_YELLOW 3
59 #endif
60
61 #if HAVE_UNISTD_H
62 #include <unistd.h>
63 #endif
64
65 #if !HAVE_ISATTY
67 #endif
68
69 #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
71
72 void (*checkasm_checked_call)(
void *
func,
int dummy, ...) = checkasm_checked_call_novfp;
73 #endif
74
75 /* List of tests to invoke */
76 static const struct {
80 #if CONFIG_AVCODEC
81 #if CONFIG_AAC_DECODER
84 #endif
85 #if CONFIG_AAC_ENCODER
87 #endif
88 #if CONFIG_AC3DSP
90 #endif
91 #if CONFIG_ALAC_DECODER
93 #endif
94 #if CONFIG_AUDIODSP
96 #endif
97 #if CONFIG_BLOCKDSP
99 #endif
100 #if CONFIG_BSWAPDSP
102 #endif
103 #if CONFIG_DCA_DECODER
105 #endif
106 #if CONFIG_EXR_DECODER
108 #endif
109 #if CONFIG_FLAC_DECODER
111 #endif
112 #if CONFIG_FMTCONVERT
114 #endif
115 #if CONFIG_G722DSP
117 #endif
118 #if CONFIG_H264CHROMA
120 #endif
121 #if CONFIG_H264DSP
123 #endif
124 #if CONFIG_H264PRED
126 #endif
127 #if CONFIG_H264QPEL
129 #endif
130 #if CONFIG_HEVC_DECODER
136 #endif
137 #if CONFIG_HUFFYUV_DECODER
139 #endif
140 #if CONFIG_IDCTDSP
142 #endif
143 #if CONFIG_JPEG2000_DECODER
145 #endif
146 #if CONFIG_LLAUDDSP
148 #endif
149 #if CONFIG_HUFFYUVDSP
151 #endif
152 #if CONFIG_LLVIDENCDSP
154 #endif
155 #if CONFIG_LPC
157 #endif
158 #if CONFIG_ME_CMP
160 #endif
161 #if CONFIG_OPUS_DECODER
163 #endif
164 #if CONFIG_PIXBLOCKDSP
166 #endif
167 #if CONFIG_RV34DSP
169 #endif
170 #if CONFIG_SVQ1_ENCODER
172 #endif
173 #if CONFIG_TAK_DECODER
175 #endif
176 #if CONFIG_UTVIDEO_DECODER
178 #endif
179 #if CONFIG_V210_DECODER
181 #endif
182 #if CONFIG_V210_ENCODER
184 #endif
185 #if CONFIG_VC1DSP
187 #endif
188 #if CONFIG_VP8DSP
190 #endif
191 #if CONFIG_VP9_DECODER
193 #endif
194 #if CONFIG_VIDEODSP
196 #endif
197 #if CONFIG_VORBIS_DECODER
199 #endif
200 #if CONFIG_VVC_DECODER
202 #endif
203 #endif
204 #if CONFIG_AVFILTER
205 #if CONFIG_AFIR_FILTER
207 #endif
208 #if CONFIG_BLEND_FILTER
210 #endif
211 #if CONFIG_BWDIF_FILTER
213 #endif
214 #if CONFIG_COLORSPACE_FILTER
216 #endif
217 #if CONFIG_EQ_FILTER
219 #endif
220 #if CONFIG_GBLUR_FILTER
222 #endif
223 #if CONFIG_HFLIP_FILTER
225 #endif
226 #if CONFIG_NLMEANS_FILTER
228 #endif
229 #if CONFIG_THRESHOLD_FILTER
231 #endif
232 #if CONFIG_SOBEL_FILTER
234 #endif
235 #endif
236 #if CONFIG_SWSCALE
240 #endif
241 #if CONFIG_AVUTIL
245 #endif
247 };
248
249 /* List of cpu flags to check */
250 static const struct {
255 #if ARCH_AARCH64
260 #elif ARCH_ARM
268 #elif ARCH_PPC
272 #elif ARCH_RISCV
282 #elif ARCH_MIPS
285 #elif ARCH_X86
304 #elif ARCH_LOONGARCH
307 #endif
309 };
310
318
319 /* Binary search tree node */
323 uint8_t
color;
/* 0 = red, 1 = black */
326
327 /* Internal state */
328 static struct {
337
338 /* perf */
341
348
349 /* PRNG state */
351
352 /* float compare support code */
354 {
356 }
357
359 {
361
364
366 // handle -0.0 == +0.0
368 }
369
370 if (llabs((
int64_t)x.
i - y.
i) <= max_ulp)
371 return 1;
372
373 return 0;
374 }
375
378 {
380
381 for (
i = 0;
i <
len;
i++) {
383 return 0;
384 }
385 return 1;
386 }
387
389 {
391 if (abs_diff < eps)
392 return 1;
393
394 fprintf(stderr,
"test failed comparing %g with %g (abs diff=%g with EPS=%g)\n",
a,
b, abs_diff, eps);
395
396 return 0;
397 }
398
401 {
403
404 for (
i = 0;
i <
len;
i++) {
406 return 0;
407 }
408 return 1;
409 }
410
412 {
414 }
415
417 unsigned max_ulp,
unsigned len)
418 {
420
421 for (
i = 0;
i <
len;
i++) {
423 return 0;
424 }
425 return 1;
426 }
427
429 {
430 double abs_diff =
fabs(
a -
b);
431
432 return abs_diff < eps;
433 }
434
437 {
439
440 for (
i = 0;
i <
len;
i++) {
442 return 0;
443 }
444 return 1;
445 }
446
447 /* Print colored text to stderr if the terminal supports it */
449 {
452
453 #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
454 static HANDLE con;
455 static WORD org_attributes;
456
458 CONSOLE_SCREEN_BUFFER_INFO con_info;
459 con = GetStdHandle(STD_ERROR_HANDLE);
460 if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
461 org_attributes = con_info.wAttributes;
463 } else
465 }
467 SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (
color & 0x0f));
468 #else
470 const char *term = getenv("TERM");
472 }
474 fprintf(stderr,
"\x1b[%d;3%dm", (
color & 0x08) >> 3,
color & 0x07);
475 #endif
476
478 vfprintf(stderr, fmt,
arg);
480
482 #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
483 SetConsoleTextAttribute(con, org_attributes);
484 #else
485 fprintf(stderr, "\x1b[0m");
486 #endif
487 }
488 }
489
490 /* Deallocate a tree */
492 {
495 while (v) {
497 free(v);
498 v = next;
499 }
500
504 }
505 }
506
507 /* Allocate a zero-initialized block, clean up and exit on failure */
509 {
510 void *ptr = calloc(1,
size);
511 if (!ptr) {
512 fprintf(stderr, "checkasm: malloc failed\n");
514 exit(1);
515 }
516 return ptr;
517 }
518
519 /* Get the suffix of the specified cpu flag */
521 {
523
526 return cpus[
i].suffix;
527
528 return "c";
529 }
530
532 {
533 return *(
const uint16_t*)
a - *(
const uint16_t*)
b;
534 }
535
536 /* Measure the overhead of the timing code (in decicycles) */
538 {
539 uint16_t nops[10000];
542
543 uint64_t t = 0;
544 for (
i = 0;
i < 10000;
i++) {
548 }
549
550 qsort(nops, 10000,
sizeof(uint16_t),
cmp_nop);
551 for (
i = 2500;
i < 7500;
i++)
553
554 return nop_sum / 500;
555 }
556
557 /* Print benchmark results */
559 {
562
563 /* Only print functions with at least one assembly version */
564 if (
f->versions.cpu ||
f->versions.next) {
566 do {
571 }
572 }
while ((v = v->
next));
573 }
574
576 }
577 }
578
579 /* ASCIIbetical sort except preserving natural order for numbers */
581 {
582 const char *start =
a;
583 int ascii_diff, digit_diff;
584
585 for (; !(ascii_diff = *(
const unsigned char*)
a - *(
const unsigned char*)
b) && *
a;
a++,
b++);
587
589 return digit_diff;
590
591 return ascii_diff;
592 }
593
594 /* Perform a tree rotation in the specified direction and return the new root */
596 {
598 f->child[dir^1] =
r->child[dir];
603 }
604
605 #define is_red(f) ((f) && !(f)->color)
606
607 /* Balance a left-leaning red-black tree at the specified node */
609 {
611
614 f->child[0]->color =
f->child[1]->color = 1;
615 }
616
621 }
622
623 /* Get a node with the specified name, creating it if it doesn't exist */
625 {
627
629 /* Search the tree for a matching node */
633
634 /* Rebalance the tree on the way up if a new node was inserted */
635 if (!
f->versions.func)
637 }
638 } else {
639 /* Allocate and insert a new node into the tree */
640 int name_length = strlen(
name);
642 memcpy(
f->name,
name, name_length + 1);
643 }
644
646 }
647
649
650 /* Crash handling: attempt to catch crashes and handle them
651 * gracefully instead of just aborting abruptly. */
652 #ifdef _WIN32
653 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
656
657 if (!
state.catch_signals)
658 return EXCEPTION_CONTINUE_SEARCH;
659
660 switch (e->ExceptionRecord->ExceptionCode) {
661 case EXCEPTION_FLT_DIVIDE_BY_ZERO:
662 case EXCEPTION_INT_DIVIDE_BY_ZERO:
664 break;
665 case EXCEPTION_ILLEGAL_INSTRUCTION:
666 case EXCEPTION_PRIV_INSTRUCTION:
668 break;
669 case EXCEPTION_ACCESS_VIOLATION:
670 case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
671 case EXCEPTION_DATATYPE_MISALIGNMENT:
672 case EXCEPTION_STACK_OVERFLOW:
674 break;
675 case EXCEPTION_IN_PAGE_ERROR:
677 break;
678 default:
679 return EXCEPTION_CONTINUE_SEARCH;
680 }
681 state.catch_signals = 0;
683 return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */
684 }
685 #endif
686 #else
688
691 .sa_flags = SA_RESETHAND,
692 };
693
695 if (
state.catch_signals) {
696 state.catch_signals = 0;
699 }
700 }
701 #endif
702
703 /* Perform tests and benchmarks for the specified cpu flag if supported by the host */
705 {
706 int old_cpu_flag =
state.cpu_flag;
707
708 flag |= old_cpu_flag;
712
713 if (!
flag ||
state.cpu_flag != old_cpu_flag) {
715
719 continue;
722 }
723 }
724 }
725
726 /* Print the name of the current CPU flag, but only do it once */
728 {
729 if (
state.cpu_flag_name) {
732 }
733 }
734
735 #if CONFIG_LINUX_PERF
736 static int bench_init_linux(void)
737 {
738 struct perf_event_attr attr = {
739 .type = PERF_TYPE_HARDWARE,
740 .size = sizeof(struct perf_event_attr),
741 .
config = PERF_COUNT_HW_CPU_CYCLES,
742 .disabled = 1, // start counting only on demand
743 .exclude_kernel = 1,
744 .exclude_hv = 1,
745 };
746
747 printf(
"benchmarking with Linux Perf Monitoring API\n");
748
749 state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
750 if (
state.sysfd == -1) {
751 perror("perf_event_open");
752 return -1;
753 }
754 return 0;
755 }
756 #elif CONFIG_MACOS_KPERF
757 static int bench_init_kperf(void)
758 {
760 return 0;
761 }
762 #else
764 {
765 #ifdef AV_READ_TIME
770 } else {
771 fprintf(stderr, "checkasm: unable to execute platform specific timer\n");
772 return -1;
773 }
774 printf(
"benchmarking with native FFmpeg timers\n");
775 return 0;
776 #else
777 fprintf(stderr, "checkasm: --bench is not supported on your system\n");
778 return -1;
779 #endif
780 }
781 #endif
782
784 {
785 #if CONFIG_LINUX_PERF
786 int ret = bench_init_linux();
787 #elif CONFIG_MACOS_KPERF
788 int ret = bench_init_kperf();
789 #else
791 #endif
794
797 return 0;
798 }
799
801 {
802 #if CONFIG_LINUX_PERF
805 #endif
806 }
807
809 {
810 fprintf(stderr,
811 "Usage: %s [--bench] [--test=<pattern>] [--verbose] [seed]\n",
812 path);
813 return 1;
814 }
815
816 int main(
int argc,
char *argv[])
817 {
820
821 #ifdef _WIN32
822 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
824 #endif
825 #else
830 #endif
831 #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
833 checkasm_checked_call = checkasm_checked_call_vfp;
834 #endif
835
837 fprintf(stderr, "checkasm: no tests to perform\n");
838 return 0;
839 }
840
841 for (
i = 1;
i < argc;
i++) {
842 const char *
arg = argv[
i];
843 unsigned long l;
844 char *end;
845
846 if (!strncmp(
arg,
"--bench", 7)) {
848 return 1;
851 state.bench_pattern_len = strlen(
state.bench_pattern);
852 } else
853 state.bench_pattern =
"";
854 }
else if (!strncmp(
arg,
"--test=", 7)) {
856 }
else if (!strcmp(
arg,
"--verbose") || !strcmp(
arg,
"-v")) {
858 }
else if ((l = strtoul(
arg, &end, 10)) <= UINT_MAX &&
859 *end == '0円') {
861 } else {
862 return usage(argv[0]);
863 }
864 }
865
866 fprintf(stderr,
"checkasm: using random seed %u\n",
seed);
868
872
873 if (
state.num_failed) {
874 fprintf(stderr,
"checkasm: %d of %d tests have failed\n",
state.num_failed,
state.num_checked);
876 } else {
877 fprintf(stderr,
"checkasm: all %d tests passed\n",
state.num_checked);
878 if (
state.bench_pattern) {
880 }
881 }
882
886 }
887
888 /* Decide whether or not the specified function needs to be tested and
889 * allocate/initialize data structures if needed. Returns a pointer to a
890 * reference function if the function should be tested, otherwise NULL */
892 {
893 char name_buf[256];
896 int name_length;
898
902
903 if (!
func || name_length <= 0 || name_length >=
sizeof(name_buf))
905
907 state.funcs->color = 1;
908 v = &
state.current_func->versions;
909
912 do {
913 /* Only test functions that haven't already been tested */
916
919
920 prev = v;
921 }
while ((v = v->
next));
922
924 }
925
929 state.current_func_ver = v;
930
933
935 }
936
937 /* Decide whether or not the current function needs to be benchmarked */
939 {
940 return !
state.num_failed &&
state.bench_pattern &&
941 !strncmp(
state.current_func->name,
state.bench_pattern,
state.bench_pattern_len);
942 }
943
944 /* Indicate that the current test has failed */
946 {
947 if (
state.current_func_ver &&
state.current_func_ver->cpu &&
948 state.current_func_ver->ok)
949 {
951
955 vfprintf(stderr, msg,
arg);
957 fprintf(stderr, ")\n");
958
959 state.current_func_ver->ok = 0;
961 }
962 }
963
965 state.catch_signals = enabled;
966 }
967
970 #ifdef __GLIBC__
972 #else
974 s == SIGILL ?
"illegal instruction" :
975 s == SIGBUS ?
"bus error" :
976 "segmentation fault");
977 #endif
978 }
980 }
981
982 /* Get the benchmark context of the current function */
984 {
986 memset(perf, 0, sizeof(*perf));
988 return perf;
989 }
990
991 /* Print the outcome of all tests performed since the last time this function was called */
993 {
994 static int prev_checked, prev_failed, max_length;
995
996 if (
state.num_checked > prev_checked) {
997 int pad_length = max_length + 4;
999
1001 pad_length -= fprintf(stderr,
" - %s.",
state.current_test_name);
1003 pad_length -= vfprintf(stderr,
name,
arg);
1005 fprintf(stderr,
"%*c",
FFMAX(pad_length, 0) + 2,
'[');
1006
1007 if (
state.num_failed == prev_failed)
1009 else
1011 fprintf(stderr, "]\n");
1012
1013 prev_checked =
state.num_checked;
1014 prev_failed =
state.num_failed;
1015 }
else if (!
state.cpu_flag) {
1016 /* Calculate the amount of padding required to make the output vertically aligned */
1017 int length = strlen(
state.current_test_name);
1019
1023
1024 if (length > max_length)
1025 max_length = length;
1026 }
1027 }
1028
/* Generate checkasm_check_<type>(), which compares two w x h buffers of
 * `type` one row at a time.
 *
 * stride1/stride2 are passed in bytes and converted to element units before
 * use. The function returns 0 when every row matches. Otherwise the failure
 * is reported through checkasm_fail_func() with the given file and line, and
 * 1 is returned. When state.verbose is set, each row of both buffers is also
 * printed to stderr using `fmt`, followed by a map in which 'x' marks a
 * differing element and '.' an equal one. */
#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
int checkasm_check_##type(const char *file, int line, \
                          const type *buf1, ptrdiff_t stride1, \
                          const type *buf2, ptrdiff_t stride2, \
                          int w, int h, const char *name) \
{ \
    int row = 0; \
    int col; \
    \
    stride1 /= sizeof(*buf1); \
    stride2 /= sizeof(*buf2); \
    \
    /* Advance to the first row that differs, if there is one */ \
    while (row < h && \
           !memcmp(buf1 + row * stride1, buf2 + row * stride2, \
                   w * sizeof(*buf1))) \
        row++; \
    if (row == h) \
        return 0; \
    \
    checkasm_fail_func("%s:%d", file, line); \
    if (state.verbose) { \
        fprintf(stderr, "%s:\n", name); \
        for (; h; h--, buf1 += stride1, buf2 += stride2) { \
            for (col = 0; col < w; col++) \
                fprintf(stderr, " " fmt, buf1[col]); \
            fprintf(stderr, " "); \
            for (col = 0; col < w; col++) \
                fprintf(stderr, " " fmt, buf2[col]); \
            fprintf(stderr, " "); \
            for (col = 0; col < w; col++) \
                fprintf(stderr, "%c", buf1[col] == buf2[col] ? '.' : 'x'); \
            fprintf(stderr, "\n"); \
        } \
    } \
    return 1; \
}
1062