1 /*
2 * Assembly testing and benchmarking tool
3 * Copyright (c) 2015 Henrik Gramner
4 * Copyright (c) 2008 Loren Merritt
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #ifndef TESTS_CHECKASM_CHECKASM_H
24 #define TESTS_CHECKASM_CHECKASM_H
25
26 #include <stdint.h>
27 #include "config.h"
28
29 #if CONFIG_LINUX_PERF
30 #include <unistd.h> // read(3)
31 #include <sys/ioctl.h>
32 #include <asm/unistd.h>
33 #include <linux/perf_event.h>
34 #elif CONFIG_MACOS_KPERF
36 #endif
37
44
45 #ifdef _WIN32
46 #include <windows.h>
47 #if ARCH_X86_32
48 #include <setjmp.h>
/* 32-bit x86 Windows: the context is saved and restored with plain
 * setjmp/longjmp on checkasm_context_buf. The setjmp result is forwarded
 * to checkasm_handle_signal(). */
#define checkasm_save_context() \
    checkasm_handle_signal(setjmp(checkasm_context_buf))
#define checkasm_load_context(s) \
    longjmp(checkasm_context_buf, s)
52 #elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
/* On Windows, every architecture except x86_32 uses SEH, and there
 * setjmp/longjmp try to unwind the stack through SEH. That does not work
 * for assembly functions without unwind information, so the context is
 * captured and restored with RtlCaptureContext/RtlRestoreContext instead.
 * The status field is zeroed before the capture and set to s right before
 * the restore; its value is what checkasm_handle_signal() receives. */
#define checkasm_save_context()                             \
    (checkasm_context_buf.status = 0,                       \
     RtlCaptureContext(&checkasm_context_buf.c),            \
     checkasm_handle_signal(checkasm_context_buf.status))
#define checkasm_load_context(s)                            \
    (checkasm_context_buf.status = s,                       \
     RtlRestoreContext(&checkasm_context_buf.c, NULL))
64 #else
/* Fallback for Windows targets outside the desktop partition: there is no
 * context to save, so checkasm_save_context() evaluates to 0 and
 * checkasm_load_context() does nothing. checkasm_load_context accepts (and
 * ignores) the same single argument as the other platform variants, so a
 * call site written as checkasm_load_context(s) preprocesses everywhere;
 * invoking it with empty parentheses keeps working as well. */
#define checkasm_context void*
#define checkasm_save_context() 0
#define checkasm_load_context(s) do {} while (0)
68 #endif
69 #elif defined(_WASI_EMULATED_SIGNAL)
/* Build with _WASI_EMULATED_SIGNAL defined: there is no context to save, so
 * checkasm_save_context() evaluates to 0 and checkasm_load_context() does
 * nothing. checkasm_load_context accepts (and ignores) the same single
 * argument as the other platform variants, so a call site written as
 * checkasm_load_context(s) preprocesses everywhere; invoking it with empty
 * parentheses keeps working as well. */
#define checkasm_context void*
#define checkasm_save_context() 0
#define checkasm_load_context(s) do {} while (0)
73 #else
74 #include <setjmp.h>
/* Default variant: sigsetjmp/siglongjmp on checkasm_context_buf. The second
 * sigsetjmp argument is 1, and the sigsetjmp result is forwarded to
 * checkasm_handle_signal(). */
#define checkasm_save_context() \
    checkasm_handle_signal(sigsetjmp(checkasm_context_buf, 1))
#define checkasm_load_context(s) \
    siglongjmp(checkasm_context_buf, s)
78 #endif
79
163
165
174
175 /* float compare utilities */
184 unsigned max_ulp,
unsigned len);
188
/* Fetch the next value from checkasm_lfg through av_lfg_get(). */
#define rnd() \
    av_lfg_get(&checkasm_lfg)
191
193
195
/* Decide whether or not the specified function needs to be tested.
 * Saves the recovery context first, stores func in func_new and assigns the
 * result of checkasm_check_func() to func_ref; that assignment is also the
 * value of the whole expression. */
#define check_func(func, ...)                                           \
    (checkasm_save_context(),                                           \
     func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
198
/* Declare the prototype of the function under test. The first argument is the
 * return type and the remaining ones are the function parameters; naming the
 * parameters is optional. Each variant first expands the matching
 * declare_new* macro and then starts a typedef of func_type, leaving the
 * terminating semicolon to the caller. */
#define declare_func(ret, ...)                          \
    declare_new(ret, __VA_ARGS__)                       \
    typedef ret func_type(__VA_ARGS__)
#define declare_func_float(ret, ...)                    \
    declare_new_float(ret, __VA_ARGS__)                 \
    typedef ret func_type(__VA_ARGS__)
#define declare_func_emms(cpu_flags, ret, ...)          \
    declare_new_emms(cpu_flags, ret, __VA_ARGS__)       \
    typedef ret func_type(__VA_ARGS__)
204
/* Indicate that the current test has failed, reporting the base name of the
 * source file and the line on which fail() was invoked. */
#define fail() \
    checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)
207
/* Print the test outcome; report is another spelling of checkasm_report. */
#define report \
    checkasm_report
210
/* Call the reference function, i.e. func_ref cast to func_type.
 * The signal handler state is set to 1 around the call and back to 0 by a
 * second statement, so the macro expands to two statements and only the
 * first one carries the return value of the call. */
#define call_ref(...)                                   \
    (checkasm_set_signal_handler_state(1),              \
     ((func_type *)func_ref)(__VA_ARGS__));             \
    checkasm_set_signal_handler_state(0)
216
217 #if ARCH_X86 && HAVE_X86ASM
/* Verifies that clobbered callee-saved registers are properly saved and
 * restored, and that either no MMX registers are touched or emms is issued. */
void checkasm_checked_call(void *func, ...);

/* Verifies that clobbered callee-saved registers are properly saved and
 * restored, and issues emms on behalf of asm functions which are not
 * required to do so themselves. */
void checkasm_checked_call_emms(void *func, ...);

/* Verifies that clobbered callee-saved registers are properly saved and
 * restored, but does not issue emms. Meant for dsp functions returning
 * float or double. */
void checkasm_checked_call_float(void *func, ...);
227
228 #if ARCH_X86_64
/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended
 * to 64-bit.
 *
 * The stack around the stack pointer is clobbered with junk, and the assembly
 * function is then called through checked_call() with added dummy arguments,
 * which forces all real arguments to be passed on the stack instead of in
 * registers. For 32-bit arguments the upper half of the 64-bit register
 * locations on the stack now contains junk, so a misbehaving function either
 * produces incorrect output or segfaults.
 *
 * This works extremely well in practice but is technically not guaranteed:
 * false negatives are theoretically possible, whereas false positives can
 * never occur. */
void checkasm_stack_clobber(uint64_t clobber, ...);
/* Declare and initialise checked_call, the pointer through which the function
 * under test is invoked. Its parameter list is the callee pointer, five dummy
 * ints and then the real parameters. */
#define declare_new(ret, ...)                                           \
    ret (*checked_call)(void *, int, int, int, int, int,                \
                        __VA_ARGS__) = (void *)checkasm_checked_call;
#define declare_new_float(ret, ...)                                     \
    ret (*checked_call)(void *, int, int, int, int, int,                \
                        __VA_ARGS__) = (void *)checkasm_checked_call_float;
/* Selects checkasm_checked_call_emms when any bit of cpu_flags is set in
 * av_get_cpu_flags(), and checkasm_checked_call otherwise. */
#define declare_new_emms(cpu_flags, ret, ...)                           \
    ret (*checked_call)(void *, int, int, int, int, int,                \
                        __VA_ARGS__) =                                  \
        ((cpu_flags) & av_get_cpu_flags())                              \
            ? (void *)checkasm_checked_call_emms                        \
            : (void *)checkasm_checked_call;
/* Junk value handed to checkasm_stack_clobber(). */
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
/* Call the function under test: set the signal handler state to 1, pass 21
 * CLOB values to checkasm_stack_clobber(), then invoke checked_call with
 * func_new and five zero dummy arguments ahead of the real ones. A second
 * statement resets the signal handler state to 0, so only the first
 * statement carries the return value of the call. */
#define call_new(...)                                                       \
    (checkasm_set_signal_handler_state(1),                                  \
     checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,       \
                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,       \
                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB),      \
     checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__));                   \
    checkasm_set_signal_handler_state(0)
253 #elif ARCH_X86_32
/* 32-bit x86: checked_call takes the callee pointer followed directly by the
 * real parameters. */
#define declare_new(ret, ...)                                               \
    ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call;
#define declare_new_float(ret, ...)                                         \
    ret (*checked_call)(void *, __VA_ARGS__) =                              \
        (void *)checkasm_checked_call_float;
/* Selects checkasm_checked_call_emms when any bit of cpu_flags is set in
 * av_get_cpu_flags(), and checkasm_checked_call otherwise. */
#define declare_new_emms(cpu_flags, ret, ...)                               \
    ret (*checked_call)(void *, __VA_ARGS__) =                              \
        ((cpu_flags) & av_get_cpu_flags())                                  \
            ? (void *)checkasm_checked_call_emms                            \
            : (void *)checkasm_checked_call;
/* Call the function under test through checked_call, passing func_new ahead
 * of the real arguments. The signal handler state is 1 during the call and
 * reset to 0 by a second statement, so only the first statement carries the
 * return value. */
#define call_new(...)                               \
    (checkasm_set_signal_handler_state(1),          \
     checked_call(func_new, __VA_ARGS__));          \
    checkasm_set_signal_handler_state(0)
263 #endif
264 #elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
/* A dummy argument offsets the real parameters by 2 instead of only 1. This
 * keeps any 8-byte alignment of the parameters the same even after the extra
 * parameters have been removed. */
void checkasm_checked_call_vfp(void *func, int dummy, ...);
void checkasm_checked_call_novfp(void *func, int dummy, ...);
/* Function pointer with the same signature as the two variants above;
 * defined outside this header. */
extern void (*checkasm_checked_call)(void *func, int dummy, ...);
/* checked_call takes the callee pointer, one dummy int, the real parameters
 * and then 15 trailing ints. */
#define declare_new(ret, ...)                                           \
    ret (*checked_call)(void *, int dummy, __VA_ARGS__,                 \
                        int, int, int, int, int, int, int, int,         \
                        int, int, int, int, int, int, int) =            \
        (void *)checkasm_checked_call;
/* Call the function under test through checked_call with a zero dummy, the
 * real arguments and 15 fixed trailing values. The signal handler state is 1
 * during the call and reset to 0 by a second statement, so only the first
 * statement carries the return value. */
#define call_new(...)                                                   \
    (checkasm_set_signal_handler_state(1),                              \
     checked_call(func_new, 0, __VA_ARGS__,                             \
                  11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0));      \
    checkasm_set_signal_handler_state(0)
278 #elif ARCH_AARCH64 && !defined(__APPLE__)
/* Variadic helpers defined outside this header. call_new() passes CLOB values
 * to checkasm_stack_clobber(), and checked_call is initialised from
 * checkasm_checked_call. */
void checkasm_stack_clobber(uint64_t clobber, ...);
void checkasm_checked_call(void *func, ...);
/* checked_call takes the callee pointer, seven dummy ints, the real
 * parameters and then 15 trailing ints. */
#define declare_new(ret, ...)                                               \
    ret (*checked_call)(void *, int, int, int, int, int, int, int,          \
                        __VA_ARGS__,                                        \
                        int, int, int, int, int, int, int, int,             \
                        int, int, int, int, int, int, int)                  \
        = (void *)checkasm_checked_call;
/* Junk value handed to checkasm_stack_clobber(). */
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
/* Call the function under test: set the signal handler state to 1, pass 23
 * CLOB values to checkasm_stack_clobber(), then invoke checked_call with
 * func_new, seven zero dummies, the real arguments and 15 fixed trailing
 * values. A second statement resets the signal handler state to 0, so only
 * the first statement carries the return value. */
#define call_new(...)                                                       \
    (checkasm_set_signal_handler_state(1),                                  \
     checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, \
                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, \
                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB),      \
     checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,               \
                  7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));            \
    checkasm_set_signal_handler_state(0)
292 #elif ARCH_RISCV
/* Helpers defined outside this header. On the wrapped RISC-V path, call_new()
 * hands func_new to checkasm_set_function() and checked_call is initialised
 * from checkasm_get_wrapper(). */
void checkasm_set_function(void *func);
void *checkasm_get_wrapper(void);
295
#if HAVE_RV && (__riscv_xlen == 64) && defined (__riscv_d)
/* Wrapped path: checked_call has exactly the prototype of the function under
 * test and is initialised from checkasm_get_wrapper(). */
#define declare_new(ret, ...)                                       \
    ret (*checked_call)(__VA_ARGS__) = checkasm_get_wrapper();
/* Register func_new with checkasm_set_function(), then call through
 * checked_call. The signal handler state is 1 during the call and reset to 0
 * by a second statement, so only the first statement carries the return
 * value. */
#define call_new(...)                                               \
    (checkasm_set_signal_handler_state(1),                          \
     checkasm_set_function(func_new),                               \
     checked_call(__VA_ARGS__));                                    \
    checkasm_set_signal_handler_state(0)
#else
/* Unwrapped path: nothing to declare; func_new is called directly after
 * being cast to func_type. */
#define declare_new(ret, ...)
#define call_new(...)                                               \
    (checkasm_set_signal_handler_state(1),                          \
     ((func_type *)func_new)(__VA_ARGS__));                         \
    checkasm_set_signal_handler_state(0)
#endif
310 #else
/* Generic fallback: no checked-call wrapper exists, so the declare_new*
 * macros expand to nothing. */
#define declare_new(ret, ...)
#define declare_new_float(ret, ...)
#define declare_new_emms(cpu_flags, ret, ...)
/* Call the function: func_new is cast to func_type and invoked directly. The
 * signal handler state is 1 during the call and reset to 0 by a second
 * statement, so only the first statement carries the return value. */
#define call_new(...)                               \
    (checkasm_set_signal_handler_state(1),          \
     ((func_type *)func_new)(__VA_ARGS__));         \
    checkasm_set_signal_handler_state(0)
319 #endif
320
/* Architectures that did not provide their own emms/float variants fall back
 * to plain declare_new; cpu_flags is dropped in that case. */
#if !defined(declare_new_emms)
#define declare_new_emms(cpu_flags, ret, ...) \
    declare_new(ret, __VA_ARGS__)
#endif
#if !defined(declare_new_float)
#define declare_new_float(ret, ...) \
    declare_new(ret, __VA_ARGS__)
#endif
327
333
334 #if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF
335
#if CONFIG_LINUX_PERF
/* Linux perf: reset and then enable the counter behind sysfd. The t argument
 * is not used by PERF_START in this variant. */
#define PERF_START(t)                                   \
    do {                                                \
        ioctl(sysfd, PERF_EVENT_IOC_RESET, 0);          \
        ioctl(sysfd, PERF_EVENT_IOC_ENABLE, 0);         \
    } while (0)
/* Disable the counter and read sizeof(t) bytes from sysfd into t. The result
 * of read() is stored and then explicitly discarded. */
#define PERF_STOP(t)                                    \
    do {                                                \
        int ret;                                        \
        ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0);        \
        ret = read(sysfd, &t, sizeof(t));               \
        (void)ret;                                      \
    } while (0)
#elif CONFIG_MACOS_KPERF
/* kperf: t holds the start reading, then the elapsed difference. */
#define PERF_START(t) \
    t = ff_kperf_cycles()
#define PERF_STOP(t) \
    t = ff_kperf_cycles() - t
#else
/* AV_READ_TIME: t holds the start reading, then the elapsed difference. */
#define PERF_START(t) \
    t = AV_READ_TIME()
#define PERF_STOP(t) \
    t = AV_READ_TIME() - t
#endif
354
/* Invoke tfunc four times in a row with the same arguments. */
#define CALL4(...)                  \
    do {                            \
        tfunc(__VA_ARGS__);         \
        tfunc(__VA_ARGS__);         \
        tfunc(__VA_ARGS__);         \
        tfunc(__VA_ARGS__);         \
    } while (0)

/* Four CALL4 groups, i.e. sixteen tfunc invocations with the same arguments. */
#define CALL16(...)                 \
    do {                            \
        CALL4(__VA_ARGS__);         \
        CALL4(__VA_ARGS__);         \
        CALL4(__VA_ARGS__);         \
        CALL4(__VA_ARGS__);         \
    } while (0)
370
/* Benchmark the function.
 * Does nothing unless checkasm_bench_func() is non-zero. Otherwise func_new
 * is timed FFMAX(bench_runs >> 3, 1) times, each timed sample spanning two
 * CALL16 invocations between PERF_START and PERF_STOP. The first sample is
 * always discarded; a later sample is accumulated only while
 * t*tcount <= tsum*4, i.e. while it does not exceed four times the mean of
 * the samples accepted so far. The accepted cycle sum and sample count are
 * added to the perf context obtained from checkasm_get_perf_context().
 * The signal handler state is 1 for the duration of the measurement. */
#define bench_new(...)                                                      \
    do {                                                                    \
        if (!checkasm_bench_func())                                         \
            break;                                                          \
        struct CheckasmPerf *perf = checkasm_get_perf_context();            \
        av_unused const int sysfd = perf->sysfd;                            \
        func_type *tfunc = func_new;                                        \
        uint64_t tsum = 0;                                                  \
        uint64_t tcount = 0;                                                \
        uint64_t ti;                                                        \
        uint64_t t = 0;                                                     \
        const uint64_t truns = FFMAX(bench_runs >> 3, 1);                   \
        checkasm_set_signal_handler_state(1);                               \
        for (ti = 0; ti < truns; ti++) {                                    \
            PERF_START(t);                                                  \
            CALL16(__VA_ARGS__);                                            \
            CALL16(__VA_ARGS__);                                            \
            PERF_STOP(t);                                                   \
            if (ti > 0 && t * tcount <= tsum * 4) {                         \
                tsum += t;                                                  \
                tcount++;                                                   \
            }                                                               \
        }                                                                   \
        emms_c();                                                           \
        perf->cycles += tsum;                                               \
        perf->iterations += tcount;                                         \
        checkasm_set_signal_handler_state(0);                               \
    } while (0)
399 #else
/* No timer source is available: benchmarking and the PERF_* helpers expand to
 * an empty while (0) loop; the caller supplies the terminating semicolon. */
#define bench_new(...) \
    while (0)
#define PERF_START(t) \
    while (0)
#define PERF_STOP(t) \
    while (0)
403 #endif
404
/* Declare a padded rectangular buffer of `type` elements:
 *   name##_buf    - backing storage of (h+32) rows of FFALIGN(w,64)+64
 *                   elements, plus 64 extra elements
 *   name##_stride - row pitch in bytes
 *   name##_buf_h  - number of rows in the backing storage (h+32)
 *   name          - pointer 16 rows and 64 elements into the backing storage
 * The final declaration is left unterminated; the caller adds the semicolon. */
#define BUF_RECT(type, name, w, h)                                              \
    LOCAL_ALIGNED_32(type, name##_buf, [((h)+32)*(FFALIGN(w,64)+64) + 64]);     \
    av_unused ptrdiff_t name##_stride = sizeof(type)*(FFALIGN(w,64)+64);        \
    av_unused int name##_buf_h = (h)+32;                                        \
    type *name = name##_buf + (FFALIGN(w,64)+64)*16 + 64
410
/* Declare a padded rectangular byte buffer whose storage and row pitch are
 * sized for uint16_t elements:
 *   name##_buf    - uint8_t backing storage of sizeof(uint16_t) times
 *                   ((h+32) rows of FFALIGN(w,64)+64, plus 64)
 *   name##_stride - row pitch in bytes, sizeof(uint16_t)*(FFALIGN(w,64)+64)
 *   name##_buf_h  - number of rows in the backing storage (h+32)
 *   name          - uint8_t pointer (FFALIGN(w,64)+64)*16 + 64 bytes into
 *                   the backing storage
 * The final declaration is left unterminated; the caller adds the semicolon. */
#define PIXEL_RECT(name, w, h)                                                  \
    LOCAL_ALIGNED_32(uint8_t, name##_buf,                                       \
                     [sizeof(uint16_t) * (((h)+32)*(FFALIGN(w,64)+64) + 64)],); \
    av_unused ptrdiff_t name##_stride = sizeof(uint16_t) * (FFALIGN(w,64)+64);  \
    av_unused int name##_buf_h = (h)+32;                                        \
    uint8_t *name = name##_buf + (FFALIGN(w,64)+64)*16 + 64
416
/* Fill the backing storage of a rectangle with the byte 0x99, covering
 * name##_stride * name##_buf_h + 64 bytes starting at name##_buf. */
#define CLEAR_BUF_RECT(name)                                        \
    memset(name##_buf, 0x99, name##_stride * name##_buf_h + 64)
/* Same operation under the PIXEL_RECT naming. */
#define CLEAR_PIXEL_RECT(name)                                      \
    CLEAR_BUF_RECT(name)
421
/* Expands to the prototype of checkasm_check_<type>(), which takes the call
 * site (file, line), two buffers of `type` with their strides, the
 * dimensions w and h, a name string, and the align_w, align_h and padding
 * values. No trailing semicolon is emitted. */
#define DECL_CHECKASM_CHECK_FUNC(type)                                  \
    int checkasm_check_##type(const char *file, int line,               \
                              const type *buf1, ptrdiff_t stride1,      \
                              const type *buf2, ptrdiff_t stride2,      \
                              int w, int h, const char *name,           \
                              int align_w, int align_h,                 \
                              int padding)
429
435
437 const float *buf1, ptrdiff_t stride1,
438 const float *buf2, ptrdiff_t stride2,
439 int w,
int h,
const char *
name,
440 unsigned max_ulp, int align_w, int align_h,
441 int padding);
442
/* PASTE glues its two arguments together as written. CONCAT goes through
 * PASTE so that macro arguments are expanded before they are glued. */
#define PASTE(a, b) a ## b
#define CONCAT(a, b) PASTE(a, b)
445
/* Dispatch to checkasm_check_<prefix>(), prepending the file and line of the
 * call site to the remaining arguments. */
#define checkasm_check2(prefix, ...) \
    CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__)
/* Plain comparison: the three trailing arguments are all passed as 0. */
#define checkasm_check(prefix, ...) \
    checkasm_check2(prefix, __VA_ARGS__, 0, 0, 0)
/* Check a pointer from BUF_RECT, checking whether there have been
 * writes outside of the designated area. The trailing arguments passed
 * are 1, 1 and 8. */
#define checkasm_check_padded(...) \
    checkasm_check2(__VA_ARGS__, 1, 1, 8)
/* Check a pointer from BUF_RECT, checking whether there have been
 * writes outside of the designated area. Writing slightly past the end of
 * the buffer is allowed: w/h are aligned to the caller-supplied
 * align_w/align_h and only overwrites outside of that are reported. The
 * final argument passed is 8. */
#define checkasm_check_padded_align(...) \
    checkasm_check2(__VA_ARGS__, 8)
458
/* This assumes that there is a local variable named "bit_depth".
 * For tests that don't have that and only operate on a single
 * bitdepth, just call checkasm_check(uint8_t, ...) directly.
 *
 * When bit_depth > 8 the buffers are compared as uint16_t, otherwise as
 * uint8_t. buf1 and buf2 are pasted directly after the cast without
 * parentheses, so the cast binds to the first operand of whatever
 * expression is passed. */
#define checkasm_check_pixel2(buf1, stride1, buf2, stride2, ...)        \
    ((bit_depth > 8)                                                    \
        ? checkasm_check2(uint16_t,                                     \
                          (const uint16_t*)buf1, stride1,               \
                          (const uint16_t*)buf2, stride2,               \
                          __VA_ARGS__)                                  \
        : checkasm_check2(uint8_t,                                      \
                          (const uint8_t*) buf1, stride1,               \
                          (const uint8_t*) buf2, stride2,               \
                          __VA_ARGS__))
/* Plain comparison: the three trailing arguments are all passed as 0. */
#define checkasm_check_pixel(...)                                       \
    checkasm_check_pixel2(__VA_ARGS__, 0, 0, 0)
/* Padded comparison: the trailing arguments passed are 1, 1 and 8. */
#define checkasm_check_pixel_padded(...)                                \
    checkasm_check_pixel2(__VA_ARGS__, 1, 1, 8)
/* Padded comparison with caller-supplied alignment; the final argument
 * passed is 8. */
#define checkasm_check_pixel_padded_align(...)                          \
    checkasm_check_pixel2(__VA_ARGS__, 8)
476
/* This assumes that there is a local variable named "bit_depth"
 * and that the type-specific buffers obey the name ## _BITDEPTH
 * convention: buf1 ## _32 / buf2 ## _32 are compared as int32_t when
 * bit_depth > 8, and buf1 ## _16 / buf2 ## _16 as int16_t otherwise.
 * For tests that don't have that and only operate on a single
 * bitdepth, just call checkasm_check(uint8_t, ...) directly. */
#define checkasm_check_dctcoef(buf1, stride1, buf2, stride2, ...)       \
    ((bit_depth > 8)                                                    \
        ? checkasm_check(int32_t,                                       \
                         buf1 ## _32, stride1,                          \
                         buf2 ## _32, stride2,                          \
                         __VA_ARGS__)                                   \
        : checkasm_check(int16_t,                                       \
                         buf1 ## _16, stride1,                          \
                         buf2 ## _16, stride2,                          \
                         __VA_ARGS__))
490
491 #endif /* TESTS_CHECKASM_CHECKASM_H */