1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * DCT test (c) 2001 Fabrice Bellard
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
28 #include "config.h"
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #if HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <math.h>
36
41
49
50 #undef printf
51
52 // BFIN
55
56 // ALTIVEC
58
59 // ARM
65
67
75 };
76
78
84
85 #if HAVE_MMX_INLINE
87 #endif
88 #if HAVE_MMXEXT_INLINE
90 #endif
91 #if HAVE_SSE2_INLINE
93 #endif
94
95 #if HAVE_ALTIVEC
97 #endif
98
99 #if ARCH_BFIN
101 #endif
102
103 { 0 }
104 };
105
106 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
108 int16_t *
block, int16_t *qmat);
109
110 static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){
113 int i;
114
115 for(i=0; i<64; i++){
116 qmat[i]=4;
117 tmp[i]= dst[i];
118 }
120 }
121 #endif
122
128
129 #if HAVE_MMX_INLINE
132 #endif
133 #if HAVE_MMXEXT_INLINE
135 #endif
136 #if HAVE_SSE2_INLINE
138 #if ARCH_X86_64 && HAVE_YASM
140 #endif
141 #endif
142
143 #if ARCH_BFIN
145 #endif
146
147 #if ARCH_ARM
150 #endif
151 #if HAVE_ARMV5TE
153 #endif
154 #if HAVE_ARMV6
156 #endif
157 #if HAVE_NEON
159 #endif
160
161 #if ARCH_ALPHA
163 #endif
164
165 { 0 }
166 };
167
168 #define AANSCALE_BITS 12
169
171 #define NB_ITS_SPEED 50000
172
174
176 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
177 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
178 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
179 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
180 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
181 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
182 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
183 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
184 };
185
187
189 {
/* Fills the 64-entry idct_mmx_perm table with a fixed index permutation. */
190 int i;
191
192 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
/* For each index i: bits 3-5 (mask 0x38) stay where they are, bits 1-2
 * move down to bits 0-1, and bit 0 moves up to bit 2. */
193 for (i = 0; i < 64; i++) {
194 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
195 }
196 }
197
200
202 {
/* Populates a 64-entry coefficient block according to the selected test
 * pattern. NOTE(review): the function signature is not visible in this
 * listing; block, test, is_idct, prng and vals come from it. */
203 int i, j;
204
/* Every pattern starts from an all-zero block. */
205 memset(block, 0, 64 * sizeof(*block));
206
207 switch (test) {
208 case 0:
/* Dense pattern: each of the 64 entries is a PRNG draw reduced modulo
 * 2*vals and then offset by -vals. */
209 for (i = 0; i < 64; i++)
210 block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
211 if (is_idct) {
/* NOTE(review): one statement between the 'if' and this loop is missing
 * from the listing. What is visible shifts every entry right by 3 bits. */
213 for (i = 0; i < 64; i++)
214 block[i] >>= 3;
215 }
216 break;
217 case 1:
/* Sparse pattern: j entries are written, each at position idx.
 * NOTE(review): the lines that compute j and idx are missing from the
 * listing - confirm their ranges against the full file. */
219 for (i = 0; i < j; i++) {
221 block[idx] =
av_lfg_get(prng) % (2*vals) -vals;
222 }
223 break;
224 case 2:
/* Only two entries are set: block[0] is a PRNG draw modulo 16*vals offset
 * by -8*vals, and block[63] is the inverted low bit of block[0]
 * (1 when block[0] is even, 0 when it is odd). */
225 block[ 0] =
av_lfg_get(prng) % (16*vals) - (8*vals);
226 block[63] = (block[0] & 1) ^ 1;
227 break;
228 }
229 }
230
232 {
/* Copies the 64 entries of src into dst, scattering them through one of
 * several index permutations.
 * NOTE(review): the 'if'/'else if' lines that choose between the loops
 * below are missing from this listing; only the loop bodies and the final
 * 'else' are visible. */
233 int i;
234
/* Scatter through the idct_mmx_perm table. */
236 for (i = 0; i < 64; i++)
237 dst[idct_mmx_perm[i]] = src[i];
/* Scatter through the idct_simple_mmx_perm table. */
239 for (i = 0; i < 64; i++)
240 dst[idct_simple_mmx_perm[i]] = src[i];
/* Keep bits 3-5 of the index and remap the low 3 bits through
 * idct_sse2_row_perm. */
242 for (i = 0; i < 64; i++)
243 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
/* Keep bits 2 and 5 (mask 0x24) and exchange bits 0-1 with bits 3-4. */
245 for (i = 0; i < 64; i++)
246 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
/* Exchange bits 0-2 with bits 3-5, i.e. transpose the 8x8 index. */
248 for (i = 0; i < 64; i++)
249 dst[(i>>3) | ((i<<3)&0x38)] = src[i];
250 } else {
/* Fallback: straight copy with no reordering. */
251 for (i = 0; i < 64; i++)
252 dst[i] = src[i];
253 }
254 }
255
257 {
/* Accuracy test, optionally followed by a speed test, for one transform
 * implementation. Returns 1 from the early exit below and 0 otherwise.
 * NOTE(review): many lines of this function (signature, several
 * declarations, the calls that produce block/block1, parts of the error
 * accumulation) are missing from this listing; the comments here cover
 * only what is visible. */
261 int64_t err2, ti, ti1, it1, err_sum = 0;
262 int64_t sysErr[64], sysErrMax = 0;
263 int maxout = 0;
264 int blockSumErrMax = 0, blockSumErr;
/* vals is 2 to the power of 'bits'. */
266 const int vals=1<<
bits;
267 double omse, ome;
268 int spec_err;
269
271
/* Reset the error statistics before the accuracy loop. */
272 err_inf = 0;
273 err2 = 0;
274 for (i = 0; i < 64; i++)
275 sysErr[i] = 0;
/* One iteration per test block, NB_ITS blocks in total. */
276 for (it = 0; it <
NB_ITS; it++) {
279
282
284 for (i = 0; i < 64; i++) {
287 }
288 }
289
291
292 blockSumErr = 0;
/* Per-coefficient statistics for this block: err_sum gets the signed
 * error, sysErr[i] the signed difference block[i] - block1[i], and maxout
 * the largest absolute value seen in block[]. */
293 for (i = 0; i < 64; i++) {
295 err_sum += err;
296 v = abs(err);
297 if (v > err_inf)
300 sysErr[i] +=
block[i] - block1[i];
302 if (abs(
block[i]) > maxout)
303 maxout = abs(
block[i]);
304 }
/* Remember the largest per-block error sum. */
305 if (blockSumErrMax < blockSumErr)
306 blockSumErrMax = blockSumErr;
307 }
/* Largest absolute accumulated error over all 64 positions. */
308 for (i = 0; i < 64; i++)
309 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
310
/* Print the accumulated per-position errors as 8 rows of 8 values. */
311 for (i = 0; i < 64; i++) {
312 if (i % 8 == 0)
313 printf("\n");
314 printf("%7d ", (int) sysErr[i]);
315 }
316 printf("\n");
317
/* Average err2 and err_sum over NB_ITS blocks of 64 coefficients. */
318 omse = (double) err2 / NB_ITS / 64;
319 ome = (double) err_sum / NB_ITS / 64;
320
/* Only IDCT runs can be flagged: peak error above 1, omse above 0.02, or
 * |ome| above 0.0015.
 * NOTE(review): these limits look like the IEEE 1180 IDCT accuracy
 * thresholds - confirm. */
321 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
322
323 printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
324 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
325 omse, ome, (double) sysErrMax / NB_ITS,
326 maxout, blockSumErrMax);
327
/* NOTE(review): the condition guarding this return is missing from the
 * listing; presumably it depends on spec_err - verify. */
329 return 1;
330
/* The speed test is skipped unless requested. */
331 if (!speed)
332 return 0;
333
334 /* speed test */
335
338
340 it1 = 0;
/* Keep running until ti1 reaches 1000000.
 * NOTE(review): the loop body and the computation of ti1 are missing here;
 * the kdct/s formula below is consistent with ti1 being in microseconds -
 * TODO confirm. */
341 do {
345 }
349 } while (ti1 < 1000000);
350
351 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
352 (double) it1 * 1000.0 / (double) ti1);
353
354 return 0;
355 }
356
359
361 {
/* Double-precision reference for the 2-4-8 inverse transform: reads 64
 * coefficients from block[] and writes an 8x8 grid of clamped, rounded
 * samples to dest[] with row stride linesize.
 * NOTE(review): the signature and the declarations of init, s, sum and v
 * are missing from this listing. */
363 static double c8[8][8];
364 static double c4[4][4];
365 double block1[64], block2[64], block3[64];
367 int i, j, k;
368
/* The cosine tables are static and are built only on the first call. */
369 if (!init) {
370 init = 1;
371
/* 8-point basis: c8[i][j] = s * cos(pi * i * (j + 0.5) / 8), with
 * s = sqrt(1/8) for i == 0 and sqrt(1/4) otherwise. 'sum' collects the
 * squared entries of each row but is not read anywhere in the visible
 * code. */
372 for (i = 0; i < 8; i++) {
373 sum = 0;
374 for (j = 0; j < 8; j++) {
375 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
376 c8[i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
377 sum += c8[i][j] * c8[i][j];
378 }
379 }
380
/* 4-point basis, same construction: s = sqrt(1/4) for i == 0 and
 * sqrt(1/2) otherwise. */
381 for (i = 0; i < 4; i++) {
382 sum = 0;
383 for (j = 0; j < 4; j++) {
384 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
385 c4[i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
386 sum += c4[i][j] * c4[i][j];
387 }
388 }
389 }
390
391 /* butterfly */
/* Rows are processed in pairs (2i, 2i+1): the even row of block1 gets the
 * sum and the odd row the difference, both scaled by sqrt(2)/2. */
392 s = 0.5 * sqrt(2.0);
393 for (i = 0; i < 4; i++) {
394 for (j = 0; j < 8; j++) {
395 block1[8 * (2 * i) + j] =
396 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) *
s;
397 block1[8 * (2 * i + 1) + j] =
398 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) *
s;
399 }
400 }
401
402 /* idct8 on lines */
/* Each of the 8 rows of block1 is transformed with the c8 basis. */
403 for (i = 0; i < 8; i++) {
404 for (j = 0; j < 8; j++) {
405 sum = 0;
406 for (k = 0; k < 8; k++)
407 sum += c8[k][j] * block1[8 * i + k];
408 block2[8 * i + j] = sum;
409 }
410 }
411
412 /* idct4 */
/* Column pass: in every column i, the four even rows ("top") and the four
 * odd rows ("bottom") are each transformed with the c4 basis, and the
 * results go back to the even and odd rows of block3 respectively. */
413 for (i = 0; i < 8; i++) {
414 for (j = 0; j < 4; j++) {
415 /* top */
416 sum = 0;
417 for (k = 0; k < 4; k++)
418 sum += c4[k][j] * block2[8 * (2 * k) + i];
419 block3[8 * (2 * j) + i] = sum;
420
421 /* bottom */
422 sum = 0;
423 for (k = 0; k < 4; k++)
424 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
425 block3[8 * (2 * j + 1) + i] = sum;
426 }
427 }
428
429 /* clamp and store the result */
/* Values are limited to [0, 255] before being rounded with rint(). */
430 for (i = 0; i < 8; i++) {
431 for (j = 0; j < 8; j++) {
432 v = block3[8 * i + j];
433 if (v < 0) v = 0;
434 else if (v > 255) v = 255;
435 dest[i * linesize + j] = (int)
rint(v);
436 }
437 }
438 }
439
441 void (*idct248_put)(
uint8_t *dest,
int line_size,
443 int speed)
444 {
/* Error test, optionally followed by a speed test, for a 2-4-8 IDCT
 * implementation; prints the results and returns nothing.
 * NOTE(review): the first line of the signature, some declarations and
 * most loop bodies are missing from this listing, so the comments below
 * cover only the visible statements. */
445 int it, i, it1, ti, ti1, err_max,
v;
447
449
450 /* just one test to see if code is correct (precision is less
451 important here) */
452 err_max = 0;
/* One iteration per test block, NB_ITS blocks in total. */
453 for (it = 0; it <
NB_ITS; it++) {
454 /* XXX: use forward transform to generate values */
455 for (i = 0; i < 64; i++)
458
459 for (i = 0; i < 64; i++)
462
463 for (i = 0; i < 64; i++)
466
/* Per-sample comparison that tracks the largest value of v in err_max.
 * NOTE(review): how v is computed, and what happens when v == 255, is
 * not visible here. */
467 for (i = 0; i < 64; i++) {
469 if (v == 255)
471 if (v > err_max)
473 }
/* Disabled debug dump of the reference and tested output as 8x8 grids. */
474 #if 0
475 printf("ref=\n");
476 for(i=0;i<8;i++) {
477 int j;
478 for(j=0;j<8;j++) {
480 }
481 printf("\n");
482 }
483
484 printf("out=\n");
485 for(i=0;i<8;i++) {
486 int j;
487 for(j=0;j<8;j++) {
489 }
490 printf("\n");
491 }
492 #endif
493 }
/* The constant condition '1 ? ... : ...' always selects "IDCT248". */
494 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
495
/* The speed test is skipped unless requested. */
496 if (!speed)
497 return;
498
500 it1 = 0;
/* Keep running until ti1 reaches 1000000.
 * NOTE(review): the timing statements are missing from this listing; the
 * kdct/s formula below is consistent with ti1 being in microseconds -
 * TODO confirm. */
501 do {
503 for (i = 0; i < 64; i++)
506 }
510 } while (ti1 < 1000000);
511
512 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
513 (double) it1 * 1000.0 / (double) ti1);
514 }
515
517 {
/* Prints the command-line synopsis to stdout: the positional test-number
 * and bits arguments and the -i, -4 and -t switches. */
518 printf("dct-test [-i] [<test-number>] [<bits>]\n"
519 "test-number 0 -> test with random matrixes\n"
520 " 1 -> test with random sparse matrixes\n"
521 " 2 -> do 3. test from mpeg4 std\n"
522 "bits Number of time domain bits to use, 8 is default\n"
523 "-i test IDCT implementations\n"
524 "-4 test IDCT248 implementations\n"
525 "-t speed test\n");
526 }
527
528 #if !HAVE_GETOPT
530 #endif
531
/* Program entry point: parses the command line, runs the selected tests
 * and returns 0 on success or 1 if any test reported an error.
 * NOTE(review): several lines of this function (declarations of i, c,
 * test and bits, start-up calls, the 2-4-8 branch body) are missing from
 * this listing. */
532 int main(
int argc,
char **argv)
533 {
534 int test_idct = 0, test_248_dct = 0;
537 int speed = 0;
538 int err = 0;
540
542
545
/* Option parsing: -i selects the IDCT tests, -4 the 2-4-8 IDCT tests and
 * -t enables the speed test. */
546 for (;;) {
547 c =
getopt(argc, argv,
"ih4t");
548 if (c == -1)
549 break;
550 switch (c) {
551 case 'i':
552 test_idct = 1;
553 break;
554 case '4':
555 test_248_dct = 1;
556 break;
557 case 't':
558 speed = 1;
559 break;
/* Unknown options and -h share one path that exits with status 0.
 * NOTE(review): the statement before 'return 0' is missing from this
 * listing. */
560 default:
561 case 'h':
563 return 0;
564 }
565 }
566
/* First positional argument: test number.
 * NOTE(review): the line before this one is missing from the listing;
 * confirm that optind < argc is checked before argv[optind] is read. */
568 test = atoi(argv[
optind]);
/* Second positional argument, when present: bits. */
569 if(optind+1 < argc) bits= atoi(argv[optind+1]);
570
571 printf("ffmpeg DCT/IDCT test\n");
572
573 if (test_248_dct) {
575 } else {
/* Choose the IDCT or forward DCT table and walk it until the entry whose
 * name is null; results are OR-ed into err. */
576 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
577 for (i = 0; algos[i].
name; i++)
579 err |=
dct_error(&algos[i], test, test_idct, speed, bits);
580 }
581 }
582
583 if (err)
584 printf("Error: %d.\n", err);
585
/* Collapse any non-zero err to exit status 1. */
586 return !!err;
587 }