1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * DCT test (c) 2001 Fabrice Bellard
25 * Started from sample code by Juan J. Sierralta P.
26 */
27
28 #include "config.h"
29 #include "config_components.h"
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33 #if HAVE_UNISTD_H
34 #include <unistd.h>
35 #endif
36 #include <math.h>
37
44
54 #if CONFIG_PRORES_DECODER
56 #endif
57
64 };
65
70 #if CONFIG_FAANDCT
72 #endif /* CONFIG_FAANDCT */
73 };
74
75 #if CONFIG_PRORES_DECODER
/*
 * File-local helper taking a pointer to int16_t data (dst); it is only
 * compiled when CONFIG_PRORES_DECODER is enabled.
 * NOTE(review): the statements of the body are not visible in this excerpt
 * (only the closing braces remain), so what it does with dst cannot be
 * documented from here -- confirm against the complete file.
 */
76 static void ff_prores_idct_wrap(int16_t *
dst){
79
82 }
86 }
87 }
88 #endif
89
96 #if CONFIG_PRORES_DECODER
98 #endif
99 #if CONFIG_FAANIDCT
101 #endif /* CONFIG_FAANIDCT */
102 #if CONFIG_MPEG4_DECODER
104 #endif /* CONFIG_MPEG4_DECODER */
105 };
106
107 #if ARCH_AARCH64
109 #elif ARCH_ARM
111 #elif ARCH_PPC
113 #elif ARCH_X86
115 #else
118 #endif
119
120 #define AANSCALE_BITS 12
121
123 #define NB_ITS_SPEED 50000
124
127
129 {
131
133
135 case 0:
136 for (
i = 0;
i < 64;
i++)
138 if (is_idct) {
140 for (
i = 0;
i < 64;
i++)
142 }
143 break;
144 case 1:
146 for (
i = 0;
i < j;
i++) {
149 }
150 break;
151 case 2:
154 break;
155 }
156 }
157
160 {
162
163 #if ARCH_X86
165 return;
166 #endif
167
170 for (
i = 0;
i < 64;
i++)
171 dst[(
i & 0x38) | ((
i & 6) >> 1) | ((
i & 1) << 2)] =
src[
i];
172 break;
174 for (
i = 0;
i < 64;
i++)
175 dst[(
i & 0x24) | ((
i & 3) << 3) | ((
i >> 3) & 3)] =
src[
i];
176 break;
178 for (
i = 0;
i < 64;
i++)
180 break;
181 default:
182 for (
i = 0;
i < 64;
i++)
184 break;
185 }
186 }
187
189 {
192 int err_inf, v;
193 int64_t err2, ti, ti1, it1, err_sum = 0;
194 int64_t sysErr[64], sysErrMax = 0;
195 int64_t err2_matrix[64], err2_max = 0;
196 int maxout = 0;
197 int blockSumErrMax = 0, blockSumErr;
199 const int vals=1<<
bits;
200 double omse, ome;
201 int spec_err;
202
204
205 err_inf = 0;
206 err2 = 0;
207 for (
i = 0;
i < 64;
i++)
208 err2_matrix[
i] = sysErr[
i] = 0;
212
214
215 if (!strcmp(
dct->name,
"IJG-AAN-INT")) {
216 for (
i = 0;
i < 64;
i++) {
219 }
220 }
221
223 if (!strcmp(
dct->name,
"PR-SSE2"))
224 for (
i = 0;
i < 64;
i++)
226
227 blockSumErr = 0;
228 for (
i = 0;
i < 64;
i++) {
230 err_sum += err;
232 if (v > err_inf)
233 err_inf = v;
237 blockSumErr += v;
240 }
241 if (blockSumErrMax < blockSumErr)
242 blockSumErrMax = blockSumErr;
243 }
244 for (
i = 0;
i < 64;
i++) {
246 err2_max =
FFMAX(err2_max ,
FFABS(err2_matrix[
i]));
247 }
248
249 for (
i = 0;
i < 64;
i++) {
252 printf(
"%7d ", (
int) sysErr[
i]);
253 }
255
258
259 spec_err = is_idct && (err_inf > 1 || omse > 0.02 ||
fabs(ome) > 0.0015);
261 spec_err = is_idct && ((
double) err2_max /
NB_ITS > 0.06 || (
double) sysErrMax /
NB_ITS > 0.015);
262
263 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
264 is_idct ?
"IDCT" :
"DCT",
dct->name, err_inf,
265 omse, ome, (
double) sysErrMax /
NB_ITS,
266 maxout, blockSumErrMax);
267
268 if (spec_err && !
dct->nonspec) {
270 return 1;
271 }
272
273 if (!speed)
274 return 0;
275
276 /* speed test */
277
280
282 it1 = 0;
283 do {
287 }
290 } while (ti1 < 1000000);
291
292 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT",
dct->name,
293 (double) it1 * 1000.0 / (double) ti1);
294
295 return 0;
296 }
297
300
302 {
304 static double c8[8][8];
305 static double c4[4][4];
306 double block1[64], block2[64], block3[64];
309
312
313 for (
i = 0;
i < 8;
i++) {
314 sum = 0;
315 for (j = 0; j < 8; j++) {
316 s = (
i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
317 c8[
i][j] =
s * cos(
M_PI *
i * (j + 0.5) / 8.0);
318 sum += c8[
i][j] * c8[
i][j];
319 }
320 }
321
322 for (
i = 0;
i < 4;
i++) {
323 sum = 0;
324 for (j = 0; j < 4; j++) {
325 s = (
i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
326 c4[
i][j] =
s * cos(
M_PI *
i * (j + 0.5) / 4.0);
327 sum += c4[
i][j] * c4[
i][j];
328 }
329 }
330 }
331
332 /* butterfly */
334 for (
i = 0;
i < 4;
i++) {
335 for (j = 0; j < 8; j++) {
340 }
341 }
342
343 /* idct8 on lines */
344 for (
i = 0;
i < 8;
i++) {
345 for (j = 0; j < 8; j++) {
346 sum = 0;
347 for (k = 0; k < 8; k++)
348 sum += c8[k][j] *
block1[8 *
i + k];
349 block2[8 *
i + j] = sum;
350 }
351 }
352
353 /* idct4 */
354 for (
i = 0;
i < 8;
i++) {
355 for (j = 0; j < 4; j++) {
356 /* top */
357 sum = 0;
358 for (k = 0; k < 4; k++)
359 sum += c4[k][j] * block2[8 * (2 * k) +
i];
360 block3[8 * (2 * j) +
i] = sum;
361
362 /* bottom */
363 sum = 0;
364 for (k = 0; k < 4; k++)
365 sum += c4[k][j] * block2[8 * (2 * k + 1) +
i];
366 block3[8 * (2 * j + 1) +
i] = sum;
367 }
368 }
369
370 /* clamp and store the result */
371 for (
i = 0;
i < 8;
i++) {
372 for (j = 0; j < 8; j++) {
373 v = block3[8 *
i + j];
374 if (v < 0) v = 0;
375 else if (v > 255) v = 255;
376 dest[
i * linesize + j] = (int)
rint(v);
377 }
378 }
379 }
380
382 void (*idct248_put)(uint8_t *dest,
383 ptrdiff_t line_size,
385 int speed)
386 {
387 int it,
i, it1, ti, ti1, err_max, v;
389
391
392 /* just one test to see if code is correct (precision is less
393 important here) */
394 err_max = 0;
396 /* XXX: use forward transform to generate values */
397 for (
i = 0;
i < 64;
i++)
400
401 for (
i = 0;
i < 64;
i++)
404
405 for (
i = 0;
i < 64;
i++)
408
409 for (
i = 0;
i < 64;
i++) {
411 if (v == 255)
413 if (v > err_max)
414 err_max = v;
415 }
416 #if 0
419 int j;
420 for(j=0;j<8;j++) {
422 }
424 }
425
428 int j;
429 for(j=0;j<8;j++) {
431 }
433 }
434 #endif
435 }
436 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248",
name, err_max);
437
438 if (!speed)
439 return;
440
442 it1 = 0;
443 do {
445 for (
i = 0;
i < 64;
i++)
448 }
451 } while (ti1 < 1000000);
452
453 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248",
name,
454 (double) it1 * 1000.0 / (double) ti1);
455 }
456
458 {
459 printf(
"dct-test [-i] [<test-number>] [<bits>]\n"
460 "test-number 0 -> test with random matrixes\n"
461 " 1 -> test with random sparse matrixes\n"
462 " 2 -> do 3. test from MPEG-4 std\n"
463 "bits Number of time domain bits to use, 8 is default\n"
464 "-i test IDCT implementations\n"
465 "-4 test IDCT248 implementations\n"
466 "-t speed test\n");
467 }
468
469 #if !HAVE_GETOPT
471 #endif
472
/*
 * Program entry point: parses the command-line flags, prints a banner,
 * runs the selected tests and reports the result.
 *
 * Returns 0 when err is zero and 1 otherwise; also returns 0 directly from
 * the option loop for 'h' or an unrecognised option.
 * NOTE(review): the statements that run the tests and update err are not
 * visible in this excerpt -- confirm against the complete file.
 */
473 int main(
int argc,
char **argv)
474 {
/* Flags set by the option loop below; all default to off. */
475 int test_idct = 0, test_248_dct = 0;
478 int speed = 0;
/* Non-zero once a failure has been recorded; drives the exit status. */
479 int err = 0;
481
483
/* Option loop: getopt is called with the option string "ih4t". */
484 for (;;) {
485 c =
getopt(argc, argv,
"ih4t");
487 break;
/* -i sets test_idct. */
489 case 'i':
490 test_idct = 1;
491 break;
/* -4 sets test_248_dct. */
492 case '4':
493 test_248_dct = 1;
494 break;
/* -t sets speed. */
495 case 't':
496 speed = 1;
497 break;
/* Any other option shares the 'h' path, which ends in return 0. */
498 default:
499 case 'h':
501 return 0;
502 }
503 }
504
508
509 printf(
"ffmpeg DCT/IDCT test\n");
510
/* Dispatch: test_248_dct takes precedence over test_idct. */
511 if (test_248_dct) {
513 } else {
515 if (test_idct) {
518
522 }
/* The final else branch only exists when CONFIG_FDCTDSP is enabled. */
523 #if CONFIG_FDCTDSP
524 else {
527
531 }
532 #endif /* CONFIG_FDCTDSP */
533 }
534
/* Report the value of err when it is non-zero. */
535 if (err)
536 printf(
"Error: %d.\n", err);
537
/* !! collapses any non-zero err to an exit status of exactly 1. */
538 return !!err;
539 }