FFmpeg: libavcodec/dct-test.c Source File

FFmpeg

[フレーム]

libavcodec

dct-test.c

Go to the documentation of this file.

1 /*

3 * 2007 Marc Hoffman <marc.hoffman@analog.com>

4 *

5 * This file is part of FFmpeg.

6 *

7 * FFmpeg is free software; you can redistribute it and/or

8 * modify it under the terms of the GNU Lesser General Public

9 * License as published by the Free Software Foundation; either

10 * version 2.1 of the License, or (at your option) any later version.

11 *

12 * FFmpeg is distributed in the hope that it will be useful,

13 * but WITHOUT ANY WARRANTY; without even the implied warranty of

14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

15 * Lesser General Public License for more details.

16 *

17 * You should have received a copy of the GNU Lesser General Public

18 * License along with FFmpeg; if not, write to the Free Software

19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

20 */

22 /**

23 * @file

25 * Started from sample code by Juan J. Sierralta P.

26 */

28 #include "config.h"

29 #include <stdlib.h>

30 #include <stdio.h>

31 #include <string.h>

32 #if HAVE_UNISTD_H

33 #include <unistd.h>

34 #endif

35 #include <math.h>

37 #include "libavutil/cpu.h"

38 #include "libavutil/common.h"

39 #include "libavutil/lfg.h"

40 #include "libavutil/time.h"

42 #include "dct.h"

43 #include "simple_idct.h"

44 #include "aandcttab.h"

45 #include "faandct.h"

46 #include "faanidct.h"

47 #include "x86/idct_xvid.h"

48 #include "dctref.h"

50 #undef printf

52 // BFIN

53 void ff_bfin_idct(int16_t *block);

54 void ff_bfin_fdct(int16_t *block);

56 // ALTIVEC

57 void ff_fdct_altivec(int16_t *block);

59 // ARM

60 void ff_j_rev_dct_arm(int16_t *data);

61 void ff_simple_idct_arm(int16_t *data);

62 void ff_simple_idct_armv5te(int16_t *data);

63 void ff_simple_idct_armv6(int16_t *data);

64 void ff_simple_idct_neon(int16_t *data);

66 void ff_simple_idct_axp(int16_t *data);

/**
 * Coefficient layout tag attached to each implementation under test.
 * permute() reorders the input according to this tag, and dct_error()
 * rescales the output of SCALE_PERM implementations.
 */
enum formattag {
    NO_PERM,
    MMX_PERM,
    MMX_SIMPLE_PERM,
    SCALE_PERM,
    SSE2_PERM,
    PARTTRANS_PERM,
    TRANSPOSE_PERM
};

/** One entry of the fdct_tab / idct_tab tables of transforms under test. */
struct algo {
    const char *name;              /* label printed in reports; a NULL name ends a table */
    void (*func)(int16_t *block);  /* transform applied in place to a 64-entry block */
    enum formattag format;         /* layout tag, see enum formattag */
    int mm_support;                /* CPU flag bits that must all be present; 0 = always run */
    int nonspec;                   /* non-zero: accuracy violations do not fail the run */
};

77 static int cpu_flags;

79 static const struct algo fdct_tab[] = {

80 { "REF-DBL", ff_ref_fdct, NO_PERM },

81 { "FAAN", ff_faandct, NO_PERM },

82 { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM },

83 { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },

85 #if HAVE_MMX_INLINE

86 { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },

87 #endif

88 #if HAVE_MMXEXT_INLINE

89 { "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT },

90 #endif

91 #if HAVE_SSE2_INLINE

92 { "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },

93 #endif

95 #if HAVE_ALTIVEC

96 { "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },

97 #endif

99 #if ARCH_BFIN

100 { "BFINfdct", ff_bfin_fdct, NO_PERM },

101 #endif

102

103 { 0 }

104 };

105

106 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM

107 void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,

108 int16_t *block, int16_t *qmat);

109

110 static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){

111 DECLARE_ALIGNED(16, static int16_t, qmat)[64];

112 DECLARE_ALIGNED(16, static int16_t, tmp)[64];

113 int i;

114

115 for(i=0; i<64; i++){

116 qmat[i]=4;

117 tmp[i]= dst[i];

118 }

119 ff_prores_idct_put_10_sse2(dst, 16, tmp, qmat);

120 }

121 #endif

122

123 static const struct algo idct_tab[] = {

124 { "FAANI", ff_faanidct, NO_PERM },

125 { "REF-DBL", ff_ref_idct, NO_PERM },

126 { "INT", ff_j_rev_dct, MMX_PERM },

127 { "SIMPLE-C", ff_simple_idct_8, NO_PERM },

128

129 #if HAVE_MMX_INLINE

130 { "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },

131 { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },

132 #endif

133 #if HAVE_MMXEXT_INLINE

134 { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },

135 #endif

136 #if HAVE_SSE2_INLINE

137 { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },

138 #if ARCH_X86_64 && HAVE_YASM

139 { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },

140 #endif

141 #endif

142

143 #if ARCH_BFIN

144 { "BFINidct", ff_bfin_idct, NO_PERM },

145 #endif

146

147 #if ARCH_ARM

148 { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },

149 { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },

150 #endif

151 #if HAVE_ARMV5TE

152 { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE },

153 #endif

154 #if HAVE_ARMV6

155 { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 },

156 #endif

157 #if HAVE_NEON

158 { "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON },

159 #endif

160

161 #if ARCH_ALPHA

162 { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },

163 #endif

164

165 { 0 }

166 };

167

168 #define AANSCALE_BITS 12

169

170 #define NB_ITS 20000

171 #define NB_ITS_SPEED 50000

172

173 static short idct_mmx_perm[64];

174

175 static short idct_simple_mmx_perm[64] = {

176 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,

177 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,

178 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,

179 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,

180 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,

181 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,

182 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,

183 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,

184 };

185

186 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };

187

188 static void idct_mmx_init(void)

189 {

190 int i;

191

192 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */

193 for (i = 0; i < 64; i++) {

194 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);

195 }

196 }

197

198 DECLARE_ALIGNED(16, static int16_t, block)[64];

199 DECLARE_ALIGNED(8, static int16_t, block1)[64];

200

201 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)

202 {

203 int i, j;

204

205 memset(block, 0, 64 * sizeof(*block));

206

207 switch (test) {

208 case 0:

209 for (i = 0; i < 64; i++)

210 block[i] = (av_lfg_get(prng) % (2*vals)) -vals;

211 if (is_idct) {

212 ff_ref_fdct(block);

213 for (i = 0; i < 64; i++)

214 block[i] >>= 3;

215 }

216 break;

217 case 1:

218 j = av_lfg_get(prng) % 10 + 1;

219 for (i = 0; i < j; i++) {

220 int idx = av_lfg_get(prng) % 64;

221 block[idx] = av_lfg_get(prng) % (2*vals) -vals;

222 }

223 break;

224 case 2:

225 block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);

226 block[63] = (block[0] & 1) ^ 1;

227 break;

228 }

229 }

230

231 static void permute(int16_t dst[64], const int16_t src[64], int perm)

232 {

233 int i;

234

235 if (perm == MMX_PERM) {

236 for (i = 0; i < 64; i++)

237 dst[idct_mmx_perm[i]] = src[i];

238 } else if (perm == MMX_SIMPLE_PERM) {

239 for (i = 0; i < 64; i++)

240 dst[idct_simple_mmx_perm[i]] = src[i];

241 } else if (perm == SSE2_PERM) {

242 for (i = 0; i < 64; i++)

243 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];

244 } else if (perm == PARTTRANS_PERM) {

245 for (i = 0; i < 64; i++)

246 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];

247 } else if (perm == TRANSPOSE_PERM) {

248 for (i = 0; i < 64; i++)

249 dst[(i>>3) | ((i<<3)&0x38)] = src[i];

250 } else {

251 for (i = 0; i < 64; i++)

252 dst[i] = src[i];

253 }

254 }

255

256 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)

257 {

258 void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;

259 int it, i, scale;

260 int err_inf, v;

261 int64_t err2, ti, ti1, it1, err_sum = 0;

262 int64_t sysErr[64], sysErrMax = 0;

263 int maxout = 0;

264 int blockSumErrMax = 0, blockSumErr;

265 AVLFG prng;

266 const int vals=1<<bits;

267 double omse, ome;

268 int spec_err;

269

270 av_lfg_init(&prng, 1);

271

272 err_inf = 0;

273 err2 = 0;

274 for (i = 0; i < 64; i++)

275 sysErr[i] = 0;

276 for (it = 0; it < NB_ITS; it++) {

277 init_block(block1, test, is_idct, &prng, vals);

278 permute(block, block1, dct->format);

279

280 dct->func(block);

281 emms_c();

282

283 if (dct->format == SCALE_PERM) {

284 for (i = 0; i < 64; i++) {

285 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];

286 block[i] = (block[i] * scale) >> AANSCALE_BITS;

287 }

288 }

289

290 ref(block1);

291

292 blockSumErr = 0;

293 for (i = 0; i < 64; i++) {

294 int err = block[i] - block1[i];

295 err_sum += err;

296 v = abs(err);

297 if (v > err_inf)

298 err_inf = v;

299 err2 += v * v;

300 sysErr[i] += block[i] - block1[i];

301 blockSumErr += v;

302 if (abs(block[i]) > maxout)

303 maxout = abs(block[i]);

304 }

305 if (blockSumErrMax < blockSumErr)

306 blockSumErrMax = blockSumErr;

307 }

308 for (i = 0; i < 64; i++)

309 sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));

310

311 for (i = 0; i < 64; i++) {

312 if (i % 8 == 0)

313 printf("\n");

314 printf("%7d ", (int) sysErr[i]);

315 }

316 printf("\n");

317

318 omse = (double) err2 / NB_ITS / 64;

319 ome = (double) err_sum / NB_ITS / 64;

320

321 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);

322

323 printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",

324 is_idct ? "IDCT" : "DCT", dct->name, err_inf,

325 omse, ome, (double) sysErrMax / NB_ITS,

326 maxout, blockSumErrMax);

327

328 if (spec_err && !dct->nonspec)

329 return 1;

330

331 if (!speed)

332 return 0;

333

334 /* speed test */

335

336 init_block(block, test, is_idct, &prng, vals);

337 permute(block1, block, dct->format);

338

339 ti = av_gettime();

340 it1 = 0;

341 do {

342 for (it = 0; it < NB_ITS_SPEED; it++) {

343 memcpy(block, block1, sizeof(block));

344 dct->func(block);

345 }

346 emms_c();

347 it1 += NB_ITS_SPEED;

348 ti1 = av_gettime() - ti;

349 } while (ti1 < 1000000);

350

351 printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,

352 (double) it1 * 1000.0 / (double) ti1);

353

354 return 0;

355 }

356

357 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];

358 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];

359

360 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)

361 {

362 static int init;

363 static double c8[8][8];

364 static double c4[4][4];

365 double block1[64], block2[64], block3[64];

366 double s, sum, v;

367 int i, j, k;

368

369 if (!init) {

370 init = 1;

371

372 for (i = 0; i < 8; i++) {

373 sum = 0;

374 for (j = 0; j < 8; j++) {

375 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);

376 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);

377 sum += c8[i][j] * c8[i][j];

378 }

379 }

380

381 for (i = 0; i < 4; i++) {

382 sum = 0;

383 for (j = 0; j < 4; j++) {

384 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);

385 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);

386 sum += c4[i][j] * c4[i][j];

387 }

388 }

389 }

390

391 /* butterfly */

392 s = 0.5 * sqrt(2.0);

393 for (i = 0; i < 4; i++) {

394 for (j = 0; j < 8; j++) {

395 block1[8 * (2 * i) + j] =

396 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;

397 block1[8 * (2 * i + 1) + j] =

398 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;

399 }

400 }

401

402 /* idct8 on lines */

403 for (i = 0; i < 8; i++) {

404 for (j = 0; j < 8; j++) {

405 sum = 0;

406 for (k = 0; k < 8; k++)

407 sum += c8[k][j] * block1[8 * i + k];

408 block2[8 * i + j] = sum;

409 }

410 }

411

412 /* idct4 */

413 for (i = 0; i < 8; i++) {

414 for (j = 0; j < 4; j++) {

415 /* top */

416 sum = 0;

417 for (k = 0; k < 4; k++)

418 sum += c4[k][j] * block2[8 * (2 * k) + i];

419 block3[8 * (2 * j) + i] = sum;

420

421 /* bottom */

422 sum = 0;

423 for (k = 0; k < 4; k++)

424 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];

425 block3[8 * (2 * j + 1) + i] = sum;

426 }

427 }

428

429 /* clamp and store the result */

430 for (i = 0; i < 8; i++) {

431 for (j = 0; j < 8; j++) {

432 v = block3[8 * i + j];

433 if (v < 0) v = 0;

434 else if (v > 255) v = 255;

435 dest[i * linesize + j] = (int) rint(v);

436 }

437 }

438 }

439

440 static void idct248_error(const char *name,

441 void (*idct248_put)(uint8_t *dest, int line_size,

442 int16_t *block),

443 int speed)

444 {

445 int it, i, it1, ti, ti1, err_max, v;

446 AVLFG prng;

447

448 av_lfg_init(&prng, 1);

449

450 /* just one test to see if code is correct (precision is less

451 important here) */

452 err_max = 0;

453 for (it = 0; it < NB_ITS; it++) {

454 /* XXX: use forward transform to generate values */

455 for (i = 0; i < 64; i++)

456 block1[i] = av_lfg_get(&prng) % 256 - 128;

457 block1[0] += 1024;

458

459 for (i = 0; i < 64; i++)

460 block[i] = block1[i];

461 idct248_ref(img_dest1, 8, block);

462

463 for (i = 0; i < 64; i++)

464 block[i] = block1[i];

465 idct248_put(img_dest, 8, block);

466

467 for (i = 0; i < 64; i++) {

468 v = abs((int) img_dest[i] - (int) img_dest1[i]);

469 if (v == 255)

470 printf("%d %d\n", img_dest[i], img_dest1[i]);

471 if (v > err_max)

472 err_max = v;

473 }

474 #if 0

475 printf("ref=\n");

476 for(i=0;i<8;i++) {

477 int j;

478 for(j=0;j<8;j++) {

479 printf(" %3d", img_dest1[i*8+j]);

480 }

481 printf("\n");

482 }

483

484 printf("out=\n");

485 for(i=0;i<8;i++) {

486 int j;

487 for(j=0;j<8;j++) {

488 printf(" %3d", img_dest[i*8+j]);

489 }

490 printf("\n");

491 }

492 #endif

493 }

494 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);

495

496 if (!speed)

497 return;

498

499 ti = av_gettime();

500 it1 = 0;

501 do {

502 for (it = 0; it < NB_ITS_SPEED; it++) {

503 for (i = 0; i < 64; i++)

504 block[i] = block1[i];

505 idct248_put(img_dest, 8, block);

506 }

507 emms_c();

508 it1 += NB_ITS_SPEED;

509 ti1 = av_gettime() - ti;

510 } while (ti1 < 1000000);

511

512 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,

513 (double) it1 * 1000.0 / (double) ti1);

514 }

515

/* Print the command line summary to standard output. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>] [<bits>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "bits Number of time domain bits to use, 8 is default\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n"
        "-t speed test\n";

    printf("%s", usage);
}

527

528 #if !HAVE_GETOPT

529 #include "compat/getopt.c"

530 #endif

531

532 int main(int argc, char **argv)

533 {

534 int test_idct = 0, test_248_dct = 0;

535 int c, i;

536 int test = 1;

537 int speed = 0;

538 int err = 0;

539 int bits=8;

540

541 cpu_flags = av_get_cpu_flags();

542

543 ff_ref_dct_init();

544 idct_mmx_init();

545

546 for (;;) {

547 c = getopt(argc, argv, "ih4t");

548 if (c == -1)

549 break;

550 switch (c) {

551 case 'i':

552 test_idct = 1;

553 break;

554 case '4':

555 test_248_dct = 1;

556 break;

557 case 't':

558 speed = 1;

559 break;

560 default:

561 case 'h':

562 help();

563 return 0;

564 }

565 }

566

567 if (optind < argc)

568 test = atoi(argv[optind]);

569 if(optind+1 < argc) bits= atoi(argv[optind+1]);

570

571 printf("ffmpeg DCT/IDCT test\n");

572

573 if (test_248_dct) {

574 idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);

575 } else {

576 const struct algo *algos = test_idct ? idct_tab : fdct_tab;

577 for (i = 0; algos[i].name; i++)

578 if (!(~cpu_flags & algos[i].mm_support)) {

579 err |= dct_error(&algos[i], test, test_idct, speed, bits);

580 }

581 }

582

583 if (err)

584 printf("Error: %d.\n", err);

585

586 return !!err;

587 }

Generated on Sat Jan 25 2014 19:51:46 for FFmpeg by doxygen 1.8.2