FFmpeg: tests/checkasm/h264dsp.c Source File

FFmpeg

[フレーム]

h264dsp.c

Go to the documentation of this file.

1 /*

3 *

4 * This file is part of FFmpeg.

5 *

6 * FFmpeg is free software; you can redistribute it and/or modify

7 * it under the terms of the GNU General Public License as published by

8 * the Free Software Foundation; either version 2 of the License, or

9 * (at your option) any later version.

10 *

11 * FFmpeg is distributed in the hope that it will be useful,

12 * but WITHOUT ANY WARRANTY; without even the implied warranty of

13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

14 * GNU General Public License for more details.

15 *

16 * You should have received a copy of the GNU General Public License along

17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,

18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

19 */

21 #include <string.h>

22 #include "checkasm.h"

23 #include "libavcodec/h264dsp.h"

24 #include "libavcodec/h264data.h"

25 #include "libavcodec/h264idct.h"

26 #include "libavcodec/h264_parse.h"

27 #include "libavutil/common.h"

28 #include "libavutil/intreadwrite.h"

29 #include "libavutil/mem_internal.h"

/*
 * AND-masks applied to each random 32-bit word when filling pixel buffers
 * for the IDCT tests. The table is indexed by the position of the bit depth
 * in { 8, 9, 10, 12, 14 }; from 9 bits upwards every 16-bit half of the
 * word is limited to bit_depth significant bits.
 */
static const uint32_t pixel_mask[5] = {
    0xffffffff, /*  8-bit */
    0x01ff01ff, /*  9-bit */
    0x03ff03ff, /* 10-bit */
    0x0fff0fff, /* 12-bit */
    0x3fff3fff, /* 14-bit */
};

/*
 * AND-masks used by the loop filter tests, indexed by (bit_depth - 8) for
 * bit depths 8, 9 and 10.
 */
static const uint32_t pixel_mask_lf[3] = {
    0xff0fff0f, /*  8-bit */
    0x01ff000f, /*  9-bit */
    0x03ff000f, /* 10-bit */
};

/* Both size macros read a variable named bit_depth at the expansion site. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)  /* bytes per pixel       */
#define SIZEOF_COEF  (2 * SIZEOF_PIXEL)     /* bytes per coefficient */

/* Row pitch, in bytes, of the scratch pixel buffers used by the IDCT tests. */
#define PIXEL_STRIDE 16

/*
 * Fill the first sz rows of the caller's src and dst pixel buffers with
 * masked random words and store the per-pixel difference (src - dst) of the
 * top-left sz x sz area into coef.
 *
 * The macro is unhygienic on purpose: it uses the names src, dst, coef, sz
 * and bit_depth from the expansion site. idx selects the pixel_mask[] entry.
 *
 * For bit_depth == 8 pixels are read as bytes and coef is written through
 * its own element type; otherwise pixels are read as uint16_t and coef is
 * written as int32_t. Random words are drawn in the order src, dst for each
 * 4-byte group so the sequence of rnd() calls is fixed.
 */
#define randomize_buffers(idx)                                               \
    do {                                                                     \
        const uint32_t px_mask = pixel_mask[(idx)];                          \
        for (int row = 0; row < sz; row++) {                                 \
            for (int byte = 0; byte < PIXEL_STRIDE; byte += 4) {             \
                AV_WN32A(src + row * PIXEL_STRIDE + byte, rnd() & px_mask);  \
                AV_WN32A(dst + row * PIXEL_STRIDE + byte, rnd() & px_mask);  \
            }                                                                \
            for (int col = 0; col < sz; col++) {                             \
                const int pos = row * sz + col;                              \
                if (bit_depth != 8) {                                        \
                    const int wide = row * (PIXEL_STRIDE / 2) + col;         \
                    ((int32_t *)coef)[pos] = ((uint16_t *)src)[wide] -       \
                                             ((uint16_t *)dst)[wide];        \
                } else {                                                     \
                    coef[pos] = src[row * PIXEL_STRIDE + col] -              \
                                dst[row * PIXEL_STRIDE + col];               \
                }                                                            \
            }                                                                \
        }                                                                    \
    } while (0)

/*
 * Forward 4x4 transform plus scaling, used to turn a block of pixel
 * differences into coefficients that can be fed to the inverse transforms
 * under test.
 *
 * Expands to a function dct4x4_<size>(dctcoef *coef) that works in place on
 * 16 coefficients stored row after row. The first pass transforms every row
 * and stores the result transposed in tmp; the second pass transforms the
 * rows of tmp back into coef. Each value is then multiplied by one of three
 * constants, chosen by the parity of its row and column index, rounded and
 * shifted right by 15.
 */
#define dct4x4_impl(size, dctcoef)                                           \
static void dct4x4_##size(dctcoef *coef)                                     \
{                                                                            \
    static const int64_t scale[3] = { 13107 * 10, 8066 * 13, 5243 * 16 };    \
    dctcoef tmp[16];                                                         \
    for (int r = 0; r < 4; r++) {                                            \
        const dctcoef *in = coef + 4 * r;                                    \
        const int s03 = in[0] + in[3];                                       \
        const int s12 = in[1] + in[2];                                       \
        const int d03 = in[0] - in[3];                                       \
        const int d12 = in[1] - in[2];                                       \
        tmp[r +  0] = s03 + s12;                                             \
        tmp[r +  4] = 2 * d03 + d12;                                         \
        tmp[r +  8] = s03 - s12;                                             \
        tmp[r + 12] = d03 - 2 * d12;                                         \
    }                                                                        \
    for (int r = 0; r < 4; r++) {                                            \
        const dctcoef *in  = tmp  + 4 * r;                                   \
        dctcoef       *out = coef + 4 * r;                                   \
        const int s03 = in[0] + in[3];                                       \
        const int s12 = in[1] + in[2];                                       \
        const int d03 = in[0] - in[3];                                       \
        const int d12 = in[1] - in[2];                                       \
        out[0] = s03 + s12;                                                  \
        out[1] = 2 * d03 + d12;                                              \
        out[2] = s03 - s12;                                                  \
        out[3] = d03 - 2 * d12;                                              \
    }                                                                        \
    for (int k = 0; k < 16; k++) {                                           \
        /* (row & 1) + (column & 1) for the flat index k */                  \
        const int parity = ((k >> 2) & 1) + (k & 1);                         \
        coef[k] = (coef[k] * scale[parity] + (1 << 14)) >> 15;               \
    }                                                                        \
}

/*
 * One 8-point forward transform pass.
 *
 * Reads eight values from src at multiples of srcstride and writes eight
 * values to dst at multiples of dststride. All inputs are read before any
 * output is stored. The temporaries live in the do/while scope, so the
 * macro can be used as a single statement.
 */
#define DCT8_1D(src, srcstride, dst, dststride) do {                         \
    const int s07 = (src)[(srcstride) * 0] + (src)[(srcstride) * 7];         \
    const int d07 = (src)[(srcstride) * 0] - (src)[(srcstride) * 7];         \
    const int s16 = (src)[(srcstride) * 1] + (src)[(srcstride) * 6];         \
    const int d16 = (src)[(srcstride) * 1] - (src)[(srcstride) * 6];         \
    const int s25 = (src)[(srcstride) * 2] + (src)[(srcstride) * 5];         \
    const int d25 = (src)[(srcstride) * 2] - (src)[(srcstride) * 5];         \
    const int s34 = (src)[(srcstride) * 3] + (src)[(srcstride) * 4];         \
    const int d34 = (src)[(srcstride) * 3] - (src)[(srcstride) * 4];         \
    const int e0  = s07 + s34;                                               \
    const int e1  = s16 + s25;                                               \
    const int e2  = s07 - s34;                                               \
    const int e3  = s16 - s25;                                               \
    const int o4  = d16 + d25 + (d07 + (d07 >> 1));                          \
    const int o5  = d07 - d34 - (d25 + (d25 >> 1));                          \
    const int o6  = d07 + d34 - (d16 + (d16 >> 1));                          \
    const int o7  = d16 - d25 + (d34 + (d34 >> 1));                          \
    (dst)[(dststride) * 0] = e0 + e1;                                        \
    (dst)[(dststride) * 1] = o4 + (o7 >> 2);                                 \
    (dst)[(dststride) * 2] = e2 + (e3 >> 1);                                 \
    (dst)[(dststride) * 3] = o5 + (o6 >> 2);                                 \
    (dst)[(dststride) * 4] = e0 - e1;                                        \
    (dst)[(dststride) * 5] = o6 - (o5 >> 2);                                 \
    (dst)[(dststride) * 6] = (e2 >> 1) - e3;                                 \
    (dst)[(dststride) * 7] = (o4 >> 2) - o7;                                 \
} while (0)

/*
 * Forward 8x8 transform plus scaling, the 8x8 counterpart of dct4x4_impl.
 *
 * Expands to dct8x8_<size>(dctcoef *coef), operating in place on 64
 * coefficients. The first pass runs DCT8_1D down every column of coef into
 * the same column of tmp; the second pass runs it along every row of tmp
 * and stores the result as a column of coef. Each value is then multiplied
 * by one of six constants, selected through idxmap from the low two bits of
 * its row and column index, rounded and shifted right by 18.
 */
#define dct8x8_impl(size, dctcoef)                                           \
static void dct8x8_##size(dctcoef *coef)                                     \
{                                                                            \
    static const int scale[6] = {                                            \
        13107 * 20, 11428 * 18, 20972 * 32,                                  \
        12222 * 19, 16777 * 25, 15481 * 24,                                  \
    };                                                                       \
    static const int idxmap[16] = {                                          \
        0, 3, 4, 3,                                                          \
        3, 1, 5, 1,                                                          \
        4, 5, 2, 5,                                                          \
        3, 1, 5, 1,                                                          \
    };                                                                       \
    dctcoef tmp[64];                                                         \
    for (int k = 0; k < 8; k++)                                              \
        DCT8_1D(coef + k, 8, tmp + k, 8);                                    \
                                                                             \
    for (int k = 0; k < 8; k++)                                              \
        DCT8_1D(tmp + 8 * k, 1, coef + k, 8);                                \
                                                                             \
    for (int k = 0; k < 64; k++) {                                           \
        /* (row & 3) * 4 + (column & 3) for the flat index k */              \
        const int cls = idxmap[((k >> 3) & 3) * 4 + (k & 3)];                \
        coef[k] = ((int64_t)coef[k] * scale[cls] + (1 << 17)) >> 18;         \
    }                                                                        \
}

/* 16-bit coefficient variants are used for 8-bit content, 32-bit otherwise. */
dct4x4_impl(16, int16_t)
dct4x4_impl(32, int32_t)

dct8x8_impl(16, int16_t)
dct8x8_impl(32, int32_t)

/*
 * Forward-transform a 4x4 block in place.
 *
 * coef holds 16 int16_t values when bit_depth is 8; for any other bit depth
 * the same memory is treated as 16 int32_t values.
 */
static void dct4x4(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct4x4_32((int32_t *) coef);
        return;
    }
    dct4x4_16(coef);
}

/*
 * Forward-transform an 8x8 block in place.
 *
 * coef holds 64 int16_t values when bit_depth is 8; for any other bit depth
 * the same memory is treated as 64 int32_t values.
 */
static void dct8x8(int16_t *coef, int bit_depth)
{
    if (bit_depth != 8) {
        dct8x8_32((int32_t *) coef);
        return;
    }
    dct8x8_16(coef);
}

173

174

175 static void check_idct(void)

176 {

177 static const int depths[5] = { 8, 9, 10, 12, 14 };

178 LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]);

179 LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]);

180 LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);

181 LOCAL_ALIGNED_16(uint8_t, dst1_base, [8 * 8 * 2 + 32]);

182 LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]);

183 LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]);

184 LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]);

185 H264DSPContext h;

186 int bit_depth, sz, align, dc, i;

187 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride);

188

189 for (i = 0; i < FF_ARRAY_ELEMS(depths); i++) {

190 bit_depth = depths[i];

191 ff_h264dsp_init(&h, bit_depth, 1);

192

193 for (dc = 0; dc <= 2; dc++) {

194 for (sz = 4; sz <= 8; sz += 4) {

195 void (*idct)(uint8_t *, int16_t *, int) = NULL;

196 const char fmts[3][28] = {

197 "h264_idct%d_add_%dbpp", "h264_idct%d_dc_add_%dbpp",

198 "h264_add_pixels%d_%dbpp",

199 };

200

201 randomize_buffers(i);

202

203 if (sz == 4)

204 dct4x4(coef, bit_depth);

205 else

206 dct8x8(coef, bit_depth);

207

208 switch ((sz << 2) | dc) {

209 case (4 << 2) | 0: idct = h.h264_idct_add; break;

210 case (4 << 2) | 1: idct = h.h264_idct_dc_add; break;

211 case (4 << 2) | 2: idct = h.h264_add_pixels4_clear; break;

212 case (8 << 2) | 0: idct = h.h264_idct8_add; break;

213 case (8 << 2) | 1: idct = h.h264_idct8_dc_add; break;

214 case (8 << 2) | 2: idct = h.h264_add_pixels8_clear; break;

215 }

216

217 if (check_func(idct, fmts[dc], sz, bit_depth)) {

218 for (align = 0; align < 16; align += sz * SIZEOF_PIXEL) {

219 uint8_t *dst1 = dst1_base + align;

220 if (dc) {

221 memset(subcoef0, 0, sz * sz * SIZEOF_COEF);

222 memcpy(subcoef0, coef, SIZEOF_COEF);

223 } else {

224 memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);

225 }

226 memcpy(dst0, dst, sz * PIXEL_STRIDE);

227 memcpy(dst1, dst, sz * PIXEL_STRIDE);

228 memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF);

229 call_ref(dst0, subcoef0, PIXEL_STRIDE);

230 call_new(dst1, subcoef1, PIXEL_STRIDE);

231 if (memcmp(dst0, dst1, sz * PIXEL_STRIDE) ||

232 memcmp(subcoef0, subcoef1, sz * sz * SIZEOF_COEF))

233 fail();

234 bench_new(dst1, subcoef1, sz * SIZEOF_PIXEL);

235 }

236 }

237 }

238 }

239 }

240 }

241

242 static void check_idct_multiple(void)

243 {

244 LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]);

245 LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);

246 LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]);

247 LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]);

248 LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);

249 LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);

250 LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]);

251 H264DSPContext h;

252 int bit_depth, i, y, func;

253 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);

254

255 for (bit_depth = 8; bit_depth <= 10; bit_depth++) {

256 ff_h264dsp_init(&h, bit_depth, 1);

257 for (func = 0; func < 3; func++) {

258 void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;

259 const char *name;

260 int sz = 4, intra = 0;

261 int block_offset[16] = { 0 };

262 switch (func) {

263 case 0:

264 idct = h.h264_idct_add16;

265 name = "h264_idct_add16";

266 break;

267 case 1:

268 idct = h.h264_idct_add16intra;

269 name = "h264_idct_add16intra";

270 intra = 1;

271 break;

272 case 2:

273 idct = h.h264_idct8_add4;

274 name = "h264_idct8_add4";

275 sz = 8;

276 break;

277 }

278 memset(nnzc, 0, 15 * 8);

279 memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);

280 for (i = 0; i < 16 * 16; i += sz * sz) {

281 uint8_t src[8 * 8 * 2];

282 uint8_t dst[8 * 8 * 2];

283 int16_t coef[8 * 8 * 2];

284 int index = i / sz;

285 int block_y = (index / 16) * sz;

286 int block_x = index % 16;

287 int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;

288 int nnz = rnd() % 3;

289

290 randomize_buffers(bit_depth - 8);

291 if (sz == 4)

292 dct4x4(coef, bit_depth);

293 else

294 dct8x8(coef, bit_depth);

295

296 for (y = 0; y < sz; y++)

297 memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],

298 &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);

299

300 if (nnz > 1)

301 nnz = sz * sz;

302 memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],

303 coef, nnz * SIZEOF_COEF);

304

305 if (intra && nnz == 1)

306 nnz = 0;

307

308 nnzc[scan8[i / 16]] = nnz;

309 block_offset[i / 16] = offset;

310 }

311

312 if (check_func(idct, "%s_%dbpp", name, bit_depth)) {

313 memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);

314 memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);

315 memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);

316 memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);

317 call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);

318 call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);

319 if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) ||

320 memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))

321 fail();

322 bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);

323 }

324 }

325 }

326 }

327

328 static void check_idct_dequant(void)

329 {

330 static const int depths[5] = { 8, 9, 10, 12, 14 };

331 LOCAL_ALIGNED_16(int16_t, src16, [16]);

332 LOCAL_ALIGNED_16(int32_t, src32, [16]);

333 LOCAL_ALIGNED_16(int16_t, dst0_16, [16 * 16]);

334 LOCAL_ALIGNED_16(int16_t, dst1_16, [16 * 16]);

335 LOCAL_ALIGNED_16(int32_t, dst0_32, [16 * 16]);

336 LOCAL_ALIGNED_16(int32_t, dst1_32, [16 * 16]);

337 H264DSPContext h;

338 int bit_depth, i, qmul;

339 declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_SSE2, void, int16_t *output, int16_t *input, int qmul);

340

341 qmul = rnd() % 4096;

342

343 for (i = 0; i < FF_ARRAY_ELEMS(depths); i++) {

344 bit_depth = depths[i];

345 ff_h264dsp_init(&h, bit_depth, 1);

346

347 void *src, *dst_ref, *dst_new;

348 if (bit_depth == 8) {

349 src = src16;

350 dst_ref = dst0_16;

351 dst_new = dst1_16;

352 for (int j = 0; j < 16; j++)

353 src16[j] = (rnd() % 512) - 256;

354 } else {

355 src = src32;

356 dst_ref = dst0_32;

357 dst_new = dst1_32;

358 for (int j = 0; j < 16; j++)

359 src32[j] = (rnd() % (1 << (bit_depth + 1))) - (1 << bit_depth);

360 }

361 memset(dst_ref, 0, 16 * 16 * SIZEOF_COEF);

362 memset(dst_new, 0, 16 * 16 * SIZEOF_COEF);

363

364 if (check_func(h.h264_luma_dc_dequant_idct, "h264_luma_dc_dequant_idct_%d", bit_depth)) {

365

366 call_ref(dst_ref, src, qmul);

367 call_new(dst_new, src, qmul);

368 checkasm_check_dctcoef(dst0, 16*SIZEOF_COEF, dst1, 16*SIZEOF_COEF, 16, 16, "dst");

369 bench_new(dst_new, src, qmul);

370 }

371 }

372 }

373

374

375 static void check_loop_filter(void)

376 {

377 LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);

378 LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);

379 LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);

380 H264DSPContext h;

381 int bit_depth;

382 int alphas[36], betas[36];

383 int8_t tc0[36][4];

384

385 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,

386 int alpha, int beta, int8_t *tc0);

387

388 for (bit_depth = 8; bit_depth <= 10; bit_depth++) {

389 int i, j, a, c;

390 uint32_t mask = pixel_mask_lf[bit_depth - 8];

391 ff_h264dsp_init(&h, bit_depth, 1);

392 for (i = 35, a = 255, c = 250; i >= 0; i--) {

393 alphas[i] = a << (bit_depth - 8);

394 betas[i] = (i + 1) / 2 << (bit_depth - 8);

395 tc0[i][0] = tc0[i][3] = (c + 6) / 10;

396 tc0[i][1] = (c + 7) / 15;

397 tc0[i][2] = (c + 9) / 20;

398 a = a*9/10;

399 c = c*9/10;

400 }

401

402 #define CHECK_LOOP_FILTER(name, align, idc) \

403 do { \

404 if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \

405 for (j = 0; j < 36; j++) { \

406 intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \

407 for (i = 0; i < 1024; i+=4) { \

408 AV_WN32A(dst + i, rnd() & mask); \

409 } \

410 memcpy(dst0, dst, 32 * 16 * 2); \

411 memcpy(dst1, dst, 32 * 16 * 2); \

412 \

413 call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \

414 call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \

415 if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \

416 fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d " \

417 "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \

418 tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \

419 fail(); \

420 } \

421 bench_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]);\

422 } \

423 } \

424 } while (0)

425

426 CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1,);

427 CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0,);

428 CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0,);

429 CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1,);

430 CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0,);

431 CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0,);

432

433 ff_h264dsp_init(&h, bit_depth, 2);

434 CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0, 422);

435 CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0, 422);

436 #undef CHECK_LOOP_FILTER

437 }

438 }

439

440 static void check_loop_filter_intra(void)

441 {

442 LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);

443 LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);

444 LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);

445 H264DSPContext h;

446 int bit_depth;

447 int alphas[36], betas[36];

448

449 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,

450 int alpha, int beta);

451

452 for (bit_depth = 8; bit_depth <= 10; bit_depth++) {

453 int i, j, a;

454 uint32_t mask = pixel_mask_lf[bit_depth - 8];

455 ff_h264dsp_init(&h, bit_depth, 1);

456 for (i = 35, a = 255; i >= 0; i--) {

457 alphas[i] = a << (bit_depth - 8);

458 betas[i] = (i + 1) / 2 << (bit_depth - 8);

459 a = a*9/10;

460 }

461

462 #define CHECK_LOOP_FILTER(name, align, idc) \

463 do { \

464 if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \

465 for (j = 0; j < 36; j++) { \

466 intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \

467 for (i = 0; i < 1024; i+=4) { \

468 AV_WN32A(dst + i, rnd() & mask); \

469 } \

470 memcpy(dst0, dst, 32 * 16 * 2); \

471 memcpy(dst1, dst, 32 * 16 * 2); \

472 \

473 call_ref(dst0 + off, 32, alphas[j], betas[j]); \

474 call_new(dst1 + off, 32, alphas[j], betas[j]); \

475 if (memcmp(dst0, dst1, 32 * 16 * SIZEOF_PIXEL)) { \

476 fprintf(stderr, #name #idc ": j:%d, alpha:%d beta:%d\n", \

477 j, alphas[j], betas[j]); \

478 fail(); \

479 } \

480 bench_new(dst1 + off, 32, alphas[j], betas[j]); \

481 } \

482 } \

483 } while (0)

484

485 CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1,);

486 CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0,);

487 CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0,);

488 CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1,);

489 CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0,);

490 CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0,);

491

492 ff_h264dsp_init(&h, bit_depth, 2);

493 CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0, 422);

494 CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0, 422);

495 #undef CHECK_LOOP_FILTER

496 }

497 }

498

/*
 * Entry point of the H.264 DSP tests: runs every test group in this file
 * and emits one report line per group ("idct", "loop_filter",
 * "loop_filter_intra").
 */
void checkasm_check_h264dsp(void)
{
    /* the three inverse transform tests are reported together */
    check_idct();
    check_idct_multiple();
    check_idct_dequant();
    report("idct");

    /* loop filters taking a tc0 table */
    check_loop_filter();
    report("loop_filter");

    /* intra loop filters */
    check_loop_filter_intra();
    report("loop_filter_intra");
}

CHECK_LOOP_FILTER

#define CHECK_LOOP_FILTER(name, align, idc)

func

int(* func)(AVBPrint *dst, const char *in, const char *arg)

Definition: jacosubdec.c:68

declare_func_emms

#define declare_func_emms(cpu_flags, ret,...)

Definition: checkasm.h:203

name

it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name

Definition: writing_filters.txt:88

mem_internal.h

output

filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output

Definition: filter_design.txt:226

check_idct

static void check_idct(void)

Definition: h264dsp.c:175

mask

int mask

Definition: mediacodecdec_common.c:154

h264_parse.h

SIZEOF_PIXEL

#define SIZEOF_PIXEL

Definition: h264dsp.c:34

check_func

#define check_func(func,...)

Definition: checkasm.h:197

h264_v_loop_filter_luma_intra

static void FUNCC() h264_v_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)

Definition: h264dsp_template.c:218

call_ref

#define call_ref(...)

Definition: checkasm.h:212

bit_depth

static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)

Definition: af_astats.c:246

pix

enum AVPixelFormat pix

Definition: ohcodec.c:55

h264_h_loop_filter_chroma_mbaff_intra

static void FUNCC() h264_h_loop_filter_chroma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)

Definition: h264dsp_template.c:317

fail

#define fail()

Definition: checkasm.h:206

pixel_mask

static const uint32_t pixel_mask[5]

Definition: h264dsp.c:31

h264_h_loop_filter_luma_mbaff_intra

static void FUNCC() h264_h_loop_filter_luma_mbaff_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)

Definition: h264dsp_template.c:226

checkasm.h

scan8

static const uint8_t scan8[16 *3+3]

Definition: h264_parse.h:40

idct

static void idct(int16_t block[64])

Definition: 4xm.c:167

h264_h_loop_filter_chroma_mbaff

static void FUNCC() h264_h_loop_filter_chroma_mbaff(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)

Definition: h264dsp_template.c:272

h264_h_loop_filter_luma_intra

static void FUNCC() h264_h_loop_filter_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)

Definition: h264dsp_template.c:222

rnd

#define rnd()

Definition: checkasm.h:190

checkasm_check_h264dsp

void checkasm_check_h264dsp(void)

Definition: h264dsp.c:499

FF_ARRAY_ELEMS

#define FF_ARRAY_ELEMS(a)

Definition: sinewin_tablegen.c:29

checkasm_check_dctcoef

#define checkasm_check_dctcoef(buf1, stride1, buf2, stride2,...)

Definition: checkasm.h:482

intreadwrite.h

LOCAL_ALIGNED_16

#define LOCAL_ALIGNED_16(t, v,...)

Definition: mem_internal.h:130

h264data.h

call_new

#define call_new(...)

Definition: checkasm.h:315

NULL

#define NULL

Definition: coverity.c:32

h264idct.h

h264dsp.h

index

int index

Definition: gxfenc.c:90

Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c

Definition: undefined.txt:32

dct8x8

static void dct8x8(int16_t *coef, int bit_depth)

Definition: h264dsp.c:165

dct8x8_impl

#define dct8x8_impl(size, dctcoef)

Definition: h264dsp.c:121

AV_CPU_FLAG_SSE2

#define AV_CPU_FLAG_SSE2

PIV SSE2 functions.

Definition: cpu.h:35

H264DSPContext

Context for storing H.264 DSP functions.

Definition: h264dsp.h:42

Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]

Definition: snow.txt:400

dst

uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst

Definition: dsp.h:87

h264_v_loop_filter_chroma_intra

static void FUNCC() h264_v_loop_filter_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)

Definition: h264dsp_template.c:309

align

static const uint8_t *BS_FUNC() align(BSCTX *bc)

Skip bits to a byte boundary.

Definition: bitstream_template.h:419

dct4x4_impl

#define dct4x4_impl(size, dctcoef)

Definition: h264dsp.c:60

The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a

Definition: undefined.txt:41

offset

it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset

Definition: writing_filters.txt:86

check_idct_multiple

static void check_idct_multiple(void)

Definition: h264dsp.c:242

h264_h_loop_filter_luma

static void FUNCC() h264_h_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)

Definition: h264dsp_template.c:156

input

and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input

Definition: filter_design.txt:172

check_loop_filter_intra

static void check_loop_filter_intra(void)

Definition: h264dsp.c:440

report

#define report

Definition: checkasm.h:209

check_loop_filter

static void check_loop_filter(void)

Definition: h264dsp.c:375

SIZEOF_COEF

#define SIZEOF_COEF