FFmpeg: tests/checkasm/vp8dsp.c Source File

FFmpeg

[フレーム]

vp8dsp.c

Go to the documentation of this file.

1 /*

3 *

4 * This file is part of FFmpeg.

5 *

6 * FFmpeg is free software; you can redistribute it and/or modify

7 * it under the terms of the GNU General Public License as published by

8 * the Free Software Foundation; either version 2 of the License, or

9 * (at your option) any later version.

10 *

11 * FFmpeg is distributed in the hope that it will be useful,

12 * but WITHOUT ANY WARRANTY; without even the implied warranty of

13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

14 * GNU General Public License for more details.

15 *

16 * You should have received a copy of the GNU General Public License along

17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,

18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

19 */

21 #include <stdbool.h>

22 #include <string.h>

24 #include "config_components.h"

25 #include "libavcodec/vp8dsp.h"

27 #include "libavutil/common.h"

28 #include "libavutil/intreadwrite.h"

29 #include "libavutil/mem_internal.h"

31 #include "checkasm.h"

33 #define PIXEL_STRIDE 16

/*
 * Fill a 4x4 area of both src and dst (rows are `stride` bytes apart) with
 * random bytes and store the per-pixel difference src - dst in the 16-entry
 * array coef (row-major). Each row is written with one aligned 32-bit store,
 * so src and dst rows must be suitably aligned.
 */
#define randomize_buffers(src, dst, stride, coef)                             \
    do {                                                                      \
        int rb_row, rb_col;                                                   \
        for (rb_row = 0; rb_row < 4; rb_row++) {                              \
            AV_WN32A((src) + rb_row * (stride), rnd());                       \
            AV_WN32A((dst) + rb_row * (stride), rnd());                       \
            for (rb_col = 0; rb_col < 4; rb_col++)                            \
                (coef)[rb_row * 4 + rb_col] =                                 \
                    (src)[rb_row * (stride) + rb_col] -                       \
                    (dst)[rb_row * (stride) + rb_col];                        \
        }                                                                     \
    } while (0)

/**
 * Forward 4x4 transform applied in place to the 16 values in coef
 * (row-major). The tests below use it to turn random pixel differences
 * into coefficient blocks for the inverse transforms being checked.
 */
static void dct4x4(int16_t *coef)
{
    /* First pass: one row at a time, with the inputs scaled up by 8. */
    for (int row = 0; row < 4; row++) {
        int16_t *const r = coef + 4 * row;
        const int outer_sum  = (r[0] + r[3]) * 8;
        const int inner_sum  = (r[1] + r[2]) * 8;
        const int inner_diff = (r[1] - r[2]) * 8;
        const int outer_diff = (r[0] - r[3]) * 8;

        r[0] = outer_sum + inner_sum;
        r[1] = (inner_diff * 2217 + outer_diff * 5352 + 14500) >> 12;
        r[2] = outer_sum - inner_sum;
        r[3] = (outer_diff * 2217 - inner_diff * 5352 + 7500) >> 12;
    }

    /* Second pass: one column at a time, with rounding and down-shift. */
    for (int col = 0; col < 4; col++) {
        int16_t *const c = coef + col;
        const int outer_sum  = c[0] + c[12];
        const int inner_sum  = c[4] + c[8];
        const int inner_diff = c[4] - c[8];
        const int outer_diff = c[0] - c[12];

        c[0]  = (outer_sum + inner_sum + 7) >> 4;
        c[4]  = ((inner_diff * 2217 + outer_diff * 5352 + 12000) >> 16) + !!outer_diff;
        c[8]  = (outer_sum - inner_sum + 7) >> 4;
        c[12] = (outer_diff * 2217 - inner_diff * 5352 + 51000) >> 16;
    }
}

/*
 * One 4-point butterfly of wht4x4(): reads v[0], v[step], v[2*step] and
 * v[3*step], and writes the results back multiplied by scale.
 */
static void wht4_1d(int16_t *v, int step, int scale)
{
    int a = v[0 * step];
    int b = v[1 * step];
    int c = v[2 * step];
    int d = v[3 * step];
    int e;

    a += b;
    d -= c;
    e  = (a - d) >> 1;
    b  = e - b;
    c  = e - c;
    a -= c;
    d += b;

    /* Note the output order: a, c, d, b. */
    v[0 * step] = a * scale;
    v[1 * step] = c * scale;
    v[2 * step] = d * scale;
    v[3 * step] = b * scale;
}

/**
 * Forward 4x4 Walsh-Hadamard style transform applied in place to the 16
 * values in coef (row-major): columns first, then rows, with the row pass
 * doubling its outputs.
 */
static void wht4x4(int16_t *coef)
{
    for (int col = 0; col < 4; col++)
        wht4_1d(coef + col, 4, 1);

    for (int row = 0; row < 4; row++)
        wht4_1d(coef + 4 * row, 1, 2);
}

112

113 static void check_idct(VP8DSPContext *d, bool is_vp7)

114 {

115 LOCAL_ALIGNED_16(uint8_t, src, [4 * 4]);

116 LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4]);

117 LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4]);

118 LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4]);

119 LOCAL_ALIGNED_16(int16_t, coef, [4 * 4]);

120 LOCAL_ALIGNED_16(int16_t, subcoef0, [4 * 4]);

121 LOCAL_ALIGNED_16(int16_t, subcoef1, [4 * 4]);

122 int dc;

123 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, ptrdiff_t stride);

124

125 randomize_buffers(src, dst, 4, coef);

126

127 dct4x4(coef);

128

129 for (dc = 0; dc <= 1; dc++) {

130 void (*idct)(uint8_t *, int16_t *, ptrdiff_t) = dc ? d->vp8_idct_dc_add : d->vp8_idct_add;

131

132 if (check_func(idct, "vp%d_idct_%sadd", 8 - is_vp7, dc ? "dc_" : "")) {

133 if (dc) {

134 memset(subcoef0, 0, 4 * 4 * sizeof(int16_t));

135 subcoef0[0] = coef[0];

136 } else {

137 memcpy(subcoef0, coef, 4 * 4 * sizeof(int16_t));

138 }

139 memcpy(dst0, dst, 4 * 4);

140 memcpy(dst1, dst, 4 * 4);

141 memcpy(subcoef1, subcoef0, 4 * 4 * sizeof(int16_t));

142 // Note, this uses a pixel stride of 4, even though the real decoder uses a stride as a

143 // multiple of 16. If optimizations want to take advantage of that, this test needs to be

144 // updated to make it more like the h264dsp tests.

145 call_ref(dst0, subcoef0, 4);

146 call_new(dst1, subcoef1, 4);

147 if (memcmp(dst0, dst1, 4 * 4) ||

148 memcmp(subcoef0, subcoef1, 4 * 4 * sizeof(int16_t)))

149 fail();

150

151 bench_new(dst1, subcoef1, 4);

152 }

153 }

154 }

155

156 static void check_idct_dc4(VP8DSPContext *d, bool is_vp7)

157 {

158 LOCAL_ALIGNED_16(uint8_t, src, [4 * 4 * 4]);

159 LOCAL_ALIGNED_16(uint8_t, dst, [4 * 4 * 4]);

160 LOCAL_ALIGNED_16(uint8_t, dst0, [4 * 4 * 4]);

161 LOCAL_ALIGNED_16(uint8_t, dst1, [4 * 4 * 4]);

162 LOCAL_ALIGNED_16(int16_t, coef, [4], [4 * 4]);

163 LOCAL_ALIGNED_16(int16_t, subcoef0, [4], [4 * 4]);

164 LOCAL_ALIGNED_16(int16_t, subcoef1, [4], [4 * 4]);

165 int i, chroma;

166 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);

167

168 for (chroma = 0; chroma <= 1; chroma++) {

169 void (*idct4dc)(uint8_t *, int16_t[4][16], ptrdiff_t) = chroma ? d->vp8_idct_dc_add4uv : d->vp8_idct_dc_add4y;

170 if (check_func(idct4dc, "vp%d_idct_dc_add4%s", 8 - is_vp7, chroma ? "uv" : "y")) {

171 ptrdiff_t stride = chroma ? 8 : 16;

172 int w = chroma ? 2 : 4;

173 for (i = 0; i < 4; i++) {

174 int blockx = 4 * (i % w);

175 int blocky = 4 * (i / w);

176 randomize_buffers(src + stride * blocky + blockx, dst + stride * blocky + blockx, stride, coef[i]);

177 dct4x4(coef[i]);

178 memset(&coef[i][1], 0, 15 * sizeof(int16_t));

179 }

180

181 memcpy(dst0, dst, 4 * 4 * 4);

182 memcpy(dst1, dst, 4 * 4 * 4);

183 memcpy(subcoef0, coef, 4 * 4 * 4 * sizeof(int16_t));

184 memcpy(subcoef1, coef, 4 * 4 * 4 * sizeof(int16_t));

185 call_ref(dst0, subcoef0, stride);

186 call_new(dst1, subcoef1, stride);

187 if (memcmp(dst0, dst1, 4 * 4 * 4) ||

188 memcmp(subcoef0, subcoef1, 4 * 4 * 4 * sizeof(int16_t)))

189 fail();

190 bench_new(dst1, subcoef1, stride);

191 }

192 }

193

194 }

195

196 static void check_luma_dc_wht(VP8DSPContext *d, bool is_vp7)

197 {

198 LOCAL_ALIGNED_16(int16_t, dc, [4 * 4]);

199 LOCAL_ALIGNED_16(int16_t, dc0, [4 * 4]);

200 LOCAL_ALIGNED_16(int16_t, dc1, [4 * 4]);

201 int16_t block[4][4][16];

202 LOCAL_ALIGNED_16(int16_t, block0, [4], [4][16]);

203 LOCAL_ALIGNED_16(int16_t, block1, [4], [4][16]);

204 int dc_only;

205 int blockx, blocky;

206 declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t block[4][4][16], int16_t dc[16]);

207

208 for (blocky = 0; blocky < 4; blocky++) {

209 for (blockx = 0; blockx < 4; blockx++) {

210 uint8_t src[16], dst[16];

211 randomize_buffers(src, dst, 4, block[blocky][blockx]);

212

213 dct4x4(block[blocky][blockx]);

214 dc[blocky * 4 + blockx] = block[blocky][blockx][0];

215 block[blocky][blockx][0] = rnd();

216 }

217 }

218 wht4x4(dc);

219

220 for (dc_only = 0; dc_only <= 1; dc_only++) {

221 void (*idct)(int16_t [4][4][16], int16_t [16]) = dc_only ? d->vp8_luma_dc_wht_dc : d->vp8_luma_dc_wht;

222

223 if (check_func(idct, "vp%d_luma_dc_wht%s", 8 - is_vp7, dc_only ? "_dc" : "")) {

224 if (dc_only) {

225 memset(dc0, 0, 16 * sizeof(int16_t));

226 dc0[0] = dc[0];

227 } else {

228 memcpy(dc0, dc, 16 * sizeof(int16_t));

229 }

230 memcpy(dc1, dc0, 16 * sizeof(int16_t));

231 memcpy(block0, block, 4 * 4 * 16 * sizeof(int16_t));

232 memcpy(block1, block, 4 * 4 * 16 * sizeof(int16_t));

233 call_ref(block0, dc0);

234 call_new(block1, dc1);

235 if (memcmp(block0, block1, 4 * 4 * 16 * sizeof(int16_t)) ||

236 memcmp(dc0, dc1, 16 * sizeof(int16_t)))

237 fail();

238 bench_new(block1, dc1);

239 }

240 }

241 }

242

/* Layout of the motion compensation source buffer. SRC_BUF_SIZE reads the
 * variable `size` from the scope where it is expanded. */
#define SRC_BUF_STRIDE 32
#define SRC_BUF_SIZE (((size << (size < 16)) + 5) * SRC_BUF_STRIDE)
// The mc subpixel interpolation filter needs the 2 previous pixels in either
// direction, the +1 is to make sure the actual load addresses always are
// unaligned.
#define src (buf + 2 * SRC_BUF_STRIDE + 2 + 1)

#undef randomize_buffers
/* Fill the first SRC_BUF_SIZE bytes of `buf` (taken from the expanding
 * scope) with random data, one aligned 32-bit store at a time. */
#define randomize_buffers()                                                   \
    do {                                                                      \
        int rb_off;                                                           \
        for (rb_off = 0; rb_off < SRC_BUF_SIZE; rb_off += 4) {                \
            AV_WN32A(buf + rb_off, rnd());                                    \
        }                                                                     \
    } while (0)

258

259 static void check_mc(VP8DSPContext *d)

260 {

261 LOCAL_ALIGNED_16(uint8_t, buf, [32 * 32]);

262 BUF_RECT(uint8_t, dst0, 16, 16);

263 BUF_RECT(uint8_t, dst1, 16, 16);

264 int type, k, dx, dy;

265 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t,

266 const uint8_t *, ptrdiff_t, int, int, int);

267

268 for (type = 0; type < 2; type++) {

269 for (k = 1; k < 8; k++) {

270 int hsize = k / 3;

271 int size = 16 >> hsize;

272 int height = (size << 1) >> (k % 3);

273 for (dy = 0; dy < 3; dy++) {

274 for (dx = 0; dx < 3; dx++) {

275 char str[100];

276 vp8_mc_func func = (type ? d->put_vp8_bilinear_pixels_tab : d->put_vp8_epel_pixels_tab)[hsize][dy][dx];

277

278 if (dx || dy) {

279 if (type == 0) {

280 static const char *dx_names[] = { "", "h4", "h6" };

281 static const char *dy_names[] = { "", "v4", "v6" };

282 snprintf(str, sizeof(str), "epel%d_%s%s", size, dx_names[dx], dy_names[dy]);

283 } else {

284 snprintf(str, sizeof(str), "bilin%d_%s%s", size, dx ? "h" : "", dy ? "v" : "");

285 }

286 } else {

287 snprintf(str, sizeof(str), "pixels%d", size);

288 }

289

290 if (check_func(func, "vp8_put_%s", str)) {

291 int mx, my;

292 int i;

293 if (type == 0) {

294 mx = dx == 2 ? 2 + 2 * (rnd() % 3) : dx == 1 ? 1 + 2 * (rnd() % 4) : 0;

295 my = dy == 2 ? 2 + 2 * (rnd() % 3) : dy == 1 ? 1 + 2 * (rnd() % 4) : 0;

296 } else {

297 mx = dx ? 1 + (rnd() % 7) : 0;

298 my = dy ? 1 + (rnd() % 7) : 0;

299 }

300 randomize_buffers();

301 for (i = -2; i <= 3; i++) {

302 int val = (i == -1 || i == 2) ? 0 : 0xff;

303 // Set pixels in the first row and column to the maximum pattern,

304 // to test for potential overflows in the filter.

305 src[i ] = val;

306 src[i * SRC_BUF_STRIDE] = val;

307 }

308 CLEAR_BUF_RECT(dst0);

309 CLEAR_BUF_RECT(dst1);

310 call_ref(dst0, dst0_stride, src, SRC_BUF_STRIDE, height, mx, my);

311 call_new(dst1, dst1_stride, src, SRC_BUF_STRIDE, height, mx, my);

312 checkasm_check_padded(uint8_t, dst0, dst0_stride, dst1, dst1_stride, size, height, "dst");

313 bench_new(dst1, size, src, SRC_BUF_STRIDE, height, mx, my);

314 }

315 }

316 }

317 }

318 }

319 }

320

321 #undef randomize_buffers

322

/*
 * Pixel setters for the loop filter tests. They use `buf` and `jstride` from
 * the expanding scope.
 *
 * The value arguments are substituted textually and are NOT parenthesized:
 * callers pass assignments such as `q2 = q1` as `c`, and the expansion then
 * stores the final randomized value in that variable. Adding parentheses
 * around `c` would change what gets stored.
 */
#define setpx(a, b, c) \
    buf[(a) + (b) * jstride] = av_clip_uint8(c)
// Set the pixel to c +/- [0,d]
#define setdx(a, b, c, d) \
    setpx(a, b, c - (d) + (rnd() % ((d) * 2 + 1)))
// Set the pixel to c +/- [d,d+e] (making sure it won't be clipped)
#define setdx2(a, b, o, c, d, e) \
    setpx(a, b, o = c + ((d) + (rnd() % (e))) * (c >= 128 ? -1 : 1))

328

/**
 * Write constrained random pixels around an edge located at buf, for every
 * second line of an 8-line group starting at line lineoff.
 *
 * For each of those lines the four pixels on either side of the edge
 * (offsets -4..3 across the edge) are generated from a random q0 with
 * deltas derived from flim_E, flim_I and hev_thresh.
 *
 * @param lineoff   index of the first line of the group
 * @param str       buffer stride
 * @param dir       nonzero: lines are 1 byte apart and the edge is crossed
 *                  with a step of str; zero: the other way around
 * @param force_hev  1: every line uses the "hev" deltas, -1: none of them,
 *                   0: only line 0 (q side) and line 2 (p side)
 */
static void randomize_loopfilter_buffers(int lineoff, int str,
                                         int dir, int flim_E, int flim_I,
                                         int hev_thresh, uint8_t *buf,
                                         int force_hev)
{
    const uint32_t mask = 0xff;
    const int istride = dir ? 1 : str;   /* step from one line to the next */
    const int jstride = dir ? str : 1;   /* step across the edge, used by setpx() */
    const int first   = lineoff * istride;

    for (int line = 0; line < 8; line += 2) {
        // Row 0 will trigger hev for q0/q1, row 2 will trigger hev for p0/p1,
        // rows 4 and 6 will not trigger hev.
        // force_hev 1 will make sure all rows trigger hev, while force_hev -1
        // makes none of them trigger it.
        const int idx   = first + line * istride;
        const int hev_q = force_hev > 0 || (line == 0 && force_hev >= 0);
        const int hev_p = force_hev > 0 || (line == 2 && force_hev >= 0);
        int p2, p1, p0, q0, q1, q2;

        /* q side: offsets 0..3 */
        setpx(idx, 0, q0 = rnd() & mask);
        if (hev_q) {
            setdx2(idx, 1, q1, q0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, 1, q1 = q0, hev_thresh);
        }
        setdx(idx, 2, q2 = q1, flim_I);
        setdx(idx, 3, q2, flim_I);

        /* p side: offsets -1..-4 */
        setdx(idx, -1, p0 = q0, flim_E >> 2);
        if (hev_p) {
            setdx2(idx, -2, p1, p0, hev_thresh + 1, flim_I - hev_thresh - 1);
        } else {
            setdx(idx, -2, p1 = p0, hev_thresh);
        }
        setdx(idx, -3, p2 = p1, flim_I);
        setdx(idx, -4, p2, flim_I);
    }
}

361

/* Fill a w x h rectangle of buf (rows are stride bytes apart) with random
 * byte values, row by row. */
static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
{
    for (int row = 0; row < h; row++) {
        uint8_t *line = buf + row * stride;

        for (int col = 0; col < w; col++)
            line[col] = rnd() & 0xff;
    }
}

370

/* Shorthand for randomize_loopfilter_buffers(): dir, flim_E, flim_I and
 * hev_thresh are taken by name from the scope where the macro is used. */
#define randomize_buffers(buf, lineoff, str, force_hev)                       \
    randomize_loopfilter_buffers(lineoff, str, dir, flim_E, flim_I,           \
                                 hev_thresh, buf, force_hev)

373

374 static void check_loopfilter_16y(VP8DSPContext *d, bool is_vp7)

375 {

376 LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);

377 LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);

378 int dir, edge, force_hev;

379 int flim_E = 20, flim_I = 10, hev_thresh = 7;

380 declare_func(void, uint8_t *, ptrdiff_t, int, int, int);

381

382 for (dir = 0; dir < 2; dir++) {

383 int midoff = dir ? 4 * 16 : 4;

384 int midoff_aligned = dir ? 4 * 16 : 16;

385 uint8_t *buf0 = base0 + midoff_aligned;

386 uint8_t *buf1 = base1 + midoff_aligned;

387 for (edge = 0; edge < 2; edge++) {

388 void (*func)(uint8_t *, ptrdiff_t, int, int, int) = NULL;

389 switch (dir << 1 | edge) {

390 case (0 << 1) | 0: func = d->vp8_h_loop_filter16y; break;

391 case (1 << 1) | 0: func = d->vp8_v_loop_filter16y; break;

392 case (0 << 1) | 1: func = d->vp8_h_loop_filter16y_inner; break;

393 case (1 << 1) | 1: func = d->vp8_v_loop_filter16y_inner; break;

394 }

395 if (check_func(func, "vp%d_loop_filter16y%s_%s", 8 - is_vp7, edge ? "_inner" : "", dir ? "v" : "h")) {

396 for (force_hev = -1; force_hev <= 1; force_hev++) {

397 fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);

398 randomize_buffers(buf0, 0, 16, force_hev);

399 randomize_buffers(buf0, 8, 16, force_hev);

400 memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);

401 call_ref(buf0, 16, flim_E, flim_I, hev_thresh);

402 call_new(buf1, 16, flim_E, flim_I, hev_thresh);

403 if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))

404 fail();

405 }

406 fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);

407 randomize_buffers(buf0, 0, 16, 0);

408 randomize_buffers(buf0, 8, 16, 0);

409 bench_new(buf0, 16, flim_E, flim_I, hev_thresh);

410 }

411 }

412 }

413 }

414

415 static void check_loopfilter_8uv(VP8DSPContext *d, bool is_vp7)

416 {

417 LOCAL_ALIGNED_16(uint8_t, base0u, [32 + 16 * 16]);

418 LOCAL_ALIGNED_16(uint8_t, base0v, [32 + 16 * 16]);

419 LOCAL_ALIGNED_16(uint8_t, base1u, [32 + 16 * 16]);

420 LOCAL_ALIGNED_16(uint8_t, base1v, [32 + 16 * 16]);

421 int dir, edge, force_hev;

422 int flim_E = 20, flim_I = 10, hev_thresh = 7;

423 declare_func(void, uint8_t *, uint8_t *, ptrdiff_t, int, int, int);

424

425 for (dir = 0; dir < 2; dir++) {

426 int midoff = dir ? 4 * 16 : 4;

427 int midoff_aligned = dir ? 4 * 16 : 16;

428 uint8_t *buf0u = base0u + midoff_aligned;

429 uint8_t *buf0v = base0v + midoff_aligned;

430 uint8_t *buf1u = base1u + midoff_aligned;

431 uint8_t *buf1v = base1v + midoff_aligned;

432 for (edge = 0; edge < 2; edge++) {

433 void (*func)(uint8_t *, uint8_t *, ptrdiff_t, int, int, int) = NULL;

434 switch (dir << 1 | edge) {

435 case (0 << 1) | 0: func = d->vp8_h_loop_filter8uv; break;

436 case (1 << 1) | 0: func = d->vp8_v_loop_filter8uv; break;

437 case (0 << 1) | 1: func = d->vp8_h_loop_filter8uv_inner; break;

438 case (1 << 1) | 1: func = d->vp8_v_loop_filter8uv_inner; break;

439 }

440 if (check_func(func, "vp%d_loop_filter8uv%s_%s", 8 - is_vp7, edge ? "_inner" : "", dir ? "v" : "h")) {

441 for (force_hev = -1; force_hev <= 1; force_hev++) {

442 fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);

443 fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);

444 randomize_buffers(buf0u, 0, 16, force_hev);

445 randomize_buffers(buf0v, 0, 16, force_hev);

446 memcpy(buf1u - midoff, buf0u - midoff, 16 * 16);

447 memcpy(buf1v - midoff, buf0v - midoff, 16 * 16);

448

449 call_ref(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);

450 call_new(buf1u, buf1v, 16, flim_E, flim_I, hev_thresh);

451 if (memcmp(buf0u - midoff, buf1u - midoff, 16 * 16) ||

452 memcmp(buf0v - midoff, buf1v - midoff, 16 * 16))

453 fail();

454 }

455 fill_loopfilter_buffers(buf0u - midoff, 16, 16, 16);

456 fill_loopfilter_buffers(buf0v - midoff, 16, 16, 16);

457 randomize_buffers(buf0u, 0, 16, 0);

458 randomize_buffers(buf0v, 0, 16, 0);

459 bench_new(buf0u, buf0v, 16, flim_E, flim_I, hev_thresh);

460 }

461 }

462 }

463 }

464

465 static void check_loopfilter_simple(VP8DSPContext *d, bool is_vp7)

466 {

467 LOCAL_ALIGNED_16(uint8_t, base0, [32 + 16 * 16]);

468 LOCAL_ALIGNED_16(uint8_t, base1, [32 + 16 * 16]);

469 int dir;

470 int flim_E = 20, flim_I = 30, hev_thresh = 0;

471 declare_func(void, uint8_t *, ptrdiff_t, int);

472

473 for (dir = 0; dir < 2; dir++) {

474 int midoff = dir ? 4 * 16 : 4;

475 int midoff_aligned = dir ? 4 * 16 : 16;

476 uint8_t *buf0 = base0 + midoff_aligned;

477 uint8_t *buf1 = base1 + midoff_aligned;

478 void (*func)(uint8_t *, ptrdiff_t, int) = dir ? d->vp8_v_loop_filter_simple : d->vp8_h_loop_filter_simple;

479 if (check_func(func, "vp%d_loop_filter_simple_%s", 8 - is_vp7, dir ? "v" : "h")) {

480 fill_loopfilter_buffers(buf0 - midoff, 16, 16, 16);

481 randomize_buffers(buf0, 0, 16, -1);

482 randomize_buffers(buf0, 8, 16, -1);

483 memcpy(buf1 - midoff, buf0 - midoff, 16 * 16);

484 call_ref(buf0, 16, flim_E);

485 call_new(buf1, 16, flim_E);

486 if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16))

487 fail();

488 bench_new(buf0, 16, flim_E);

489 }

490 }

491 }

492

493 static void checkasm_check_vp78dsp(VP8DSPContext *d, bool is_vp7)

494 {

495 #if CONFIG_VP7_DECODER

496 if (is_vp7)

497 ff_vp7dsp_init(d);

498 else

499 #endif

500 ff_vp8dsp_init(d);

501 check_idct(d, is_vp7);

502 check_idct_dc4(d, is_vp7);

503 check_luma_dc_wht(d, is_vp7);

504 report("idct");

505 check_loopfilter_16y(d, is_vp7);

506 check_loopfilter_8uv(d, is_vp7);

507 check_loopfilter_simple(d, is_vp7);

508 report("loopfilter");

509 }

510

511 void checkasm_check_vp8dsp(void)

512 {

513 VP8DSPContext d;

514

515 ff_vp78dsp_init(&d);

516 check_mc(&d);

517 report("mc");

518 checkasm_check_vp78dsp(&d, false);

519 #if CONFIG_VP7_DECODER

520 checkasm_check_vp78dsp(&d, true);

521 #endif

522 }

BUF_RECT

#define BUF_RECT(type, name, w, h)

Definition: checkasm.h:405

func

int(* func)(AVBPrint *dst, const char *in, const char *arg)

Definition: jacosubdec.c:68

check_loopfilter_8uv

static void check_loopfilter_8uv(VP8DSPContext *d, bool is_vp7)

Definition: vp8dsp.c:415

VP8DSPContext::vp8_h_loop_filter8uv

void(* vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)

Definition: vp8dsp.h:54

declare_func_emms

#define declare_func_emms(cpu_flags, ret,...)

Definition: checkasm.h:203

VP8DSPContext::vp8_h_loop_filter8uv_inner

void(* vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)

Definition: vp8dsp.h:65

static const uint8_t q1[256]

Definition: twofish.c:100

mem_internal.h

VP8DSPContext::vp8_v_loop_filter8uv

void(* vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)

Definition: vp8dsp.h:52

mask

int mask

Definition: mediacodecdec_common.c:154

check_luma_dc_wht

static void check_luma_dc_wht(VP8DSPContext *d, bool is_vp7)

Definition: vp8dsp.c:196

check_idct_dc4

static void check_idct_dc4(VP8DSPContext *d, bool is_vp7)

Definition: vp8dsp.c:156

uint8_t w

Definition: llviddspenc.c:38

check_func

#define check_func(func,...)

Definition: checkasm.h:197

VP8DSPContext::vp8_v_loop_filter16y

void(* vp8_v_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)

Definition: vp8dsp.h:48

chroma

static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)

Definition: vf_waveform.c:1639

wht4x4

static void wht4x4(int16_t *coef)

Definition: vp8dsp.c:72

static const uint64_t c1

Definition: murmur3.c:52

call_ref

#define call_ref(...)

Definition: checkasm.h:212

checkasm_check_vp78dsp

static void checkasm_check_vp78dsp(VP8DSPContext *d, bool is_vp7)

Definition: vp8dsp.c:493

uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t mx

Definition: dsp.h:57

static double b1(void *priv, double x, double y)

Definition: vf_xfade.c:2034

randomize_buffers

#define randomize_buffers(src, dst, stride, coef)

Definition: vp8dsp.c:371

fail

#define fail()

Definition: checkasm.h:206

check_loopfilter_16y

static void check_loopfilter_16y(VP8DSPContext *d, bool is_vp7)

Definition: vp8dsp.c:374

checkasm.h

val

static double val(void *priv, double ch)

Definition: aeval.c:77

type

it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type

Definition: writing_filters.txt:86

checkasm_check_padded

#define checkasm_check_padded(...)

Definition: checkasm.h:450

idct

static void idct(int16_t block[64])

Definition: 4xm.c:167

ff_vp7dsp_init

void ff_vp7dsp_init(VP8DSPContext *c)

rnd

#define rnd()

Definition: checkasm.h:190

ff_vp8dsp_init

void ff_vp8dsp_init(VP8DSPContext *c)

VP8DSPContext::vp8_v_loop_filter16y_inner

void(* vp8_v_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)

Definition: vp8dsp.h:58

vp8dsp.h

intreadwrite.h

randomize_loopfilter_buffers

static void randomize_loopfilter_buffers(int lineoff, int str, int dir, int flim_E, int flim_I, int hev_thresh, uint8_t *buf, int force_hev)

Definition: vp8dsp.c:329

LOCAL_ALIGNED_16

#define LOCAL_ALIGNED_16(t, v,...)

Definition: mem_internal.h:130

VP8DSPContext::vp8_h_loop_filter_simple

void(* vp8_h_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim)

Definition: vp8dsp.h:70

check_loopfilter_simple

static void check_loopfilter_simple(VP8DSPContext *d, bool is_vp7)

Definition: vp8dsp.c:465

static const uint8_t q0[256]

Definition: twofish.c:81

uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t my

Definition: dsp.h:57

VP8DSPContext::vp8_v_loop_filter_simple

void(* vp8_v_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim)

Definition: vp8dsp.h:69

if(ret)

Definition: filter_design.txt:179

checkasm_check_vp8dsp

void checkasm_check_vp8dsp(void)

Definition: vp8dsp.c:511

call_new

#define call_new(...)

Definition: checkasm.h:315

NULL

#define NULL

Definition: coverity.c:32

VP8DSPContext::vp8_h_loop_filter16y

void(* vp8_h_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)

Definition: vp8dsp.h:50

vp8_mc_func

void(* vp8_mc_func)(uint8_t *dst, ptrdiff_t dstStride, const uint8_t *src, ptrdiff_t srcStride, int h, int x, int y)

Definition: vp8dsp.h:33

VP8DSPContext::put_vp8_bilinear_pixels_tab

vp8_mc_func put_vp8_bilinear_pixels_tab[3][3][3]

Definition: vp8dsp.h:81

VP8DSPContext::vp8_h_loop_filter16y_inner

void(* vp8_h_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)

Definition: vp8dsp.h:60

VP8DSPContext::vp8_luma_dc_wht

void(* vp8_luma_dc_wht)(int16_t block[4][4][16], int16_t dc[16])

Definition: vp8dsp.h:38

check_mc

static void check_mc(VP8DSPContext *d)

Definition: vp8dsp.c:259

setdx

#define setdx(a, b, c, d)

Definition: vp8dsp.c:325

VP8DSPContext

Definition: vp8dsp.h:37

setdx2

#define setdx2(a, b, o, c, d, e)

Definition: vp8dsp.c:327

Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]

Definition: snow.txt:400

height

#define height

Definition: dsp.h:89

dst

uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst

Definition: dsp.h:87

size

int size

Definition: twinvq_data.h:10344

VP8DSPContext::vp8_idct_dc_add

void(* vp8_idct_dc_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride)