FFmpeg: tests/checkasm/vc1dsp.c Source File

FFmpeg

[フレーム]

vc1dsp.c

Go to the documentation of this file.

1 /*

3 *

4 * This file is part of FFmpeg.

5 *

6 * FFmpeg is free software; you can redistribute it and/or modify

7 * it under the terms of the GNU General Public License as published by

8 * the Free Software Foundation; either version 2 of the License, or

9 * (at your option) any later version.

10 *

11 * FFmpeg is distributed in the hope that it will be useful,

12 * but WITHOUT ANY WARRANTY; without even the implied warranty of

13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

14 * GNU General Public License for more details.

15 *

16 * You should have received a copy of the GNU General Public License along

17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,

18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

19 */

21 #include <string.h>

23 #include "checkasm.h"

25 #include "libavcodec/vc1dsp.h"

27 #include "libavutil/common.h"

28 #include "libavutil/internal.h"

29 #include "libavutil/intreadwrite.h"

30 #include "libavutil/mem_internal.h"

/* Build one entry of a test table: the member's name as a string plus its
 * byte offset inside VC1DSPContext, so the function pointer can be fetched
 * generically at run time. The SIZED variant also records the transform
 * dimensions. Each expansion ends with a comma so entries can be listed
 * back to back. */
#define VC1DSP_TEST(func) \
    { .name = #func, .offset = offsetof(VC1DSPContext, func) },

#define VC1DSP_SIZED_TEST(func, w, h) \
    { .name = #func, .offset = offsetof(VC1DSPContext, func), .width = (w), .height = (h) },

/* Descriptor of one VC1DSPContext member under test. */
typedef struct {
    const char *name;   /* member name, used to build the reported test name */
    size_t      offset; /* byte offset of the function pointer in VC1DSPContext */
    int         width;  /* transform width (left 0 by VC1DSP_TEST) */
    int         height; /* transform height (left 0 by VC1DSP_TEST) */
} test;

/* Dense float matrix with its elements stored inline after the header. */
typedef struct matrix {
    size_t width;   /* number of columns */
    size_t height;  /* number of rows */
    float  d[];     /* height * width elements, row-major: d[row * width + col] */
} matrix;

48 static const matrix T8 = { 8, 8, {

49 12, 12, 12, 12, 12, 12, 12, 12,

50 16, 15, 9, 4, -4, -9, -15, -16,

51 16, 6, -6, -16, -16, -6, 6, 16,

52 15, -4, -16, -9, 9, 16, 4, -15,

53 12, -12, -12, 12, 12, -12, -12, 12,

54 9, -16, 4, 15, -15, -4, 16, -9,

55 6, -16, 16, -6, -6, 16, -16, 6,

56 4, -9, 15, -16, 16, -15, 9, -4

57 } };

59 static const matrix T4 = { 4, 4, {

60 17, 17, 17, 17,

61 22, 10, -10, -22,

62 17, -17, -17, 17,

63 10, -22, 22, -10

64 } };

66 static const matrix T8t = { 8, 8, {

67 12, 16, 16, 15, 12, 9, 6, 4,

68 12, 15, 6, -4, -12, -16, -16, -9,

69 12, 9, -6, -16, -12, 4, 16, 15,

70 12, 4, -16, -9, 12, 15, -6, -16,

71 12, -4, -16, 9, 12, -15, -6, 16,

72 12, -9, -6, 16, -12, -4, 16, -15,

73 12, -15, 6, 4, -12, 16, -16, 9,

74 12, -16, 16, -15, 12, -9, 6, -4

75 } };

77 static const matrix T4t = { 4, 4, {

78 17, 22, 17, 10,

79 17, 10, -17, -22,

80 17, -10, -17, 22,

81 17, -22, 17, -10

82 } };

84 static matrix *new_matrix(size_t width, size_t height)

85 {

86 matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float));

87 if (out == NULL) {

88 fprintf(stderr, "Memory allocation failure\n");

89 exit(EXIT_FAILURE);

90 }

91 out->width = width;

92 out->height = height;

93 return out;

94 }

96 static matrix *multiply(const matrix *a, const matrix *b)

97 {

98 matrix *out;

99 if (a->width != b->height) {

100 fprintf(stderr, "Incompatible multiplication\n");

101 exit(EXIT_FAILURE);

102 }

103 out = new_matrix(b->width, a->height);

104 for (int j = 0; j < out->height; ++j)

105 for (int i = 0; i < out->width; ++i) {

106 float sum = 0;

107 for (int k = 0; k < a->width; ++k)

108 sum += a->d[j * a->width + k] * b->d[k * b->width + i];

109 out->d[j * out->width + i] = sum;

110 }

111 return out;

112 }

113

114 static void normalise(matrix *a)

115 {

116 for (int j = 0; j < a->height; ++j)

117 for (int i = 0; i < a->width; ++i) {

118 float *p = a->d + j * a->width + i;

119 *p *= 64;

120 if (a->height == 4)

121 *p /= (const unsigned[]) { 289, 292, 289, 292 } [j];

122 else

123 *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j];

124 if (a->width == 4)

125 *p /= (const unsigned[]) { 289, 292, 289, 292 } [i];

126 else

127 *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i];

128 }

129 }

130

131 static void divide_and_round_nearest(matrix *a, float by)

132 {

133 for (int j = 0; j < a->height; ++j)

134 for (int i = 0; i < a->width; ++i) {

135 float *p = a->d + j * a->width + i;

136 *p = rintf(*p / by);

137 }

138 }

139

140 static void tweak(matrix *a)

141 {

142 for (int j = 4; j < a->height; ++j)

143 for (int i = 0; i < a->width; ++i) {

144 float *p = a->d + j * a->width + i;

145 *p += 1;

146 }

147 }

148

/* The VC-1 spec constrains the values allowed at three stages:
 * - D: the input coefficients in the frequency domain
 * - E: the intermediate coefficients, inverse-transformed horizontally only
 * - R: the fully inverse-transformed coefficients
 *
 * Covering the whole of the specified ranges needs some intermediate values
 * to be held at 17-bit precision, yet real-world streams do not appear to
 * make use of this. At least some assembly implementations restrict these
 * values to 16-bit precision, trading strict spec adherence for faster
 * decoding of real-world streams. So that the test does not flag those
 * implementations as failures, the random inputs and the accepted ranges
 * are scaled down by this factor.
 */
#define ATTENUATION 4

164

165 static matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height)

166 {

167 matrix *raw, *tmp, *D, *E, *R;

168 raw = new_matrix(width, height);

169 for (int i = 0; i < width * height; ++i)

170 raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION;

171 tmp = multiply(height == 8 ? &T8 : &T4, raw);

172 D = multiply(tmp, width == 8 ? &T8t : &T4t);

173 normalise(D);

174 divide_and_round_nearest(D, 1);

175 for (int i = 0; i < width * height; ++i) {

176 if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) {

177 /* Rare, so simply try again */

178 av_free(raw);

179 av_free(tmp);

180 av_free(D);

181 return generate_inverse_quantized_transform_coefficients(width, height);

182 }

183 }

184 E = multiply(D, width == 8 ? &T8 : &T4);

185 divide_and_round_nearest(E, 8);

186 for (int i = 0; i < width * height; ++i)

187 if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) {

188 /* Rare, so simply try again */

189 av_free(raw);

190 av_free(tmp);

191 av_free(D);

192 av_free(E);

193 return generate_inverse_quantized_transform_coefficients(width, height);

194 }

195 R = multiply(height == 8 ? &T8t : &T4t, E);

196 tweak(R);

197 divide_and_round_nearest(R, 128);

198 for (int i = 0; i < width * height; ++i)

199 if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) {

200 /* Rare, so simply try again */

201 av_free(raw);

202 av_free(tmp);

203 av_free(D);

204 av_free(E);

205 av_free(R);

206 return generate_inverse_quantized_transform_coefficients(width, height);

207 }

208 av_free(raw);

209 av_free(tmp);

210 av_free(E);

211 av_free(R);

212 return D;

213 }

214

/* Fill the first `size` 16-bit elements of the buffer pair name0 / name1
 * with the same random values, so both start out identical. */
#define RANDOMIZE_BUFFER16(name, size)                  \
    do {                                                \
        for (int pos_ = 0; pos_ < (size); ++pos_) {     \
            const uint16_t val_ = rnd();                \
            AV_WN16A(name##0 + pos_, val_);             \
            AV_WN16A(name##1 + pos_, val_);             \
        }                                               \
    } while (0)

224

/* Fill the first `size` bytes of the buffer pair name0 / name1 with the
 * same random bytes, so both start out identical. */
#define RANDOMIZE_BUFFER8(name, size)                               \
    do {                                                            \
        for (int pos_ = 0; pos_ < (size); ++pos_)                   \
            name##1[pos_] = name##0[pos_] = (uint8_t) rnd();        \
    } while (0)

234

/* Fill the first `size` bytes of the buffer pair name0 / name1 with
 * identical random bytes clustered around 0x80: a 128..255 magnitude is
 * shifted right by a random 0..8 bits, given a random sign and added to
 * 0x80 (wrapping to 8 bits). */
#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size)                      \
    do {                                                                \
        const int total_ = (size);                                      \
        for (int pos_ = 0; pos_ < total_; ++pos_) {                     \
            int mag_ = 0x80 | (rnd() & 0x7F);                           \
            uint8_t byte_;                                              \
            mag_ >>= rnd() % 9;                                         \
            byte_ = (rnd() & 1) ? 0x80 - mag_ : 0x80 + mag_;            \
            name##0[pos_] = byte_;                                      \
            name##1[pos_] = byte_;                                      \
        }                                                               \
    } while (0)

247

248 static void check_inv_trans_inplace(void)

249 {

250 /* Inverse transform input coefficients are stored in a 16-bit buffer

251 * with row stride of 8 coefficients irrespective of transform size.

252 * vc1_inv_trans_8x8 differs from the others in two ways: coefficients

253 * are stored in column-major order, and the outputs are written back

254 * to the input buffer, so we oversize it slightly to catch overruns. */

255 LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]);

256 LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]);

257

258 VC1DSPContext h;

259

260 ff_vc1dsp_init(&h);

261

262 if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) {

263 matrix *coeffs;

264 declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *);

265 RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8);

266 coeffs = generate_inverse_quantized_transform_coefficients(8, 8);

267 for (int j = 0; j < 8; ++j)

268 for (int i = 0; i < 8; ++i) {

269 int idx = 8 + i * 8 + j;

270 inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i];

271 }

272 call_ref(inv_trans_in0 + 8);

273 call_new(inv_trans_in1 + 8);

274 if (memcmp(inv_trans_in0, inv_trans_in1, 10 * 8 * sizeof (int16_t)))

275 fail();

276 bench_new(inv_trans_in1 + 8);

277 av_free(coeffs);

278 }

279 }

280

281 static void check_inv_trans_adding(void)

282 {

283 /* Inverse transform input coefficients are stored in a 16-bit buffer

284 * with row stride of 8 coefficients irrespective of transform size. */

285 LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]);

286 LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]);

287

288 /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and

289 * added with saturation to an array of unsigned 8-bit values. Oversize

290 * this by 8 samples left and right and one row above and below. */

291 LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]);

292 LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]);

293

294 VC1DSPContext h;

295

296 const test tests[] = {

297 VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4)

298 VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8)

299 VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4)

300 VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8)

301 VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4)

302 VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8)

303 VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4)

304 };

305

306 ff_vc1dsp_init(&h);

307

308 for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {

309 void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset);

310 if (check_func(func, "vc1dsp.%s", tests[t].name)) {

311 matrix *coeffs;

312 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *);

313 RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8);

314 RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24);

315 coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height);

316 for (int j = 0; j < tests[t].height; ++j)

317 for (int i = 0; i < tests[t].width; ++i) {

318 int idx = j * 8 + i;

319 inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i];

320 }

321 call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0);

322 call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1);

323 if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24))

324 fail();

325 bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1 + 8);

326 av_free(coeffs);

327 }

328 }

329 }

330

331 static void check_loop_filter(void)

332 {

333 /* Deblocking filter buffers are big enough to hold a 16x16 block,

334 * plus 16 columns left and 4 rows above to hold filter inputs

335 * (depending on whether v or h neighbouring block edge, oversized

336 * horizontally to maintain 16-byte alignment) plus 16 columns and

337 * 4 rows below to catch write overflows */

338 LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]);

339 LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]);

340

341 VC1DSPContext h;

342

343 const test tests[] = {

344 VC1DSP_TEST(vc1_v_loop_filter4)

345 VC1DSP_TEST(vc1_h_loop_filter4)

346 VC1DSP_TEST(vc1_v_loop_filter8)

347 VC1DSP_TEST(vc1_h_loop_filter8)

348 VC1DSP_TEST(vc1_v_loop_filter16)

349 VC1DSP_TEST(vc1_h_loop_filter16)

350 };

351

352 ff_vc1dsp_init(&h);

353

354 for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {

355 void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset);

356 declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);

357 if (check_func(func, "vc1dsp.%s", tests[t].name)) {

358 for (int count = 1000; count > 0; --count) {

359 int pq = rnd() % 31 + 1;

360 RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48);

361 call_ref(filter_buf0 + 4 * 48 + 16, 48, pq);

362 call_new(filter_buf1 + 4 * 48 + 16, 48, pq);

363 if (memcmp(filter_buf0, filter_buf1, 24 * 48))

364 fail();

365 }

366 }

367 for (int j = 0; j < 24; ++j)

368 for (int i = 0; i < 48; ++i)

369 filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4);

370 if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name))

371 bench_new(filter_buf1 + 4 * 48 + 16, 48, 1);

372 if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name))

373 bench_new(filter_buf1 + 4 * 48 + 16, 48, 31);

374 }

375 }

376

/* Run 100 comparisons of the reference and tested unescape functions on the
 * current contents of escaped0 / escaped1, each with random source and
 * destination offsets of 0..7 and a random length of (8 << 0..7) minus 0..7.
 * Expects len0, len1, escaped_offset, unescaped_offset and escaped_len to be
 * declared by the enclosing scope. */
#define TEST_UNESCAPE                                                                               \
    do {                                                                                            \
        for (int iter_ = 0; iter_ < 100; ++iter_) {                                                 \
            escaped_offset   = rnd() & 7;                                                           \
            unescaped_offset = rnd() & 7;                                                           \
            escaped_len      = (1u << ((rnd() % 8) + 3)) - (rnd() & 7);                             \
            RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE);                                        \
            len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \
            len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \
            if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE))                  \
                fail();                                                                             \
        }                                                                                           \
    } while (0)

390

391 static void check_unescape(void)

392 {

393 /* This appears to be a typical length of buffer in use */

394 #define LOG2_UNESCAPE_BUF_SIZE 17

395 #define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE)

396 LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]);

397 LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]);

398 LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]);

399 LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]);

400

401 VC1DSPContext h;

402

403 ff_vc1dsp_init(&h);

404

405 if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) {

406 int len0, len1, escaped_offset, unescaped_offset, escaped_len;

407 declare_func_emms(AV_CPU_FLAG_MMX, int, const uint8_t *, int, uint8_t *);

408

409 /* Test data which consists of escapes sequences packed as tightly as possible */

410 for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x)

411 escaped1[x] = escaped0[x] = 3 * (x % 3 == 0);

412 TEST_UNESCAPE;

413

414 /* Test random data */

415 RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE);

416 TEST_UNESCAPE;

417

418 /* Test data with escape sequences at random intervals */

419 for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) {

420 int gap, gap_msb;

421 escaped1[x+0] = escaped0[x+0] = 0;

422 escaped1[x+1] = escaped0[x+1] = 0;

423 escaped1[x+2] = escaped0[x+2] = 3;

424 escaped1[x+3] = escaped0[x+3] = rnd() & 3;

425 gap_msb = 2u << (rnd() % 8);

426 gap = (rnd() &~ -gap_msb) | gap_msb;

427 x += gap;

428 }

429 TEST_UNESCAPE;

430

431 /* Test data which is known to contain no escape sequences */

432 memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE);

433 memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE);

434 TEST_UNESCAPE;

435

436 /* Benchmark the no-escape-sequences case */

437 bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1);

438 }

439 }

440

/* Entry point of the VC-1 DSP tests: runs each group of checks and reports
 * its result under the group's name. */
void checkasm_check_vc1dsp(void)
{
    /* Both inverse-transform checks are reported as a single group */
    check_inv_trans_inplace();
    check_inv_trans_adding();
    report("inv_trans");

    check_loop_filter();
    report("loop_filter");

    check_unescape();
    report("unescape_buffer");
}

static const matrix T8

Definition: vc1dsp.c:48

RANDOMIZE_BUFFER16

#define RANDOMIZE_BUFFER16(name, size)

Definition: vc1dsp.c:215

func

int(* func)(AVBPrint *dst, const char *in, const char *arg)

Definition: jacosubdec.c:68

declare_func_emms

#define declare_func_emms(cpu_flags, ret,...)

Definition: checkasm.h:128

T8t

static const matrix T8t

Definition: vc1dsp.c:66

name

it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name

Definition: writing_filters.txt:88

mem_internal.h

out

FILE * out

Definition: movenc.c:54

static const matrix T4

Definition: vc1dsp.c:59

#define u(width, name, range_min, range_max)

Definition: cbs_h2645.c:262

vc1dsp.h

matrix

Definition: vc1dsp.c:42

ATTENUATION

#define ATTENUATION

Definition: vc1dsp.c:163

test::height

int height

Definition: vc1dsp.c:39

tmp

static uint8_t tmp[11]

Definition: aes_ctr.c:28

#define R

Definition: huffyuvdsp.h:34

check_func

#define check_func(func,...)

Definition: checkasm.h:122

#define b

Definition: input.c:34

test

Definition: idctdsp.c:34

check_loop_filter

static void check_loop_filter(void)

Definition: vc1dsp.c:331

divide_and_round_nearest

static void divide_and_round_nearest(matrix *a, float by)

Definition: vc1dsp.c:131

call_ref

#define call_ref(...)

Definition: checkasm.h:137

D(D(float, sse)

Definition: rematrix_init.c:29

T4t

static const matrix T4t

Definition: vc1dsp.c:77

fail

#define fail()

Definition: checkasm.h:131

checkasm.h

rnd

#define rnd()

Definition: checkasm.h:115

tweak

static void tweak(matrix *a)

Definition: vc1dsp.c:140

FF_ARRAY_ELEMS

#define FF_ARRAY_ELEMS(a)

Definition: sinewin_tablegen.c:29

generate_inverse_quantized_transform_coefficients

static matrix * generate_inverse_quantized_transform_coefficients(size_t width, size_t height)

Definition: vc1dsp.c:165

width

#define width

intreadwrite.h

matrix::width

size_t width

Definition: vc1dsp.c:43

LOCAL_ALIGNED_16

#define LOCAL_ALIGNED_16(t, v,...)

Definition: mem_internal.h:130

#define E

Definition: avdct.c:32

matrix::height

size_t height

Definition: vc1dsp.c:44

LOCAL_ALIGNED_8

#define LOCAL_ALIGNED_8(t, v,...)

Definition: mem_internal.h:124

call_new

#define call_new(...)

Definition: checkasm.h:209

NULL

#define NULL

Definition: coverity.c:32

TEST_UNESCAPE

#define TEST_UNESCAPE

Definition: vc1dsp.c:377

check_unescape

static void check_unescape(void)

Definition: vc1dsp.c:391

ff_vc1dsp_init

av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)

Definition: vc1dsp.c:974

test::width

int width

Definition: vc1dsp.c:38

UNESCAPE_BUF_SIZE

#define UNESCAPE_BUF_SIZE

height

#define height

The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a

Definition: undefined.txt:41

matrix::d

float d[]

Definition: vc1dsp.c:45

offset

it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset

Definition: writing_filters.txt:86

RANDOMIZE_BUFFER8_MID_WEIGHTED

#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size)

Definition: vc1dsp.c:235

new_matrix

static matrix * new_matrix(size_t width, size_t height)