1 /*
2 * Copyright (c) 2022 Ben Avison
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21 #include <string.h>
22
24
26
31
/* Builders for entries of the per-function test tables.
 *
 * Each macro expands to one brace-enclosed initializer followed by a comma,
 * so invocations can be listed back to back inside an array initializer.
 * The initializer holds, in order:
 *   - the name of the VC1DSPContext member, as a string literal
 *   - the byte offset of that member within VC1DSPContext
 *   - (VC1DSP_SIZED_TEST only) the block width and height for that entry
 */
#define VC1DSP_TEST(member)                                         \
    { #member, offsetof(VC1DSPContext, member) },
#define VC1DSP_SIZED_TEST(member, block_w, block_h)                 \
    { #member, offsetof(VC1DSPContext, member), block_w, block_h },
34
35 typedef struct {
41
47
49 12, 12, 12, 12, 12, 12, 12, 12,
50 16, 15, 9, 4, -4, -9, -15, -16,
51 16, 6, -6, -16, -16, -6, 6, 16,
52 15, -4, -16, -9, 9, 16, 4, -15,
53 12, -12, -12, 12, 12, -12, -12, 12,
54 9, -16, 4, 15, -15, -4, 16, -9,
55 6, -16, 16, -6, -6, 16, -16, 6,
56 4, -9, 15, -16, 16, -15, 9, -4
57 } };
58
60 17, 17, 17, 17,
61 22, 10, -10, -22,
62 17, -17, -17, 17,
63 10, -22, 22, -10
64 } };
65
67 12, 16, 16, 15, 12, 9, 6, 4,
68 12, 15, 6, -4, -12, -16, -16, -9,
69 12, 9, -6, -16, -12, 4, 16, 15,
70 12, 4, -16, -9, 12, 15, -6, -16,
71 12, -4, -16, 9, 12, -15, -6, 16,
72 12, -9, -6, 16, -12, -4, 16, -15,
73 12, -15, 6, 4, -12, 16, -16, 9,
74 12, -16, 16, -15, 12, -9, 6, -4
75 } };
76
78 17, 22, 17, 10,
79 17, 10, -17, -22,
80 17, -10, -17, 22,
81 17, -22, 17, -10
82 } };
83
85 {
88 fprintf(stderr, "Memory allocation failure\n");
89 exit(EXIT_FAILURE);
90 }
94 }
95
97 {
99 if (
a->width !=
b->height) {
100 fprintf(stderr, "Incompatible multiplication\n");
101 exit(EXIT_FAILURE);
102 }
104 for (
int j = 0; j <
out->height; ++j)
105 for (
int i = 0;
i <
out->width; ++
i) {
106 float sum = 0;
107 for (
int k = 0; k <
a->width; ++k)
108 sum +=
a->d[j *
a->width + k] *
b->d[k *
b->width +
i];
109 out->d[j *
out->width +
i] = sum;
110 }
112 }
113
115 {
116 for (
int j = 0; j <
a->height; ++j)
117 for (
int i = 0;
i <
a->width; ++
i) {
118 float *p =
a->d + j *
a->width +
i;
119 *p *= 64;
121 *p /= (const unsigned[]) { 289, 292, 289, 292 } [j];
122 else
123 *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j];
125 *p /= (
const unsigned[]) { 289, 292, 289, 292 } [
i];
126 else
127 *p /= (
const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [
i];
128 }
129 }
130
132 {
133 for (
int j = 0; j <
a->height; ++j)
134 for (
int i = 0;
i <
a->width; ++
i) {
135 float *p =
a->d + j *
a->width +
i;
136 *p = rintf(*p / by);
137 }
138 }
139
141 {
142 for (
int j = 4; j <
a->height; ++j)
143 for (
int i = 0;
i <
a->width; ++
i) {
144 float *p =
a->d + j *
a->width +
i;
145 *p += 1;
146 }
147 }
148
149 /* The VC-1 spec places restrictions on the values permitted at three
150 * different stages:
151 * - D: the input coefficients in frequency domain
152 * - E: the intermediate coefficients, inverse-transformed only horizontally
153 * - R: the fully inverse-transformed coefficients
154 *
155 * To fully cater for the ranges specified requires various intermediate
156 * values to be held to 17-bit precision; yet these conditions do not appear
157 * to be utilised in real-world streams. At least some assembly
158 * implementations have chosen to restrict these values to 16-bit precision,
159 * to accelerate the decoding of real-world streams at the cost of strict
160 * adherence to the spec. To avoid our test marking these as failures,
161 * reduce our random inputs.
162 */
163 #define ATTENUATION 4
164
166 {
177 /* Rare, so simply try again */
182 }
183 }
188 /* Rare, so simply try again */
194 }
200 /* Rare, so simply try again */
207 }
213 }
214
/* Fill the paired buffers name##0 and name##1 with one shared sequence of
 * random 16-bit values, so both buffers end up with identical contents.
 *
 *   name - common prefix of the two buffers; "0" and "1" are appended
 *   size - number of 16-bit elements to write; evaluated on every iteration
 *
 * rnd() is called once per element and the value is stored into both buffers
 * through AV_WN16A. The size argument is parenthesized so that expressions
 * such as `cond ? a : b` keep their meaning inside the loop condition, and
 * the counter has a macro-specific name so it cannot capture an identifier
 * used in the caller's size expression. */
#define RANDOMIZE_BUFFER16(name, size)                                  \
    do {                                                                \
        for (int rb16_idx_ = 0; rb16_idx_ < (size); ++rb16_idx_) {      \
            uint16_t r = rnd();                                         \
            AV_WN16A(name##0 + rb16_idx_, r);                           \
            AV_WN16A(name##1 + rb16_idx_, r);                           \
        }                                                               \
    } while (0)
224
/* Fill the paired byte buffers name##0 and name##1 with one shared sequence
 * of random 8-bit values, so both buffers end up with identical contents.
 *
 *   name - common prefix of the two buffers; "0" and "1" are appended
 *   size - number of bytes to write; evaluated on every iteration
 *
 * rnd() is called once per byte. The size argument is parenthesized so that
 * low-precedence expressions (e.g. a conditional) are compared as a whole,
 * and the counter has a macro-specific name so it cannot capture an
 * identifier used in the caller's size expression. */
#define RANDOMIZE_BUFFER8(name, size)                                   \
    do {                                                                \
        for (int rb8_idx_ = 0; rb8_idx_ < (size); ++rb8_idx_) {         \
            uint8_t r = rnd();                                          \
            name##0[rb8_idx_] = r;                                      \
            name##1[rb8_idx_] = r;                                      \
        }                                                               \
    } while (0)
234
/* Fill the paired byte buffers name##0 and name##1 with identical random
 * data biased towards the middle of the 8-bit range.
 *
 *   name - common prefix of the two buffers; "0" and "1" are appended
 *   size - number of bytes to write; evaluated exactly once
 *
 * For every byte, three rnd() calls are made, in this order:
 *   1. a magnitude in 0x80..0xFF (top bit forced on),
 *   2. a right shift of 0..8 bits applied to that magnitude, so each
 *      magnitude range (halving at every step, down to 0) is equally likely,
 *   3. a sign bit that optionally negates the result.
 * The signed offset is added to 0x80 and truncated to 8 bits on store.
 *
 * All locals use macro-specific names: the previous p0/p1/i names were
 * captured when the caller's buffers were themselves called p0/p1 (yielding
 * self-initialised pointers) or when the size expression mentioned `i`. */
#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size)                      \
    do {                                                                \
        uint8_t *mw_dst0_ = name##0, *mw_dst1_ = name##1;               \
        int mw_left_ = (size);                                          \
        while (mw_left_-- > 0) {                                        \
            int mw_x_ = 0x80 | (rnd() & 0x7F);                          \
            mw_x_ >>= rnd() % 9;                                        \
            if (rnd() & 1)                                              \
                mw_x_ = -mw_x_;                                         \
            *mw_dst1_++ = *mw_dst0_++ = 0x80 + mw_x_;                   \
        }                                                               \
    } while (0)
247
249 {
250 /* Inverse transform input coefficients are stored in a 16-bit buffer
251 * with row stride of 8 coefficients irrespective of transform size.
252 * vc1_inv_trans_8x8 differs from the others in two ways: coefficients
253 * are stored in column-major order, and the outputs are written back
254 * to the input buffer, so we oversize it slightly to catch overruns. */
257
259
261
262 if (
check_func(
h.vc1_inv_trans_8x8,
"vc1dsp.vc1_inv_trans_8x8")) {
267 for (int j = 0; j < 8; ++j)
268 for (
int i = 0;
i < 8; ++
i) {
269 int idx = 8 +
i * 8 + j;
270 inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->
d[j * 8 +
i];
271 }
274 if (memcmp(inv_trans_in0, inv_trans_in1, 10 * 8 * sizeof (int16_t)))
278 }
279 }
280
282 {
283 /* Inverse transform input coefficients are stored in a 16-bit buffer
284 * with row stride of 8 coefficients irrespective of transform size. */
287
288 /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and
289 * added with saturation to an array of unsigned 8-bit values. Oversize
290 * this by 8 samples left and right and one row above and below. */
293
295
304 };
305
307
309 void (*
func)(uint8_t *, ptrdiff_t, int16_t *) = *(
void **)((intptr_t) &
h +
tests[t].
offset);
316 for (
int j = 0; j <
tests[t].height; ++j)
317 for (
int i = 0;
i <
tests[t].width; ++
i) {
319 inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->
d[j *
tests[t].width +
i];
320 }
321 call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0);
322 call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1);
323 if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24))
325 bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1 + 8);
327 }
328 }
329 }
330
332 {
333 /* Deblocking filter buffers are big enough to hold a 16x16 block,
334 * plus 16 columns left and 4 rows above to hold filter inputs
335 * (depending on whether v or h neighbouring block edge, oversized
336 * horizontally to maintain 16-byte alignment) plus 16 columns and
337 * 4 rows below to catch write overflows */
340
342
350 };
351
353
358 for (int count = 1000; count > 0; --count) {
359 int pq =
rnd() % 31 + 1;
361 call_ref(filter_buf0 + 4 * 48 + 16, 48, pq);
362 call_new(filter_buf1 + 4 * 48 + 16, 48, pq);
363 if (memcmp(filter_buf0, filter_buf1, 24 * 48))
365 }
366 }
367 for (int j = 0; j < 24; ++j)
368 for (
int i = 0;
i < 48; ++
i)
369 filter_buf1[j * 48 +
i] = 0x60 + 0x40 * (
i >= 16 && j >= 4);
371 bench_new(filter_buf1 + 4 * 48 + 16, 48, 1);
373 bench_new(filter_buf1 + 4 * 48 + 16, 48, 31);
374 }
375 }
376
/* Compare the reference and tested unescape implementations on the current
 * contents of the escaped0/escaped1 buffers.
 *
 * Expects these names in the invoking scope: escaped0, escaped1, unescaped0,
 * unescaped1, len0, len1, escaped_offset, unescaped_offset, escaped_len and
 * UNESCAPE_BUF_SIZE.
 *
 * Runs 100 iterations. Each one picks:
 *   - a source offset and a destination offset, both in 0..7,
 *   - a source length of (1 << s) - t with s in 3..10 and t in 0..7,
 *     i.e. 1..1024 bytes,
 * refills both output buffers with identical random bytes, runs call_ref and
 * call_new, and calls fail() if the returned lengths differ or if the two
 * output buffers differ anywhere in UNESCAPE_BUF_SIZE bytes.
 *
 * The length is computed in two statements so that the two rnd() calls are
 * made in a fixed order (shift first, then trim); as operands of a single
 * subtraction their order would be left to the compiler, making the sequence
 * of test inputs for a given seed compiler-dependent. */
#define TEST_UNESCAPE                                                                               \
    do {                                                                                            \
        for (int count = 100; count > 0; --count) {                                                 \
            escaped_offset   = rnd() & 7;                                                           \
            unescaped_offset = rnd() & 7;                                                           \
            escaped_len      = 1u << ((rnd() % 8) + 3);                                             \
            escaped_len     -= rnd() & 7;                                                           \
            RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE);                                        \
            len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \
            len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \
            if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE))                  \
                fail();                                                                             \
        }                                                                                           \
    } while (0)
390
392 {
393 /* This appears to be a typical length of buffer in use */
394 #define LOG2_UNESCAPE_BUF_SIZE 17
395 #define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE)
400
402
404
405 if (
check_func(
h.vc1_unescape_buffer,
"vc1dsp.vc1_unescape_buffer")) {
406 int len0, len1, escaped_offset, unescaped_offset, escaped_len;
408
409 /* Test data which consists of escape sequences packed as tightly as possible */
411 escaped1[x] = escaped0[x] = 3 * (x % 3 == 0);
413
414 /* Test random data */
417
418 /* Test data with escape sequences at random intervals */
420 int gap, gap_msb;
421 escaped1[x+0] = escaped0[x+0] = 0;
422 escaped1[x+1] = escaped0[x+1] = 0;
423 escaped1[x+2] = escaped0[x+2] = 3;
424 escaped1[x+3] = escaped0[x+3] =
rnd() & 3;
425 gap_msb = 2
u << (
rnd() % 8);
426 gap = (
rnd() &~ -gap_msb) | gap_msb;
427 x += gap;
428 }
430
431 /* Test data which is known to contain no escape sequences */
435
436 /* Benchmark the no-escape-sequences case */
438 }
439 }
440
442 {
446
449
451 report(
"unescape_buffer");
452 }