FFmpeg: tests/checkasm/sw_scale.c Source File

FFmpeg

[フレーム]

sw_scale.c

Go to the documentation of this file.

1 /*

2 *

3 * This file is part of FFmpeg.

4 *

5 * FFmpeg is free software; you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License as published by

7 * the Free Software Foundation; either version 2 of the License, or

8 * (at your option) any later version.

9 *

10 * FFmpeg is distributed in the hope that it will be useful,

11 * but WITHOUT ANY WARRANTY; without even the implied warranty of

12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

13 * GNU General Public License for more details.

14 *

15 * You should have received a copy of the GNU General Public License along

16 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,

17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

18 */

20 #include <string.h>

22 #include "libavutil/common.h"

23 #include "libavutil/intreadwrite.h"

24 #include "libavutil/mem_internal.h"

26 #include "libswscale/swscale.h"

27 #include "libswscale/swscale_internal.h"

29 #include "checkasm.h"

/*
 * Fill `size` bytes starting at `buf` with values from rnd(), written four
 * bytes at a time through AV_WN32().
 *
 * Every store is 4 bytes wide, so when `size` is not a multiple of 4 the last
 * store extends up to 3 bytes past `size`; the buffer must be padded for that.
 * `buf` and `size` are evaluated once per iteration, so pass side-effect-free
 * expressions. Both are parenthesized in the expansion, and the loop counter
 * uses a name unlikely to collide with identifiers in the caller's arguments.
 */
#define randomize_buffers(buf, size)                                  \
    do {                                                              \
        int rb_pos_;                                                  \
        for (rb_pos_ = 0; rb_pos_ < (size); rb_pos_ += 4)             \
            AV_WN32((buf) + rb_pos_, rnd());                          \
    } while (0)

/*
 * Plain C reference for the vertical scaler (counterpart of yuv2planeX_8_c).
 *
 * For each output pixel x in [0, dstW) the accumulator starts at
 * dither[(x + offset) & 7] << 12, gains src[tap][x] * filter[tap] for each of
 * the filterSize taps, and is then shifted right by 19 and passed through
 * av_clip_uint8() before being stored in dest[x].
 */
static void yuv2planeX_8_ref(const int16_t *filter, int filterSize,
                             const int16_t **src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
{
    for (int x = 0; x < dstW; x++) {
        int acc = dither[(x + offset) & 7] << 12;
        int tap = 0;

        while (tap < filterSize) {
            acc += src[tap][x] * filter[tap];
            tap++;
        }

        dest[x] = av_clip_uint8(acc >> 19);
    }
}

/*
 * Tolerant comparison of two byte buffers.
 *
 * Returns 1 as soon as one of the first n byte pairs of ref and test differs
 * by more than `accuracy` in absolute value; returns 0 when every pair is
 * within the tolerance (which includes n == 0).
 */
static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
{
    size_t pos = 0;

    while (pos < n) {
        const int delta     = (int)ref[pos] - (int)test[pos];
        const int magnitude = delta < 0 ? -delta : delta;

        if (magnitude > accuracy)
            return 1;
        pos++;
    }
    return 0;
}

/*
 * Hex-dump len bytes starting at p to stdout, eight bytes per row.
 *
 * Each row begins with the index of its first byte plus `offset` (so the
 * caller can label rows with positions in a larger buffer), followed by the
 * bytes of that row and a newline. Nothing is printed when len is 0.
 */
static void print_data(uint8_t *p, size_t len, size_t offset)
{
    size_t done = 0;

    while (done < len) {
        size_t row_len = len - done;

        if (row_len > 8)
            row_len = 8;

        printf("0x%04zx: ", done + offset);
        for (size_t col = 0; col < row_len; col++)
            printf("0x%02x ", (uint32_t) p[done + col]);
        printf("\n");

        done += row_len;
    }
}

/*
 * Find the first index at which a and b differ and dump the surrounding
 * bytes of both buffers with print_data().
 *
 * The dump starts at the 8-byte row preceding the row that holds the mismatch
 * (or at index 0 when there is no preceding row) and covers up to 32 bytes.
 * The length is clamped to the bytes remaining before len, so a mismatch
 * close to the end of the buffers never causes a read past them.
 *
 * Returns the index of the first mismatch, or len when the first len bytes
 * of a and b are identical (in which case nothing is printed).
 */
static size_t show_differences(uint8_t *a, uint8_t *b, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (a[i] != b[i]) {
            // Back up by one row when possible, then align down to a row start.
            const size_t context_start = (i >= 8 ? i - 8 : i) & ~(size_t)7;
            size_t dump_len = len - context_start;

            if (dump_len > 32)
                dump_len = 32;

            printf("test a:\n");
            print_data(&a[context_start], dump_len, context_start);
            printf("\ntest b:\n");
            print_data(&b[context_start], dump_len, context_start);
            printf("\n");
            return i;
        }
    }
    return len;
}

99 static void check_yuv2yuv1(int accurate)

100 {

101 struct SwsContext *ctx;

102 int osi, isi;

103 int dstW, offset;

104 size_t fail_offset;

105 const int input_sizes[] = {8, 24, 128, 144, 256, 512};

106 const int INPUT_SIZES = sizeof(input_sizes)/sizeof(input_sizes[0]);

107 #define LARGEST_INPUT_SIZE 512

108

109 const int offsets[] = {0, 3, 8, 11, 16, 19};

110 const int OFFSET_SIZES = sizeof(offsets)/sizeof(offsets[0]);

111 const char *accurate_str = (accurate) ? "accurate" : "approximate";

112

113 declare_func(void,

114 const int16_t *src, uint8_t *dest,

115 int dstW, const uint8_t *dither, int offset);

116

117 LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_INPUT_SIZE]);

118 LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);

119 LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);

120 LOCAL_ALIGNED_8(uint8_t, dither, [8]);

121

122 randomize_buffers((uint8_t*)dither, 8);

123 randomize_buffers((uint8_t*)src_pixels, LARGEST_INPUT_SIZE * sizeof(int16_t));

124 ctx = sws_alloc_context();

125 if (accurate)

126 ctx->flags |= SWS_ACCURATE_RND;

127 if (sws_init_context(ctx, NULL, NULL) < 0)

128 fail();

129

130 ff_sws_init_scale(ctx);

131 for (isi = 0; isi < INPUT_SIZES; ++isi) {

132 dstW = input_sizes[isi];

133 for (osi = 0; osi < OFFSET_SIZES; osi++) {

134 offset = offsets[osi];

135 if (check_func(ctx->yuv2plane1, "yuv2yuv1_%d_%d_%s", offset, dstW, accurate_str)){

136 memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));

137 memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));

138

139 call_ref(src_pixels, dst0, dstW, dither, offset);

140 call_new(src_pixels, dst1, dstW, dither, offset);

141 if (cmp_off_by_n(dst0, dst1, dstW * sizeof(dst0[0]), accurate ? 0 : 2)) {

142 fail();

143 printf("failed: yuv2yuv1_%d_%di_%s\n", offset, dstW, accurate_str);

144 fail_offset = show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));

145 printf("failing values: src: 0x%04x dither: 0x%02x dst-c: %02x dst-asm: %02x\n",

146 (int) src_pixels[fail_offset],

147 (int) dither[(fail_offset + fail_offset) & 7],

148 (int) dst0[fail_offset],

149 (int) dst1[fail_offset]);

150 }

151 if(dstW == LARGEST_INPUT_SIZE)

152 bench_new(src_pixels, dst1, dstW, dither, offset);

153 }

154 }

155 }

156 sws_freeContext(ctx);

157 }

158

159 static void check_yuv2yuvX(int accurate)

160 {

161 struct SwsContext *ctx;

162 int fsi, osi, isi, i, j;

163 int dstW;

164 #define LARGEST_FILTER 16

165 // ff_yuv2planeX_8_sse2 can't handle odd filter sizes

166 const int filter_sizes[] = {2, 4, 8, 16};

167 const int FILTER_SIZES = sizeof(filter_sizes)/sizeof(filter_sizes[0]);

168 #define LARGEST_INPUT_SIZE 512

169 static const int input_sizes[] = {8, 24, 128, 144, 256, 512};

170 const int INPUT_SIZES = sizeof(input_sizes)/sizeof(input_sizes[0]);

171 const char *accurate_str = (accurate) ? "accurate" : "approximate";

172

173 declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,

174 int filterSize, const int16_t **src, uint8_t *dest,

175 int dstW, const uint8_t *dither, int offset);

176

177 const int16_t **src;

178 LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);

179 LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]);

180 LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);

181 LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);

182 LOCAL_ALIGNED_16(uint8_t, dither, [LARGEST_INPUT_SIZE]);

183 union VFilterData{

184 const int16_t *src;

185 uint16_t coeff[8];

186 } *vFilterData;

187 uint8_t d_val = rnd();

188 memset(dither, d_val, LARGEST_INPUT_SIZE);

189 randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));

190 ctx = sws_alloc_context();

191 if (accurate)

192 ctx->flags |= SWS_ACCURATE_RND;

193 if (sws_init_context(ctx, NULL, NULL) < 0)

194 fail();

195

196 ff_sws_init_scale(ctx);

197 for(isi = 0; isi < INPUT_SIZES; ++isi){

198 dstW = input_sizes[isi];

199 for(osi = 0; osi < 64; osi += 16){

200 if (dstW <= osi)

201 continue;

202 for (fsi = 0; fsi < FILTER_SIZES; ++fsi) {

203 // Generate filter coefficients for the given filter size,

204 // with some properties:

205 // - The coefficients add up to the intended sum (4096, 1<<12)

206 // - The coefficients contain negative values

207 // - The filter intermediates don't overflow for worst case

208 // inputs (all positive coefficients are coupled with

209 // input_max and all negative coefficients with input_min,

210 // or vice versa).

211 // Produce a filter with all coefficients set to

212 // -((1<<12)/(filter_size-1)) except for one (randomly chosen)

213 // which is set to ((1<<13)-1).

214 for (i = 0; i < filter_sizes[fsi]; ++i)

215 filter_coeff[i] = -((1 << 12) / (filter_sizes[fsi] - 1));

216 filter_coeff[rnd() % filter_sizes[fsi]] = (1 << 13) - 1;

217

218 src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);

219 vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));

220 memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));

221 for (i = 0; i < filter_sizes[fsi]; ++i) {

222 src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];

223 vFilterData[i].src = src[i] - osi;

224 for(j = 0; j < 4; ++j)

225 vFilterData[i].coeff[j + 4] = filter_coeff[i];

226 }

227 if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d_%s", filter_sizes[fsi], osi, dstW, accurate_str)){

228 // use vFilterData for the mmx function

229 const int16_t *filter = ctx->use_mmx_vfilter ? (const int16_t*)vFilterData : &filter_coeff[0];

230 memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));

231 memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));

232

233 // We can't use call_ref here, because we don't know if use_mmx_vfilter was set for that

234 // function or not, so we can't pass it the parameters correctly.

235 yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);

236

237 call_new(filter, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);

238 if (cmp_off_by_n(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]), accurate ? 0 : 2)) {

239 fail();

240 printf("failed: yuv2yuvX_%d_%d_%d_%s\n", filter_sizes[fsi], osi, dstW, accurate_str);

241 show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));

242 }

243 if(dstW == LARGEST_INPUT_SIZE)

244 bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);

245

246 }

247 av_freep(&src);

248 av_freep(&vFilterData);

249 }

250 }

251 }

252 sws_freeContext(ctx);

253 #undef FILTER_SIZES

254 }

255

256 #undef SRC_PIXELS

257 #define SRC_PIXELS 512

258

259 static void check_hscale(void)

260 {

261 #define MAX_FILTER_WIDTH 40

262 #define FILTER_SIZES 6

263 static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };

264

265 #define HSCALE_PAIRS 2

266 static const int hscale_pairs[HSCALE_PAIRS][2] = {

267 { 8, 14 },

268 { 8, 18 },

269 };

270

271 #define LARGEST_INPUT_SIZE 512

272 #define INPUT_SIZES 6

273 static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};

274

275 int i, j, fsi, hpi, width, dstWi;

276 struct SwsContext *ctx;

277

278 // padded

279 LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);

280 LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);

281 LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);

282

283 // padded

284 LOCAL_ALIGNED_32(int16_t, filter, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);

285 LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);

286 LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);

287 LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]);

288

289 // The dst parameter here is either int16_t or int32_t but we use void* to

290 // just cover both cases.

291 declare_func(void, void *c, void *dst, int dstW,

292 const uint8_t *src, const int16_t *filter,

293 const int32_t *filterPos, int filterSize);

294

295 ctx = sws_alloc_context();

296 if (sws_init_context(ctx, NULL, NULL) < 0)

297 fail();

298

299 randomize_buffers(src, SRC_PIXELS + MAX_FILTER_WIDTH - 1);

300

301 for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {

302 for (fsi = 0; fsi < FILTER_SIZES; fsi++) {

303 for (dstWi = 0; dstWi < INPUT_SIZES; dstWi++) {

304 width = filter_sizes[fsi];

305

306 ctx->srcBpc = hscale_pairs[hpi][0];

307 ctx->dstBpc = hscale_pairs[hpi][1];

308 ctx->hLumFilterSize = ctx->hChrFilterSize = width;

309

310 for (i = 0; i < SRC_PIXELS; i++) {

311 filterPos[i] = i;

312 filterPosAvx[i] = i;

313

314 // These filter cofficients are chosen to try break two corner

315 // cases, namely:

316 //

317 // - Negative filter coefficients. The filters output signed

318 // values, and it should be possible to end up with negative

319 // output values.

320 //

321 // - Positive clipping. The hscale filter function has clipping

322 // at (1<<15) - 1

323 //

324 // The coefficients sum to the 1.0 point for the hscale

325 // functions (1 << 14).

326

327 for (j = 0; j < width; j++) {

328 filter[i * width + j] = -((1 << 14) / (width - 1));

329 }

330 filter[i * width + (rnd() % width)] = ((1 << 15) - 1);

331 }

332

333 for (i = 0; i < MAX_FILTER_WIDTH; i++) {

334 // These values should be unused in SIMD implementations but

335 // may still be read, random coefficients here should help show

336 // issues where they are used in error.

337

338 filter[SRC_PIXELS * width + i] = rnd();

339 }

340 ctx->dstW = ctx->chrDstW = input_sizes[dstWi];

341 ff_sws_init_scale(ctx);

342 memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));

343 ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, ctx->dstW);

344

345 if (check_func(ctx->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", ctx->srcBpc, ctx->dstBpc + 1, width, ctx->dstW)) {

346 memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));

347 memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));

348

349 call_ref(NULL, dst0, ctx->dstW, src, filter, filterPos, width);

350 call_new(NULL, dst1, ctx->dstW, src, filterAvx2, filterPosAvx, width);

351 if (memcmp(dst0, dst1, ctx->dstW * sizeof(dst0[0])))

352 fail();

353 bench_new(NULL, dst0, ctx->dstW, src, filter, filterPosAvx, width);

354 }

355 }

356 }

357 }

358 sws_freeContext(ctx);

359 }

360

/*
 * Entry point of the sw_scale checks: runs the hscale check, then the
 * yuv2yuv1 and yuv2yuvX checks in approximate (0) and accurate (1) mode,
 * calling report() once after each group.
 */
void checkasm_check_sw_scale(void)
{
    int accurate;

    check_hscale();
    report("hscale");

    for (accurate = 0; accurate <= 1; accurate++)
        check_yuv2yuv1(accurate);
    report("yuv2yuv1");

    for (accurate = 0; accurate <= 1; accurate++)
        check_yuv2yuvX(accurate);
    report("yuv2yuvX");
}

FILTER_SIZES

#define FILTER_SIZES

declare_func_emms

#define declare_func_emms(cpu_flags, ret,...)

Definition: checkasm.h:176

check_yuv2yuv1

static void check_yuv2yuv1(int accurate)

Definition: sw_scale.c:99

SwsContext::dstW

int dstW

Width of destination luma/alpha planes.

Definition: swscale_internal.h:514

mem_internal.h

check_func

#define check_func(func,...)

Definition: checkasm.h:170

#define b

Definition: input.c:41

test

Definition: idctdsp.c:35

filter

filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter

Definition: filter_design.txt:228

call_ref

#define call_ref(...)

Definition: checkasm.h:185

av_malloc

#define av_malloc(s)

Definition: tableprint_vlc.h:30

print_data

static void print_data(uint8_t *p, size_t len, size_t offset)

Definition: sw_scale.c:63

fail

#define fail()

Definition: checkasm.h:179

checkasm.h

val

static double val(void *priv, double ch)

Definition: aeval.c:78

check_hscale

static void check_hscale(void)

Definition: sw_scale.c:259

rnd

#define rnd()

Definition: checkasm.h:163

width

#define width

intreadwrite.h

offsets

static const int offsets[]

Definition: hevc_pel.c:34

AVFormatContext::flags

int flags

Flags modifying the (de)muxer behaviour.

Definition: avformat.h:1406

LARGEST_FILTER

#define LARGEST_FILTER

cmp_off_by_n

static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)

Definition: sw_scale.c:54

LOCAL_ALIGNED_16

#define LOCAL_ALIGNED_16(t, v,...)

Definition: mem_internal.h:150

ctx

AVFormatContext * ctx

Definition: movenc.c:48

yuv2planeX_8_ref

static void yuv2planeX_8_ref(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)

Definition: sw_scale.c:38

LOCAL_ALIGNED_8

#define LOCAL_ALIGNED_8(t, v,...)

Definition: mem_internal.h:144

HSCALE_PAIRS

#define HSCALE_PAIRS

SRC_PIXELS

#define SRC_PIXELS

Definition: sw_scale.c:257

call_new

#define call_new(...)

Definition: checkasm.h:288

NULL

#define NULL

Definition: coverity.c:32

LOCAL_ALIGNED_32

#define LOCAL_ALIGNED_32(t, v,...)

Definition: mem_internal.h:156

sws_alloc_context

struct SwsContext * sws_alloc_context(void)

Allocate an empty SwsContext.

Definition: utils.c:1180

abs

#define abs(x)

Definition: cuda_runtime.h:35

Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c

Definition: undefined.txt:32

ff_sws_init_scale

void ff_sws_init_scale(SwsContext *c)

Definition: swscale.c:590

ff_shuffle_filter_coefficients

int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW)

Definition: utils.c:272

check_yuv2yuvX

static void check_yuv2yuvX(int accurate)

Definition: sw_scale.c:159

printf

printf("static const uint8_t my_array[100] = {\n")

The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a

Definition: undefined.txt:41

offset

it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset

Definition: writing_filters.txt:86

SWS_ACCURATE_RND

#define SWS_ACCURATE_RND

Definition: swscale.h:90

show_differences

static size_t show_differences(uint8_t *a, uint8_t *b, size_t len)

Definition: sw_scale.c:80

report

#define report