FFmpeg: tests/checkasm/sw_scale.c Source File

FFmpeg

[フレーム]

sw_scale.c

Go to the documentation of this file.

1 /*

2 *

3 * This file is part of FFmpeg.

4 *

5 * FFmpeg is free software; you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License as published by

7 * the Free Software Foundation; either version 2 of the License, or

8 * (at your option) any later version.

9 *

10 * FFmpeg is distributed in the hope that it will be useful,

11 * but WITHOUT ANY WARRANTY; without even the implied warranty of

12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

13 * GNU General Public License for more details.

14 *

15 * You should have received a copy of the GNU General Public License along

16 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,

17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

18 */

20 #include <string.h>

22 #include "libavutil/common.h"

23 #include "libavutil/intreadwrite.h"

24 #include "libavutil/mem_internal.h"

26 #include "libswscale/swscale.h"

27 #include "libswscale/swscale_internal.h"

29 #include "checkasm.h"

/*
 * Fill a byte buffer with values from rnd().
 *
 * buf  - pointer to the first byte to fill; it is used in byte-granular
 *        pointer arithmetic, so callers pass a uint8_t pointer.
 * size - number of bytes to fill.
 *
 * The buffer is written in 4-byte stores at offsets 0, 4, 8, ... while the
 * offset is below size, so when size is not a multiple of 4 the last store
 * reaches up to 3 bytes past buf + size. The buffer must be padded
 * accordingly.
 *
 * Both arguments are parenthesised in the expansion and the loop counter has
 * a name that is unlikely to collide with identifiers used in the arguments.
 */
#define randomize_buffers(buf, size)                          \
    do {                                                      \
        int rb_off_;                                          \
        for (rb_off_ = 0; rb_off_ < (size); rb_off_ += 4)     \
            AV_WN32((buf) + rb_off_, rnd());                  \
    } while (0)

/**
 * Reference implementation used to validate the vertical scaler.
 *
 * This reference function is the same approximate algorithm employed by the
 * SIMD functions: every source sample is multiplied by its coefficient, only
 * the upper 16 bits of the 32-bit product are kept, and those partial results
 * are summed in a 16-bit accumulator before the final shift and clip.
 *
 * The upper half of the product is taken with an explicit shift instead of
 * reading one element of a union overlay, so the result does not depend on
 * the byte order of the host.
 *
 * @param filter     filterSize coefficients, one per source line
 * @param filterSize number of filter taps (and of source lines)
 * @param src        filterSize pointers to the source lines
 * @param dest       output buffer; dstW bytes are written (none if dstW <= 0)
 * @param dstW       number of output pixels to produce
 * @param dither     dither table; only dither[0] is read
 * @param offset     index of the first sample read from each source line
 */
static void ref_function(const int16_t *filter, int filterSize,
                         const int16_t **src, uint8_t *dest, int dstW,
                         const uint8_t *dither, int offset)
{
    int i, j;
    /* Starting value of the accumulator, identical for every pixel. */
    const int bias = ((filterSize - 1) * 8 + dither[0]) >> 4;

    for (i = 0; i < dstW; i++) {
        /* Deliberately 16 bits wide: the sum is truncated back to int16_t
         * after every tap. */
        int16_t val = bias;

        for (j = 0; j < filterSize; j++) {
            const int prod = (int)src[j][i + offset] * (int)filter[j];

            /* Keep the high 16 bits of the product. This relies on >> of a
             * negative int being an arithmetic shift, as the final
             * "val >> 3" below already does. */
            val += (int16_t)(prod >> 16);
        }
        dest[i] = av_clip_uint8(val >> 3);
    }
}

61 static void check_yuv2yuvX(void)

62 {

63 struct SwsContext *ctx;

64 int fsi, osi, isi, i, j;

65 int dstW;

66 #define LARGEST_FILTER 16

67 #define FILTER_SIZES 4

68 static const int filter_sizes[FILTER_SIZES] = {1, 4, 8, 16};

69 #define LARGEST_INPUT_SIZE 512

70 #define INPUT_SIZES 6

71 static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};

73 declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,

74 int filterSize, const int16_t **src, uint8_t *dest,

75 int dstW, const uint8_t *dither, int offset);

77 const int16_t **src;

78 LOCAL_ALIGNED_16(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);

79 LOCAL_ALIGNED_16(int16_t, filter_coeff, [LARGEST_FILTER]);

80 LOCAL_ALIGNED_16(uint8_t, dst0, [LARGEST_INPUT_SIZE]);

81 LOCAL_ALIGNED_16(uint8_t, dst1, [LARGEST_INPUT_SIZE]);

82 LOCAL_ALIGNED_16(uint8_t, dither, [LARGEST_INPUT_SIZE]);

83 union VFilterData{

84 const int16_t *src;

85 uint16_t coeff[8];

86 } *vFilterData;

87 uint8_t d_val = rnd();

88 memset(dither, d_val, LARGEST_INPUT_SIZE);

89 randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));

90 randomize_buffers((uint8_t*)filter_coeff, LARGEST_FILTER * sizeof(int16_t));

91 ctx = sws_alloc_context();

92 if (sws_init_context(ctx, NULL, NULL) < 0)

93 fail();

95 ff_sws_init_scale(ctx);

96 for(isi = 0; isi < INPUT_SIZES; ++isi){

97 dstW = input_sizes[isi];

98 for(osi = 0; osi < 64; osi += 16){

99 for(fsi = 0; fsi < FILTER_SIZES; ++fsi){

100 src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);

101 vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));

102 memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));

103 for(i = 0; i < filter_sizes[fsi]; ++i){

104 src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];

105 vFilterData[i].src = src[i];

106 for(j = 0; j < 4; ++j)

107 vFilterData[i].coeff[j + 4] = filter_coeff[i];

108 }

109 if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d", filter_sizes[fsi], osi, dstW)){

110 memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));

111 memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));

112

113 // The reference function is not the scalar function selected when mmx

114 // is deactivated as the SIMD functions do not give the same result as

115 // the scalar ones due to rounding. The SIMD functions are activated by

116 // the flag SWS_ACCURATE_RND

117 ref_function(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);

118 // There's no point in calling new for the reference function

119 if(ctx->use_mmx_vfilter){

120 call_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);

121 if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0])))

122 fail();

123 if(dstW == LARGEST_INPUT_SIZE)

124 bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);

125 }

126 }

127 av_freep(&src);

128 av_freep(&vFilterData);

129 }

130 }

131 }

132 sws_freeContext(ctx);

133 #undef FILTER_SIZES

134 }

135

136 #undef SRC_PIXELS

137 #define SRC_PIXELS 512

138

139 static void check_hscale(void)

140 {

141 #define MAX_FILTER_WIDTH 40

142 #define FILTER_SIZES 6

143 static const int filter_sizes[FILTER_SIZES] = { 4, 8, 12, 16, 32, 40 };

144

145 #define HSCALE_PAIRS 2

146 static const int hscale_pairs[HSCALE_PAIRS][2] = {

147 { 8, 14 },

148 { 8, 18 },

149 };

150

151 #define LARGEST_INPUT_SIZE 512

152 #define INPUT_SIZES 6

153 static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};

154

155 int i, j, fsi, hpi, width, dstWi;

156 struct SwsContext *ctx;

157

158 // padded

159 LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);

160 LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);

161 LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);

162

163 // padded

164 LOCAL_ALIGNED_32(int16_t, filter, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);

165 LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);

166 LOCAL_ALIGNED_32(int16_t, filterAvx2, [SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH]);

167 LOCAL_ALIGNED_32(int32_t, filterPosAvx, [SRC_PIXELS]);

168

169 // The dst parameter here is either int16_t or int32_t but we use void* to

170 // just cover both cases.

171 declare_func_emms(AV_CPU_FLAG_MMX, void, void *c, void *dst, int dstW,

172 const uint8_t *src, const int16_t *filter,

173 const int32_t *filterPos, int filterSize);

174

175 int cpu_flags = av_get_cpu_flags();

176

177 ctx = sws_alloc_context();

178 if (sws_init_context(ctx, NULL, NULL) < 0)

179 fail();

180

181 randomize_buffers(src, SRC_PIXELS + MAX_FILTER_WIDTH - 1);

182

183 for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {

184 for (fsi = 0; fsi < FILTER_SIZES; fsi++) {

185 for (dstWi = 0; dstWi < INPUT_SIZES; dstWi++) {

186 width = filter_sizes[fsi];

187

188 ctx->srcBpc = hscale_pairs[hpi][0];

189 ctx->dstBpc = hscale_pairs[hpi][1];

190 ctx->hLumFilterSize = ctx->hChrFilterSize = width;

191

192 for (i = 0; i < SRC_PIXELS; i++) {

193 filterPos[i] = i;

194 filterPosAvx[i] = i;

195

196 // These filter cofficients are chosen to try break two corner

197 // cases, namely:

198 //

199 // - Negative filter coefficients. The filters output signed

200 // values, and it should be possible to end up with negative

201 // output values.

202 //

203 // - Positive clipping. The hscale filter function has clipping

204 // at (1<<15) - 1

205 //

206 // The coefficients sum to the 1.0 point for the hscale

207 // functions (1 << 14).

208

209 for (j = 0; j < width; j++) {

210 filter[i * width + j] = -((1 << 14) / (width - 1));

211 }

212 filter[i * width + (rnd() % width)] = ((1 << 15) - 1);

213 }

214

215 for (i = 0; i < MAX_FILTER_WIDTH; i++) {

216 // These values should be unused in SIMD implementations but

217 // may still be read, random coefficients here should help show

218 // issues where they are used in error.

219

220 filter[SRC_PIXELS * width + i] = rnd();

221 }

222 ctx->dstW = ctx->chrDstW = input_sizes[dstWi];

223 ff_sws_init_scale(ctx);

224 memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));

225 if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER))

226 ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS);

227

228 if (check_func(ctx->hcScale, "hscale_%d_to_%d__fs_%d_dstW_%d", ctx->srcBpc, ctx->dstBpc + 1, width, ctx->dstW)) {

229 memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));

230 memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));

231

232 call_ref(NULL, dst0, ctx->dstW, src, filter, filterPos, width);

233 call_new(NULL, dst1, ctx->dstW, src, filterAvx2, filterPosAvx, width);

234 if (memcmp(dst0, dst1, ctx->dstW * sizeof(dst0[0])))

235 fail();

236 bench_new(NULL, dst0, ctx->dstW, src, filter, filterPosAvx, width);

237 }

238 }

239 }

240 }

241 sws_freeContext(ctx);

242 }

243

/**
 * Entry point of the sw_scale checks: runs the horizontal scaler checks and
 * then the vertical (yuv2yuvX) checks, issuing a report after each group.
 */
void checkasm_check_sw_scale(void)
{
    /* Horizontal scaling. */
    check_hscale();
    report("hscale");

    /* Vertical scaling. */
    check_yuv2yuvX();
    report("yuv2yuvX");
}

FILTER_SIZES

#define FILTER_SIZES

declare_func_emms

#define declare_func_emms(cpu_flags, ret,...)

Definition: checkasm.h:128

SwsContext::dstW

int dstW

Width of destination luma/alpha planes.

Definition: swscale_internal.h:513

mem_internal.h

check_yuv2yuvX

static void check_yuv2yuvX(void)

Definition: sw_scale.c:61

check_func

#define check_func(func,...)

Definition: checkasm.h:122

filter

filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter

Definition: filter_design.txt:228

av_get_cpu_flags

int av_get_cpu_flags(void)

Return the flags which specify extensions supported by the CPU.

Definition: cpu.c:101

call_ref

#define call_ref(...)

Definition: checkasm.h:137

cpu_flags

static atomic_int cpu_flags

Definition: cpu.c:52

av_malloc

#define av_malloc(s)

Definition: tableprint_vlc.h:30

fail

#define fail()

Definition: checkasm.h:131

checkasm.h

val

static double val(void *priv, double ch)

Definition: aeval.c:77

check_hscale

static void check_hscale(void)

Definition: sw_scale.c:139

AV_CPU_FLAG_SLOW_GATHER

#define AV_CPU_FLAG_SLOW_GATHER

CPU has slow gathers.

Definition: cpu.h:58

rnd

#define rnd()

Definition: checkasm.h:115

width

#define width

intreadwrite.h

LARGEST_FILTER

#define LARGEST_FILTER

LOCAL_ALIGNED_16

#define LOCAL_ALIGNED_16(t, v,...)

Definition: mem_internal.h:130

ctx

AVFormatContext * ctx

Definition: movenc.c:48

HSCALE_PAIRS

#define HSCALE_PAIRS

SRC_PIXELS

#define SRC_PIXELS

Definition: sw_scale.c:137

call_new

#define call_new(...)

Definition: checkasm.h:209

NULL

#define NULL

Definition: coverity.c:32

LOCAL_ALIGNED_32

#define LOCAL_ALIGNED_32(t, v,...)

Definition: mem_internal.h:136

sws_alloc_context

struct SwsContext * sws_alloc_context(void)

Allocate an empty SwsContext.

Definition: utils.c:1150

Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c

Definition: undefined.txt:32

ff_sws_init_scale

void ff_sws_init_scale(SwsContext *c)

Definition: swscale.c:589

ff_shuffle_filter_coefficients

int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW)

Definition: utils.c:262

AV_CPU_FLAG_AVX2

#define AV_CPU_FLAG_AVX2

AVX2 functions: requires OS support even if YMM registers aren't used.

Definition: cpu.h:52

offset