FFmpeg: libavcodec/x86/mpegaudiodsp.c Source File

FFmpeg

[フレーム]

mpegaudiodsp.c

Go to the documentation of this file.

1 /*

2 * MMX optimized MP3 decoding functions

4 *

5 * This file is part of FFmpeg.

6 *

7 * FFmpeg is free software; you can redistribute it and/or

8 * modify it under the terms of the GNU Lesser General Public

9 * License as published by the Free Software Foundation; either

10 * version 2.1 of the License, or (at your option) any later version.

11 *

12 * FFmpeg is distributed in the hope that it will be useful,

13 * but WITHOUT ANY WARRANTY; without even the implied warranty of

14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

15 * Lesser General Public License for more details.

16 *

17 * You should have received a copy of the GNU Lesser General Public

18 * License along with FFmpeg; if not, write to the Free Software

19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

20 */

22 #include "libavutil/attributes.h"

23 #include "libavutil/cpu.h"

24 #include "libavutil/internal.h"

25 #include "libavutil/x86/asm.h"

26 #include "libavutil/x86/cpu.h"

27 #include "libavcodec/mpegaudiodsp.h"

29 #define DECL(CPU)\

30 static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\

31 void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);

33 DECL(sse)

34 DECL(sse2)

35 DECL(sse3)

36 DECL(ssse3)

37 DECL(avx)

39 void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,

40 float *tmpbuf);

41 void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,

42 float *tmpbuf);

44 DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];

46 #if HAVE_SSE2_INLINE

/* Multiply-accumulate: rt += ra * rb. */
#define MACS(rt, ra, rb) ((rt) += (ra) * (rb))
/* Multiply-subtract:   rt -= ra * rb. */
#define MLSS(rt, ra, rb) ((rt) -= (ra) * (rb))

/*
 * Apply op (MACS or MLSS) to sum for eight coefficient/sample pairs taken
 * 64 elements apart: (w)[0], (w)[64], ..., (w)[448] against the same indices
 * of (p). The taps are applied in ascending index order.
 *
 * Wrapped in do { } while (0) so that "SUM8(...);" is a single statement and
 * stays correct inside an unbraced if/else.
 */
#define SUM8(op, sum, w, p)                     \
do {                                            \
    op(sum, (w)[0 * 64], (p)[0 * 64]);          \
    op(sum, (w)[1 * 64], (p)[1 * 64]);          \
    op(sum, (w)[2 * 64], (p)[2 * 64]);          \
    op(sum, (w)[3 * 64], (p)[3 * 64]);          \
    op(sum, (w)[4 * 64], (p)[4 * 64]);          \
    op(sum, (w)[5 * 64], (p)[5 * 64]);          \
    op(sum, (w)[6 * 64], (p)[6 * 64]);          \
    op(sum, (w)[7 * 64], (p)[7 * 64]);          \
} while (0)

63 static void apply_window(const float *buf, const float *win1,

64 const float *win2, float *sum1, float *sum2, int len)

65 {

66 x86_reg count = - 4*len;

67 const float *win1a = win1+len;

68 const float *win2a = win2+len;

69 const float *bufa = buf+len;

70 float *sum1a = sum1+len;

71 float *sum2a = sum2+len;

74 #define MULT(a, b) \

75 "movaps " #a "(%1,%0), %%xmm1 \n\t" \

76 "movaps " #a "(%3,%0), %%xmm2 \n\t" \

77 "mulps %%xmm2, %%xmm1 \n\t" \

78 "subps %%xmm1, %%xmm0 \n\t" \

79 "mulps " #b "(%2,%0), %%xmm2 \n\t" \

80 "subps %%xmm2, %%xmm4 \n\t" \

82 __asm__ volatile(

83 "1: \n\t"

84 "xorps %%xmm0, %%xmm0 \n\t"

85 "xorps %%xmm4, %%xmm4 \n\t"

87 MULT( 0, 0)

88 MULT( 256, 64)

89 MULT( 512, 128)

90 MULT( 768, 192)

91 MULT(1024, 256)

92 MULT(1280, 320)

93 MULT(1536, 384)

94 MULT(1792, 448)

96 "movaps %%xmm0, (%4,%0) \n\t"

97 "movaps %%xmm4, (%5,%0) \n\t"

98 "add 16,ドル %0 \n\t"

99 "jl 1b \n\t"

100 :"+&r"(count)

101 :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)

102 );

103

104 #undef MULT

105 }

106

107 static void apply_window_mp3(float *in, float *win, int *unused, float *out,

108 int incr)

109 {

110 LOCAL_ALIGNED_16(float, suma, [17]);

111 LOCAL_ALIGNED_16(float, sumb, [17]);

112 LOCAL_ALIGNED_16(float, sumc, [17]);

113 LOCAL_ALIGNED_16(float, sumd, [17]);

114

115 float sum;

116

117 /* copy to avoid wrap */

118 __asm__ volatile(

119 "movaps 0(%0), %%xmm0 \n\t" \

120 "movaps 16(%0), %%xmm1 \n\t" \

121 "movaps 32(%0), %%xmm2 \n\t" \

122 "movaps 48(%0), %%xmm3 \n\t" \

123 "movaps %%xmm0, 0(%1) \n\t" \

124 "movaps %%xmm1, 16(%1) \n\t" \

125 "movaps %%xmm2, 32(%1) \n\t" \

126 "movaps %%xmm3, 48(%1) \n\t" \

127 "movaps 64(%0), %%xmm0 \n\t" \

128 "movaps 80(%0), %%xmm1 \n\t" \

129 "movaps 96(%0), %%xmm2 \n\t" \

130 "movaps 112(%0), %%xmm3 \n\t" \

131 "movaps %%xmm0, 64(%1) \n\t" \

132 "movaps %%xmm1, 80(%1) \n\t" \

133 "movaps %%xmm2, 96(%1) \n\t" \

134 "movaps %%xmm3, 112(%1) \n\t"

135 ::"r"(in), "r"(in+512)

136 :"memory"

137 );

138

139 apply_window(in + 16, win , win + 512, suma, sumc, 16);

140 apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);

141

142 SUM8(MACS, suma[0], win + 32, in + 48);

143

144 sumc[ 0] = 0;

145 sumb[16] = 0;

146 sumd[16] = 0;

147

148 #define SUMS(suma, sumb, sumc, sumd, out1, out2) \

149 "movups " #sumd "(%4), %%xmm0 \n\t" \

150 "shufps 0ドルx1b, %%xmm0, %%xmm0 \n\t" \

151 "subps " #suma "(%1), %%xmm0 \n\t" \

152 "movaps %%xmm0," #out1 "(%0) \n\t" \

153 \

154 "movups " #sumc "(%3), %%xmm0 \n\t" \

155 "shufps 0ドルx1b, %%xmm0, %%xmm0 \n\t" \

156 "addps " #sumb "(%2), %%xmm0 \n\t" \

157 "movaps %%xmm0," #out2 "(%0) \n\t"

158

159 if (incr == 1) {

160 __asm__ volatile(

161 SUMS( 0, 48, 4, 52, 0, 112)

162 SUMS(16, 32, 20, 36, 16, 96)

163 SUMS(32, 16, 36, 20, 32, 80)

164 SUMS(48, 0, 52, 4, 48, 64)

165

166 :"+&r"(out)

167 :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])

168 :"memory"

169 );

170 out += 16*incr;

171 } else {

172 int j;

173 float *out2 = out + 32 * incr;

174 out[0 ] = -suma[ 0];

175 out += incr;

176 out2 -= incr;

177 for(j=1;j<16;j++) {

178 *out = -suma[ j] + sumd[16-j];

179 *out2 = sumb[16-j] + sumc[ j];

180 out += incr;

181 out2 -= incr;

182 }

183 }

184

185 sum = 0;

186 SUM8(MLSS, sum, win + 16 + 32, in + 32);

187 *out = sum;

188 }

189

190 #endif /* HAVE_SSE2_INLINE */

191

192 #if HAVE_YASM

/*
 * Generate imdct36_blocks_<CPU1>(): run the IMDCT36 over count blocks.
 *
 * Groups of four blocks are handed to ff_four_imdct36_float_<CPU2>() with a
 * window row from mdct_win_sse; the remaining blocks (fewer than four) go
 * one at a time through ff_imdct36_float_<CPU1>() with a row of
 * ff_mdct_win_float.
 */
#define DECL_IMDCT_BLOCKS(CPU1, CPU2)                                       \
static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in,      \
                               int count, int switch_point, int block_type) \
{                                                                           \
    const int quads_end = count - (count & 3);                              \
    int blk = 0;                                                            \
                                                                            \
    while (blk < quads_end) {                                               \
        LOCAL_ALIGNED_16(float, tmpbuf, [1024]);                            \
        /* select window: variant 1 only for the first group of four */     \
        /* blocks, and only when switch_point is set                  */    \
        float *win = mdct_win_sse[switch_point && blk < 4][block_type];     \
                                                                            \
        /* apply window & overlap with previous buffer */                   \
        ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf);          \
                                                                            \
        in  += 4 * 18;                                                      \
        buf += 4 * 18;                                                      \
        out += 4;                                                           \
        blk += 4;                                                           \
    }                                                                       \
                                                                            \
    for (; blk < count; blk++) {                                            \
        /* select window: type 0 for the first two blocks when        */    \
        /* switch_point is set; odd-numbered blocks use rows 4..7     */    \
        int win_idx = (switch_point && blk < 2) ? 0 : block_type;           \
        float *win  = ff_mdct_win_float[win_idx + (4 & -(blk & 1))];        \
                                                                            \
        /* apply window & overlap with previous buffer */                   \
        ff_imdct36_float_ ## CPU1(out, buf, in, win);                       \
                                                                            \
        in += 18;                                                           \
        buf++;                                                              \
        out++;                                                              \
    }                                                                       \
}

#if HAVE_SSE
DECL_IMDCT_BLOCKS(sse,sse)
DECL_IMDCT_BLOCKS(sse2,sse)
DECL_IMDCT_BLOCKS(sse3,sse)
DECL_IMDCT_BLOCKS(ssse3,sse)
#endif
#if HAVE_AVX_EXTERNAL
DECL_IMDCT_BLOCKS(avx,avx)
#endif

234 #endif /* HAVE_YASM */

235

236 av_cold void ff_mpadsp_init_x86(MPADSPContext *s)

237 {

238 int mm_flags = av_get_cpu_flags();

239

240 int i, j;

241 for (j = 0; j < 4; j++) {

242 for (i = 0; i < 40; i ++) {

243 mdct_win_sse[0][j][4*i ] = ff_mdct_win_float[j ][i];

244 mdct_win_sse[0][j][4*i + 1] = ff_mdct_win_float[j + 4][i];

245 mdct_win_sse[0][j][4*i + 2] = ff_mdct_win_float[j ][i];

246 mdct_win_sse[0][j][4*i + 3] = ff_mdct_win_float[j + 4][i];

247 mdct_win_sse[1][j][4*i ] = ff_mdct_win_float[0 ][i];

248 mdct_win_sse[1][j][4*i + 1] = ff_mdct_win_float[4 ][i];

249 mdct_win_sse[1][j][4*i + 2] = ff_mdct_win_float[j ][i];

250 mdct_win_sse[1][j][4*i + 3] = ff_mdct_win_float[j + 4][i];

251 }

252 }

253

254 #if HAVE_SSE2_INLINE

255 if (mm_flags & AV_CPU_FLAG_SSE2) {

256 s->apply_window_float = apply_window_mp3;

257 }

258 #endif /* HAVE_SSE2_INLINE */

259

260 #if HAVE_YASM

261 if (EXTERNAL_AVX(mm_flags)) {

262 s->imdct36_blocks_float = imdct36_blocks_avx;

263 } else if (EXTERNAL_SSSE3(mm_flags)) {

264 s->imdct36_blocks_float = imdct36_blocks_ssse3;

265 } else if (EXTERNAL_SSE3(mm_flags)) {

266 s->imdct36_blocks_float = imdct36_blocks_sse3;

267 } else if (EXTERNAL_SSE2(mm_flags)) {

268 s->imdct36_blocks_float = imdct36_blocks_sse2;

269 } else if (EXTERNAL_SSE(mm_flags)) {

270 s->imdct36_blocks_float = imdct36_blocks_sse;

271 }

272 #endif /* HAVE_YASM */

273 }

Generated on Wed Jul 10 2013 23:48:02 for FFmpeg by doxygen 1.8.2