1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
24
25 #include "config.h"
26
50
53
55
68
70
/* NOTE(review): this chunk appears to be a mangled extraction — the original
 * file's line numbers are fused into the text and many lines are missing.
 * Code below is left byte-identical; comments are hedged accordingly. */
/* DECL_INIT_FN(basis, interleave): declares a static av_cold init callback
 * named b<basis>_i<interleave>.  The generated function initializes the
 * float transform tables for `len`, then builds either a power-of-two
 * reversal table (when the codelet's max_len is 2) or a split-radix parity
 * reversal table parameterized by basis/interleave.
 * Presumably matches the FFTXCodelet init-callback signature — TODO confirm
 * against tx_priv.h (not visible in this chunk). */
71 #define DECL_INIT_FN(basis, interleave) \
72 static av_cold int b ##basis## _i ##interleave(AVTXContext *s, \
73 const FFTXCodelet *cd, \
74 uint64_t flags, \
75 FFTXCodeletOptions *opts, \
76 int len, int inv, \
77 const void *scale) \
78 { \
79 ff_tx_init_tabs_float(len); \
80 if (cd->max_len == 2) \
81 return ff_tx_gen_ptwo_revtab(s, opts); \
82 else \
83 return ff_tx_gen_split_radix_parity_revtab(s, len, inv, opts, \
84 basis, interleave); \
85 }
86
89
/* NOTE(review): incomplete fragment — the function signature (original lines
 * ~90-92) and several interior lines were dropped by the extraction.
 * Judging by the 15-entry permutation and the `factor_init` reference in the
 * codelet table below, this is presumably the factor-transform init callback
 * — TODO confirm against the intact source.  Code left byte-identical. */
93 {
95
96 /* The transformations below are performed in the gather domain,
97 * so override the option and let the infrastructure convert the map
98 * to SCATTER if needed. */
100
102
105 else
107
110
/* Visible logic: a special-case branch (condition line missing, presumably
 * len == 15 given the fixed 15-element buffers below — verify). */
112 int cnt = 0,
tmp[15];
113
114 /* Special permutation to simplify loads in the pre-permuted version */
/* Snapshot the current 15-entry map, then regroup it by residue mod 3:
 * first indices 1,4,7,10,13, then 2,5,8,11,14, then 0,3,6,9,12. */
115 memcpy(
tmp,
s->map, 15*
sizeof(*
tmp));
116 for (
int i = 1;
i < 15;
i += 3) {
117 s->map[cnt] =
tmp[
i];
118 cnt++;
119 }
120 for (
int i = 2;
i < 15;
i += 3) {
121 s->map[cnt] =
tmp[
i];
122 cnt++;
123 }
124 for (
int i = 0;
i < 15;
i += 3) {
125 s->map[cnt] =
tmp[
i];
126 cnt++;
127 }
/* Overlapping shifts — memmove (not memcpy) is required here since the
 * source and destination ranges overlap within s->map. */
128 memmove(&
s->map[7], &
s->map[6], 4*
sizeof(
int));
129 memmove(&
s->map[3], &
s->map[1], 4*
sizeof(
int));
/* NOTE(review): original lines 130-131 are missing here; the sibling loop
 * at k*15 below ends with s->map[1] = tmp[2]; s->map[2] = tmp[0]; — the
 * dropped lines presumably did the same. TODO confirm. */
132 }
133
134 return 0;
135 }
136
/* NOTE(review): incomplete fragment — the start of this signature (original
 * lines ~137-138) is missing.  Given the scale handling and the `m_inv_init`
 * reference in the codelet table below, this is presumably the inverse-MDCT
 * init callback — TODO confirm against the intact source. */
139 int len,
int inv,
const void *
scale)
140 {
143
/* Store the user-supplied scale in both double and float precision.
 * SCALE_TYPE is defined elsewhere in the file (not visible here). */
144 s->scale_d = *((SCALE_TYPE *)
scale);
145 s->scale_f =
s->scale_d;
146
151
155
159
/* Copy the sub-transform's map for the first half, then append its inverse
 * permutation in the second half (map[half + map[i]] = i) so the unstrided
 * path can look up in either direction. */
160 memcpy(
s->map,
s->sub->map, (
len >> 1)*
sizeof(*
s->map));
161 /* Invert lookup table for unstrided path */
162 for (
int i = 0;
i < (
len >> 1);
i++)
163 s->map[(
len >> 1) +
s->map[
i]] =
i;
164
167
168 return 0;
169 }
170
/* NOTE(review): incomplete fragment — the signature (original lines
 * ~171-176) and several setup lines are missing.  Given the 15xM structure
 * and the `fft_pfa_init` references in the codelet table below, this is
 * presumably the prime-factor-algorithm init callback — TODO confirm. */
177 {
181
186
188 sub_len, inv,
scale)))
190
193
196
197 /* Our 15-point transform is also a compound one, so embed its input map */
199
200 /* Special permutation to simplify loads in the pre-permuted version */
/* For each of the sub-transform's `len` groups of 15 map entries, apply the
 * same residue-mod-3 regrouping used in the single 15-point init above:
 * indices 1,4,7,10,13 first, then 2,5,8,11,14, then 0,3,6,9,12. */
201 for (
int k = 0; k <
s->sub[0].len; k++) {
202 int cnt = 0;
203 memcpy(
tmp, &
s->map[k*15], 15*
sizeof(*
tmp));
204 for (
int i = 1;
i < 15;
i += 3) {
205 s->map[k*15 + cnt] =
tmp[
i];
206 cnt++;
207 }
208 for (
int i = 2;
i < 15;
i += 3) {
209 s->map[k*15 + cnt] =
tmp[
i];
210 cnt++;
211 }
212 for (
int i = 0;
i < 15;
i += 3) {
213 s->map[k*15 + cnt] =
tmp[
i];
214 cnt++;
215 }
/* Overlapping in-place shifts — memmove is required, not memcpy. */
216 memmove(&
s->map[k*15 + 7], &
s->map[k*15 + 6], 4*
sizeof(
int));
217 memmove(&
s->map[k*15 + 3], &
s->map[k*15 + 1], 4*
sizeof(
int));
218 s->map[k*15 + 1] =
tmp[2];
219 s->map[k*15 + 2] =
tmp[0];
220 }
221 }
222
225
227
228 return 0;
229 }
230
/* NOTE(review): incomplete fragment — the array declaration line (original
 * ~230-231) is missing, only the entries and the closing `};` are visible.
 * Presumably the exported float-codelet list for x86 — TODO confirm name.
 * Each TX_DEF entry appears to encode: codelet name, transform type,
 * min/max length, factor, flags-related fields, a priority value, an init
 * callback (NULL or one declared above), the asm suffix, and the required
 * CPU feature — verify the exact TX_DEF parameter order in tx_priv.h.
 * Entries gated by ARCH_X86_64 / HAVE_AVX2_EXTERNAL below are only compiled
 * in on 64-bit builds with external AVX2 asm enabled. */
232 TX_DEF(fft2, FFT, 2, 2, 2, 0, 128,
NULL, sse3, SSE3,
AV_TX_INPLACE, 0),
233 TX_DEF(fft2_asm, FFT, 2, 2, 2, 0, 192, b8_i0, sse3, SSE3,
235 TX_DEF(fft2, FFT, 2, 2, 2, 0, 192, b8_i0, sse3, SSE3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE, 0),
236 TX_DEF(fft4_fwd, FFT, 4, 4, 2, 0, 128,
NULL, sse2, SSE2,
AV_TX_INPLACE |
FF_TX_FORWARD_ONLY, 0),
237 TX_DEF(fft4_fwd_asm, FFT, 4, 4, 2, 0, 192, b8_i0, sse2, SSE2,
239 TX_DEF(fft4_inv_asm, FFT, 4, 4, 2, 0, 128,
NULL, sse2, SSE2,
241 TX_DEF(fft4_fwd, FFT, 4, 4, 2, 0, 192, b8_i0, sse2, SSE2,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE, 0),
242 TX_DEF(fft4_inv, FFT, 4, 4, 2, 0, 128,
NULL, sse2, SSE2,
AV_TX_INPLACE |
FF_TX_INVERSE_ONLY, 0),
243 TX_DEF(fft8, FFT, 8, 8, 2, 0, 128, b8_i0, sse3, SSE3,
AV_TX_INPLACE, 0),
244 TX_DEF(fft8_asm, FFT, 8, 8, 2, 0, 192, b8_i0, sse3, SSE3,
246 TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 192, b8_i0, sse3, SSE3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE, 0),
247 TX_DEF(fft8, FFT, 8, 8, 2, 0, 256, b8_i0, avx, AVX,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
248 TX_DEF(fft8_asm, FFT, 8, 8, 2, 0, 320, b8_i0, avx, AVX,
250 TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 320, b8_i0, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
252 TX_DEF(fft16, FFT, 16, 16, 2, 0, 256, b8_i2, avx, AVX,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
253 TX_DEF(fft16_asm, FFT, 16, 16, 2, 0, 320, b8_i2, avx, AVX,
255 TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
257 TX_DEF(fft16, FFT, 16, 16, 2, 0, 288, b8_i2, fma3, FMA3,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
258 TX_DEF(fft16_asm, FFT, 16, 16, 2, 0, 352, b8_i2, fma3, FMA3,
260 TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 352, b8_i2, fma3, FMA3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
262
263 #if ARCH_X86_64
264 TX_DEF(fft32, FFT, 32, 32, 2, 0, 256, b8_i2, avx, AVX,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
265 TX_DEF(fft32_asm, FFT, 32, 32, 2, 0, 320, b8_i2, avx, AVX,
267 TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
269 TX_DEF(fft32, FFT, 32, 32, 2, 0, 288, b8_i2, fma3, FMA3,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
270 TX_DEF(fft32_asm, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3,
272 TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
/* fft_sr: split-radix FFT covering lengths 64 up to 2097152 (2^21). */
274 TX_DEF(fft_sr, FFT, 64, 2097152, 2, 0, 256, b8_i2, avx, AVX, 0,
AV_CPU_FLAG_AVXSLOW),
275 TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,
277 TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
279 TX_DEF(fft_sr, FFT, 64, 2097152, 2, 0, 288, b8_i2, fma3, FMA3, 0,
AV_CPU_FLAG_AVXSLOW),
280 TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3, FMA3,
282 TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3, FMA3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
284
285 #if HAVE_AVX2_EXTERNAL
286 TX_DEF(
fft15, FFT, 15, 15, 15, 0, 320,
factor_init, avx2, AVX2,
288 TX_DEF(fft15_ns, FFT, 15, 15, 15, 0, 384,
factor_init, avx2, AVX2,
290
291 TX_DEF(fft_sr, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx2, AVX2, 0,
293 TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2,
295 TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
297
298 TX_DEF(fft_pfa_15xM, FFT, 60,
TX_LEN_UNLIMITED, 15, 2, 320,
fft_pfa_init, avx2, AVX2,
300 TX_DEF(fft_pfa_15xM_asm, FFT, 60,
TX_LEN_UNLIMITED, 15, 2, 384,
fft_pfa_init, avx2, AVX2,
302 TX_DEF(fft_pfa_15xM_ns, FFT, 60,
TX_LEN_UNLIMITED, 15, 2, 384,
fft_pfa_init, avx2, AVX2,
304
305 TX_DEF(mdct_inv, MDCT, 16,
TX_LEN_UNLIMITED, 2,
TX_FACTOR_ANY, 384,
m_inv_init, avx2, AVX2,
307 #endif
308 #endif
309
311 };