FFmpeg: libavcodec/x86/mlpdsp_init.c Source File

FFmpeg

[フレーム]

mlpdsp_init.c

Go to the documentation of this file.

1 /*

2 * MLP DSP functions x86-optimized

4 *

5 * This file is part of FFmpeg.

6 *

7 * FFmpeg is free software; you can redistribute it and/or

8 * modify it under the terms of the GNU Lesser General Public

9 * License as published by the Free Software Foundation; either

10 * version 2.1 of the License, or (at your option) any later version.

11 *

12 * FFmpeg is distributed in the hope that it will be useful,

13 * but WITHOUT ANY WARRANTY; without even the implied warranty of

14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

15 * Lesser General Public License for more details.

16 *

17 * You should have received a copy of the GNU Lesser General Public

18 * License along with FFmpeg; if not, write to the Free Software

19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

20 */

22 #include <stdint.h>

23 #include "config.h"

24 #include "libavutil/attributes.h"

25 #include "libavutil/cpu.h"

26 #include "libavutil/macros.h"

27 #include "libavutil/x86/asm.h"

28 #include "libavutil/x86/cpu.h"

29 #include "libavcodec/mlpdsp.h"

30 #include "libavcodec/mlp.h"

/*
 * Declare the prototype of one rematrix-channel routine,
 * ff_mlp_rematrix_channel_<opt>. Only the declaration is produced here; the
 * definitions are not part of this file (presumably external assembly, since
 * the init function gates them with EXTERNAL_* CPU checks -- confirm).
 * The two instantiations below declare the sse4 and avx2_bmi2 variants.
 */
32 #define REMATRIX_CHANNEL_FUNC(opt) \

33 void ff_mlp_rematrix_channel_##opt(int32_t *samples, \

34 const int32_t *coeffs, \

35 const uint8_t *bypassed_lsbs, \

36 const int8_t *noise_buffer, \

37 int index, \

38 unsigned int dest_ch, \

39 uint16_t blockpos, \

40 unsigned int maxchan, \

41 int matrix_noise_shift, \

42 int access_unit_size_pow2, \

43 int32_t mask);

45 REMATRIX_CHANNEL_FUNC(sse4)

46 REMATRIX_CHANNEL_FUNC(avx2_bmi2)

48 #if HAVE_7REGS && HAVE_INLINE_ASM && HAVE_INLINE_ASM_NONLOCAL_LABELS

/*
 * Symbols for the labels emitted (via LABEL_MANGLE) inside the inline asm of
 * mlp_filter_channel_x86(). They are declared as char objects only so that
 * their addresses can be taken from C; they are never read as data here.
 */
50 extern char ff_mlp_firorder_8;

51 extern char ff_mlp_firorder_7;

52 extern char ff_mlp_firorder_6;

53 extern char ff_mlp_firorder_5;

54 extern char ff_mlp_firorder_4;

55 extern char ff_mlp_firorder_3;

56 extern char ff_mlp_firorder_2;

57 extern char ff_mlp_firorder_1;

58 extern char ff_mlp_firorder_0;

60 extern char ff_mlp_iirorder_4;

61 extern char ff_mlp_iirorder_3;

62 extern char ff_mlp_iirorder_2;

63 extern char ff_mlp_iirorder_1;

64 extern char ff_mlp_iirorder_0;

/* Jump targets indexed by FIR order (0..8): entry N points at label ff_mlp_firorder_N. */
66 static const void * const firtable[9] = { &ff_mlp_firorder_0, &ff_mlp_firorder_1,

67 &ff_mlp_firorder_2, &ff_mlp_firorder_3,

68 &ff_mlp_firorder_4, &ff_mlp_firorder_5,

69 &ff_mlp_firorder_6, &ff_mlp_firorder_7,

70 &ff_mlp_firorder_8 };

/* Jump targets indexed by IIR order (0..4): entry N points at label ff_mlp_iirorder_N. */
71 static const void * const iirtable[5] = { &ff_mlp_iirorder_0, &ff_mlp_iirorder_1,

72 &ff_mlp_iirorder_2, &ff_mlp_iirorder_3,

73 &ff_mlp_iirorder_4 };

75 #if ARCH_X86_64

/*
 * x86_64 variant: one multiply-accumulate step.
 * Emits the label, sign-extends the 32-bit value at offset+offs from asm
 * operand %0 into rax and the one at offset+offc from operand %1 into rdx,
 * multiplies them and adds the product to the accumulator in rsi.
 */
77 #define MLPMUL(label, offset, offs, offc) \

78 LABEL_MANGLE(label)": \n\t" \

79 "movslq "offset"+"offs"(%0), %%rax\n\t" \

80 "movslq "offset"+"offc"(%1), %%rdx\n\t" \

81 "imul %%rdx, %%rax\n\t" \

82 "add %%rax, %%rsi\n\t"

/*
 * x86_64 variant: multiply-accumulate step whose second factor is asm operand
 * number `firc` rather than a value loaded from memory.
 */
84 #define FIRMULREG(label, offset, firc)\

85 LABEL_MANGLE(label)": \n\t" \

86 "movslq "#offset"(%0), %%rax\n\t" \

87 "imul %"#firc", %%rax\n\t" \

88 "add %%rax, %%rsi\n\t"

/* Zero the accumulator (rsi). */
90 #define CLEAR_ACCUM \

91 "xor %%rsi, %%rsi\n\t"

/* Shift the accumulator right by the count held in cl. */
93 #define SHIFT_ACCUM \

94 "shr %%cl, %%rsi\n\t"

/* Register names used by the tail of the filter loop on x86_64. */
96 #define ACCUM "%%rdx"

97 #define RESULT "%%rsi"

98 #define RESULT32 "%%esi"

100 #else /* if ARCH_X86_32 */

101

/*
 * x86_32 variant: one multiply-accumulate step.
 * Emits the label, loads the 32-bit value at offset+offs from asm operand %0
 * into eax, multiplies it (one-operand imull, product in edx:eax) by the value
 * at offset+offc from operand %1, and adds the product to the accumulator kept
 * in ecx:esi (add/adc pair).
 */
102 #define MLPMUL(label, offset, offs, offc) \

103 LABEL_MANGLE(label)": \n\t" \

104 "mov "offset"+"offs"(%0), %%eax\n\t" \

105 "imull "offset"+"offc"(%1) \n\t" \

106 "add %%eax , %%esi\n\t" \

107 "adc %%edx , %%ecx\n\t"

108

/* x86_32 variant: the `firc` argument is unused; this is a plain MLPMUL step. */
109 #define FIRMULREG(label, offset, firc) \

110 MLPMUL(label, #offset, "0", "0")

111

/* Zero both halves of the accumulator (esi and ecx). */
112 #define CLEAR_ACCUM \

113 "xor %%esi, %%esi\n\t" \

114 "xor %%ecx, %%ecx\n\t"

115

/*
 * Copy the accumulator from ecx:esi to edx:eax, load the low byte of asm
 * operand %7 into ecx as the shift count, then shift right with shrd so the
 * result ends up in eax.
 */
116 #define SHIFT_ACCUM \

117 "mov %%ecx, %%edx\n\t" \

118 "mov %%esi, %%eax\n\t" \

119 "movzbl %7 , %%ecx\n\t" \

120 "shrd %%cl, %%edx, %%eax\n\t" \

121

/* Register names used by the tail of the filter loop on x86_32. */
122 #define ACCUM "%%edx"

123 #define RESULT "%%eax"

124 #define RESULT32 "%%eax"

125

126 #endif /* !ARCH_X86_64 */

127

/*
 * Byte offsets, stringified so they can be pasted into the asm text:
 *   BINC  - added to the sample_buffer pointer after every sample,
 *   IOFFS - displacement added to the state pointer for the IIR taps and for
 *           the value stored at the end of each iteration,
 *   IOFFC - displacement added to the coeff pointer for the IIR taps.
 */
128 #define BINC AV_STRINGIFY(4* MAX_CHANNELS)

129 #define IOFFS AV_STRINGIFY(4*(MAX_FIR_ORDER + MAX_BLOCKSIZE))

130 #define IOFFC AV_STRINGIFY(4* MAX_FIR_ORDER)

131

/* FIR tap: MLPMUL with no extra displacement. IIR tap: MLPMUL displaced by IOFFS/IOFFC. */
132 #define FIRMUL(label, offset) MLPMUL(label, #offset, "0", "0")

133 #define IIRMUL(label, offset) MLPMUL(label, #offset, IOFFS, IOFFC)

134

/**
 * Run the combined FIR/IIR filter over one block of samples with inline x86 asm.
 *
 * For every sample the loop clears the accumulator, jumps into the unrolled
 * FIR tap chain at the entry selected by firorder (falling through the
 * remaining lower-order taps), jumps likewise into the IIR tap chain, shifts
 * the accumulator by filter_shift, adds the current sample_buffer value and
 * applies mask. The masked result is written both to the new front of state
 * and back to sample_buffer; the difference between the masked result and the
 * shifted accumulator is stored IOFFS bytes past the new state pointer.
 *
 * @param state         filter state; the pointer is moved down by 4 bytes per sample
 * @param coeff         filter coefficients (FIR taps at the start, IIR taps at IOFFC)
 * @param firorder      index into firtable, must be in 0..8 (not checked)
 * @param iirorder      index into iirtable, must be in 0..4 (not checked)
 * @param filter_shift  right-shift count applied to the accumulator
 * @param mask          AND mask applied to each output value
 * @param blocksize     number of samples; the loop body runs before the counter
 *                      is tested, so at least one sample is always processed
 * @param sample_buffer input/output samples, advanced by BINC bytes per sample
 */
135 static void mlp_filter_channel_x86(int32_t *state, const int32_t *coeff,

136 int firorder, int iirorder,

137 unsigned int filter_shift, int32_t mask,

138 int blocksize, int32_t *sample_buffer)

139 {

140 const void *firjump = firtable[firorder];

141 const void *iirjump = iirtable[iirorder];

142

/* Count upwards from -blocksize so that "incl; js" can terminate the loop at zero. */
143 blocksize = -blocksize;

144

145 __asm__ volatile(

146 "1: \n\t"

147 CLEAR_ACCUM

/* Enter the FIR chain at the tap matching firorder; execution falls through to order 0. */
148 "jmp *%5 \n\t"

149 FIRMUL (ff_mlp_firorder_8, 0x1c )

150 FIRMUL (ff_mlp_firorder_7, 0x18 )

151 FIRMUL (ff_mlp_firorder_6, 0x14 )

152 FIRMUL (ff_mlp_firorder_5, 0x10 )

153 FIRMUL (ff_mlp_firorder_4, 0x0c )

154 FIRMUL (ff_mlp_firorder_3, 0x08 )

155 FIRMUL (ff_mlp_firorder_2, 0x04 )

156 FIRMULREG(ff_mlp_firorder_1, 0x00, 8)

157 LABEL_MANGLE(ff_mlp_firorder_0)":\n\t"

/* Same scheme for the IIR chain, selected by iirorder. */
158 "jmp *%6 \n\t"

159 IIRMUL (ff_mlp_iirorder_4, 0x0c )

160 IIRMUL (ff_mlp_iirorder_3, 0x08 )

161 IIRMUL (ff_mlp_iirorder_2, 0x04 )

162 IIRMUL (ff_mlp_iirorder_1, 0x00 )

163 LABEL_MANGLE(ff_mlp_iirorder_0)":\n\t"

164 SHIFT_ACCUM

/* Keep a copy of the shifted accumulator, then form the masked output sample. */
165 "mov "RESULT" ,"ACCUM" \n\t"

166 "add (%2) ,"RESULT" \n\t"

167 "and %4 ,"RESULT" \n\t"

/* Step the state pointer back by one int32_t; "$4" is the AT&T immediate 4. */
168 "sub $4 , %0 \n\t"

169 "mov "RESULT32", (%0) \n\t"

170 "mov "RESULT32", (%2) \n\t"

171 "add $"BINC" , %2 \n\t"

/* Output minus shifted accumulator, stored IOFFS bytes past the state pointer. */
172 "sub "ACCUM" ,"RESULT" \n\t"

173 "mov "RESULT32","IOFFS"(%0) \n\t"

174 "incl %3 \n\t"

175 "js 1b \n\t"

176 : /* 0*/"+r"(state),

177 /* 1*/"+r"(coeff),

178 /* 2*/"+r"(sample_buffer),

179 #if ARCH_X86_64

180 /* 3*/"+r"(blocksize)

181 : /* 4*/"r"((x86_reg)mask), /* 5*/"r"(firjump),

182 /* 6*/"r"(iirjump) , /* 7*/"c"(filter_shift)

183 , /* 8*/"r"((int64_t)coeff[0])

/* "memory": the asm reads and writes through state/coeff/sample_buffer, which are not listed as memory operands. */
184 : "rax", "rdx", "rsi", "memory"

185 #else /* ARCH_X86_32 */

186 /* 3*/"+m"(blocksize)

187 : /* 4*/"m"( mask), /* 5*/"m"(firjump),

188 /* 6*/"m"(iirjump) , /* 7*/"m"(filter_shift)

/* "memory": the asm reads and writes through state/coeff/sample_buffer, which are not listed as memory operands. */
189 : "eax", "edx", "esi", "ecx", "memory"

190 #endif /* !ARCH_X86_64 */

191 );

192 }

193

194 #endif /* HAVE_7REGS && HAVE_INLINE_ASM && HAVE_INLINE_ASM_NONLOCAL_LABELS */

195

/**
 * Install the x86-specific MLP DSP routines into the context.
 *
 * Function pointers in c are only overwritten when the corresponding
 * compile-time and run-time conditions hold; otherwise they are left as the
 * caller set them. The checks run in order, so when both the SSE4 and the
 * AVX2+BMI2 conditions hold the AVX2+BMI2 rematrix routine wins.
 *
 * @param c DSP context whose function pointers are updated
 */
196 av_cold void ff_mlpdsp_init_x86(MLPDSPContext *c)

197 {

198 int cpu_flags = av_get_cpu_flags();

/* The inline-asm filter exists only when this same condition compiled it in above. */
199 #if HAVE_7REGS && HAVE_INLINE_ASM && HAVE_INLINE_ASM_NONLOCAL_LABELS

200 if (INLINE_MMX(cpu_flags))

201 c->mlp_filter_channel = mlp_filter_channel_x86;

202 #endif

/* The rematrix routines are selected on x86_64 builds only. */
203 if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags))

204 c->mlp_rematrix_channel = ff_mlp_rematrix_channel_sse4;

205 if (ARCH_X86_64 && EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2)

206 c->mlp_rematrix_channel = ff_mlp_rematrix_channel_avx2_bmi2;

207 }

INLINE_MMX

#define INLINE_MMX(flags)

Definition: cpu.h:86

cpu.h

const char * r

Definition: vf_curves.c:116

sub

static float sub(float src0, float src1)

Definition: dnn_backend_native_layer_mathbinary.c:31

LABEL_MANGLE

#define LABEL_MANGLE(a)

Definition: asm.h:103

EXTERNAL_AVX2_FAST

#define EXTERNAL_AVX2_FAST(flags)

Definition: cpu.h:79

#define b

Definition: input.c:40

av_get_cpu_flags

int av_get_cpu_flags(void)

Return the flags which specify extensions supported by the CPU.

Definition: cpu.c:98

cpu_flags

static atomic_int cpu_flags

Definition: cpu.c:50

REMATRIX_CHANNEL_FUNC

#define REMATRIX_CHANNEL_FUNC(opt)

Definition: mlpdsp_init.c:32

macros.h

av_cold

#define av_cold

Definition: attributes.h:90

mask

static const uint16_t mask[17]

Definition: lzw.c:38

Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c

Definition: undefined.txt:32

state

static struct @320 state

av_cold void ff_mlpdsp_init_x86(MLPDSPContext *c)

Definition: mlpdsp_init.c:196

__asm__

__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")

ACCUM

#define ACCUM(k, x, d)

EXTERNAL_SSE4

#define EXTERNAL_SSE4(flags)

Definition: cpu.h:68

add

static float add(float src0, float src1)

Definition: dnn_backend_native_layer_mathbinary.c:35

AV_CPU_FLAG_BMI2

#define AV_CPU_FLAG_BMI2

Bit Manipulation Instruction Set 2.

Definition: cpu.h:55