FFmpeg: libavcodec/psymodel.h Source File

FFmpeg

[フレーム]

libavcodec

psymodel.h

Go to the documentation of this file.

1 /*

2 * audio encoder psychoacoustic model

4 *

5 * This file is part of FFmpeg.

6 *

7 * FFmpeg is free software; you can redistribute it and/or

8 * modify it under the terms of the GNU Lesser General Public

9 * License as published by the Free Software Foundation; either

10 * version 2.1 of the License, or (at your option) any later version.

11 *

12 * FFmpeg is distributed in the hope that it will be useful,

13 * but WITHOUT ANY WARRANTY; without even the implied warranty of

14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

15 * Lesser General Public License for more details.

16 *

17 * You should have received a copy of the GNU Lesser General Public

18 * License along with FFmpeg; if not, write to the Free Software

19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

20 */

22 #ifndef AVCODEC_PSYMODEL_H

23 #define AVCODEC_PSYMODEL_H

25 #include "avcodec.h"

27 /** maximum possible number of bands */

28 #define PSY_MAX_BANDS 128

29 /** maximum number of channels */

30 #define PSY_MAX_CHANS 20

32 /* cutoff for VBR is purposely increased, since LP filtering actually

33 * hinders VBR performance rather than the opposite

34 */

35 #define AAC_CUTOFF_FROM_BITRATE(bit_rate,channels,sample_rate) (bit_rate ? FFMIN3(FFMIN3( \

36 FFMAX(bit_rate/channels/5, bit_rate/channels*15/32 - 5500), \

37 3000 + bit_rate/channels/4, \

38 12000 + bit_rate/channels/16), \

39 22000, \

40 sample_rate / 2): (sample_rate / 2))

41 #define AAC_CUTOFF(s) ( \

42 (s->flags & AV_CODEC_FLAG_QSCALE) \

43 ? s->sample_rate / 2 \

44 : AAC_CUTOFF_FROM_BITRATE(s->bit_rate, s->ch_layout.nb_channels, s->sample_rate) \

45 )

47 /**

48 * single band psychoacoustic information

49 */

50 typedef struct FFPsyBand {

51 int bits;

52 float energy;

53 float threshold;

54 float spread; /* Energy spread over the band */

55 } FFPsyBand;

57 /**

58 * single channel psychoacoustic information

59 */

60 typedef struct FFPsyChannel {

61 FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information

62 float entropy; ///< total PE for this channel

63 } FFPsyChannel;

65 /**

66 * psychoacoustic information for an arbitrary group of channels

67 */

68 typedef struct FFPsyChannelGroup {

69 FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group

70 uint8_t num_ch; ///< number of channels in this group

71 uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group

72 } FFPsyChannelGroup;

74 /**

75 * windowing related information

76 */

77 typedef struct FFPsyWindowInfo {

78 int window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next

79 int window_shape; ///< window shape (sine/KBD/whatever)

80 int num_windows; ///< number of windows in a frame

81 int grouping[8]; ///< window grouping (e.g. for AAC)

82 float clipping[8]; ///< maximum absolute normalized intensity in the given window for clip avoidance

83 int *window_sizes; ///< sequence of window sizes inside one frame (e.g. for WMA)

84 } FFPsyWindowInfo;

86 /**

87 * context used by psychoacoustic model

88 */

89 typedef struct FFPsyContext {

90 AVCodecContext *avctx; ///< encoder context

91 const struct FFPsyModel *model; ///< encoder-specific model functions

93 FFPsyChannel *ch; ///< single channel information

94 FFPsyChannelGroup *group; ///< channel group information

95 int num_groups; ///< number of channel groups

96 int cutoff; ///< lowpass frequency cutoff for analysis

98 uint8_t **bands; ///< scalefactor band sizes for possible frame sizes

99 int *num_bands; ///< number of scalefactor bands for possible frame sizes

100 int num_lens; ///< number of scalefactor band sets

101

102 struct {

103 int size; ///< size of the bit reservoir in bits

104 int bits; ///< number of bits used in the bit reservoir

105 int alloc; ///< number of bits allocated by the psy, or -1 if no allocation was done

106 } bitres;

107

108 void* model_priv_data; ///< psychoacoustic model implementation private data

109 } FFPsyContext;

110

111 /**

112 * codec-specific psychoacoustic model implementation

113 */

114 typedef struct FFPsyModel {

115 const char *name;

116 int (*init) (FFPsyContext *apc);

117

118 /**

119 * Suggest window sequence for channel.

120 *

121 * @param ctx model context

122 * @param audio samples for the current frame

123 * @param la lookahead samples (NULL when unavailable)

124 * @param channel number of channel element to analyze

125 * @param prev_type previous window type

126 *

127 * @return suggested window information in a structure

128 */

129 FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type);

130

131 /**

132 * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.

133 *

134 * @param ctx model context

135 * @param channel channel number of the first channel in the group to perform analysis on

136 * @param coeffs array of pointers to the transformed coefficients

137 * @param wi window information for the channels in the group

138 */

139 void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi);

140

141 void (*end) (FFPsyContext *apc);

142 } FFPsyModel;

143

144 /**

145 * Initialize psychoacoustic model.

146 *

147 * @param ctx model context

148 * @param avctx codec context

149 * @param num_lens number of possible frame lengths

150 * @param bands scalefactor band lengths for all frame lengths

151 * @param num_bands number of scalefactor bands for all frame lengths

152 * @param num_groups number of channel groups

153 * @param group_map array holding, for each group, the number of channels in that group minus one

154 *

155 * @return zero if successful, a negative value if not

156 */

157 int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,

158 const uint8_t **bands, const int *num_bands,

159 int num_groups, const uint8_t *group_map);

160

161 /**

162 * Determine what group a channel belongs to.

163 *

164 * @param ctx psymodel context

165 * @param channel channel to locate the group for

166 *

167 * @return pointer to the FFPsyChannelGroup this channel belongs to

168 */

169 FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel);

170

171 /**

172 * Cleanup model context at the end.

173 *

174 * @param ctx model context

175 */

176 void ff_psy_end(FFPsyContext *ctx);

177

178

179 /**************************************************************************

180 * Audio preprocessing stuff. *

181 * This should be moved into some audio filter eventually. *

182 **************************************************************************/

183 struct FFPsyPreprocessContext;

184

185 /**

186 * psychoacoustic model audio preprocessing initialization

187 */

188 struct FFPsyPreprocessContext *ff_psy_preprocess_init(AVCodecContext *avctx);

189

190 /**

191 * Preprocess several channels in an audio frame in order to compress it better.

192 *

193 * @param ctx preprocessing context

194 * @param audio samples to be filtered (in place)

195 * @param channels number of channels to preprocess

196 */

197 void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels);

198

199 /**

200 * Cleanup audio preprocessing module.

201 */

202 void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx);

203

204 #endif /* AVCODEC_PSYMODEL_H */

ff_psy_preprocess_end

void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)

Cleanup audio preprocessing module.

Definition: psymodel.c:152

FFPsyContext::group

FFPsyChannelGroup * group

channel group information

Definition: psymodel.h:94

FFPsyContext::bitres

struct FFPsyContext::@111 bitres

FFPsyChannel

single channel psychoacoustic information

Definition: psymodel.h:60

FFPsyModel::name

const char * name

Definition: psymodel.h:115

FFPsyContext::bits

int bits

number of bits used in the bit reservoir

Definition: psymodel.h:104

FFPsyContext::size

int size

size of the bit reservoir in bits

Definition: psymodel.h:103

FFPsyContext::num_groups

int num_groups

number of channel groups

Definition: psymodel.h:95

ff_psy_end

void ff_psy_end(FFPsyContext *ctx)

Cleanup model context at the end.

Definition: psymodel.c:83

FFPsyWindowInfo::window_shape

int window_shape

window shape (sine/KBD/whatever)

Definition: psymodel.h:79

ff_psy_preprocess

void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels)

Preprocess several channels in an audio frame in order to compress it better.

Definition: psymodel.c:139

FFPsyChannelGroup::ch

FFPsyChannel * ch[PSY_MAX_CHANS]

pointers to the individual channels in the group

Definition: psymodel.h:69

ff_psy_preprocess_init

struct FFPsyPreprocessContext * ff_psy_preprocess_init(AVCodecContext *avctx)

psychoacoustic model audio preprocessing initialization

Definition: psymodel.c:103

FFPsyModel::init

int(* init)(FFPsyContext *apc)

Definition: psymodel.h:116

FFPsyContext::num_bands

int * num_bands

number of scalefactor bands for possible frame sizes

Definition: psymodel.h:99

FFPsyWindowInfo

windowing related information

Definition: psymodel.h:77

FFPsyChannelGroup::num_ch

uint8_t num_ch

number of channels in this group

Definition: psymodel.h:70

ctx

AVFormatContext * ctx

Definition: movenc.c:48

channels

Definition: aptx.h:32

bands

static const float bands[]

Definition: af_superequalizer.c:56

FFPsyWindowInfo::window_type

int window_type[3]

window type (short/long/transitional, etc.) - current, previous and next

Definition: psymodel.h:78

FFPsyContext::num_lens

int num_lens

number of scalefactor band sets

Definition: psymodel.h:100

FFPsyBand::bits

int bits

Definition: psymodel.h:51

FFPsyBand

single band psychoacoustic information

Definition: psymodel.h:50

FFPsyContext::bands

uint8_t ** bands

scalefactor band sizes for possible frame sizes

Definition: psymodel.h:98

FFPsyWindowInfo::grouping

int grouping[8]

window grouping (e.g. for AAC)

Definition: psymodel.h:81

FFPsyPreprocessContext::avctx

AVCodecContext * avctx

Definition: psymodel.c:94

PSY_MAX_BANDS

#define PSY_MAX_BANDS

maximum possible number of bands

Definition: psymodel.h:28

FFPsyWindowInfo::clipping

float clipping[8]

maximum absolute normalized intensity in the given window for clip avoidance

Definition: psymodel.h:82

FFPsyChannelGroup::coupling

uint8_t coupling[PSY_MAX_BANDS]

allow coupling for this band in the group

Definition: psymodel.h:71

FFPsyChannel::psy_bands

FFPsyBand psy_bands[PSY_MAX_BANDS]

channel bands information

Definition: psymodel.h:61

FFPsyPreprocessContext

Definition: psymodel.c:93

FFPsyModel::window

FFPsyWindowInfo(* window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)

Suggest window sequence for channel.

Definition: psymodel.h:129

ff_psy_find_group

FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)

Determine what group a channel belongs to.

Definition: psymodel.c:73

FFPsyBand::threshold

float threshold

Definition: psymodel.h:53

FFPsyContext::ch

FFPsyChannel * ch

single channel information

Definition: psymodel.h:93

PSY_MAX_CHANS

#define PSY_MAX_CHANS

maximum number of channels

Definition: psymodel.h:30

FFPsyWindowInfo::window_sizes

int * window_sizes

sequence of window sizes inside one frame (e.g. for WMA)

Definition: psymodel.h:83

ff_psy_init

int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, const uint8_t **bands, const int *num_bands, int num_groups, const uint8_t *group_map)

Initialize psychoacoustic model.

Definition: psymodel.c:31

FFPsyContext::alloc

int alloc

number of bits allocated by the psy, or -1 if no allocation was done

Definition: psymodel.h:105

avcodec.h

FFPsyModel::end

void(* end)(FFPsyContext *apc)

Definition: psymodel.h:141

FFPsyChannelGroup

psychoacoustic information for an arbitrary group of channels

Definition: psymodel.h:68

FFPsyModel::analyze

void(* analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)

Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.

Definition: psymodel.h:139

FFPsyContext::model_priv_data

void * model_priv_data

psychoacoustic model implementation private data

Definition: psymodel.h:108

FFPsyBand::energy

float energy

Definition: psymodel.h:52

AVCodecContext

main external API structure.

Definition: avcodec.h:389

FFPsyContext::avctx

AVCodecContext * avctx

encoder context

Definition: psymodel.h:90

FFPsyModel

codec-specific psychoacoustic model implementation

Definition: psymodel.h:114

FFPsyContext::cutoff

int cutoff

lowpass frequency cutoff for analysis

Definition: psymodel.h:96

FFPsyContext::model

const struct FFPsyModel * model

encoder-specific model functions

Definition: psymodel.h:91

FFPsyChannel::entropy

float entropy

total PE for this channel

Definition: psymodel.h:62

FFPsyBand::spread

float spread

Definition: psymodel.h:54

int

Definition: ffmpeg_filter.c:153

FFPsyContext

context used by psychoacoustic model

Definition: psymodel.h:89

channel

Definition: ebur128.h:39

FFPsyWindowInfo::num_windows

int num_windows

number of windows in a frame

Definition: psymodel.h:80

Generated on Wed Aug 24 2022 21:41:45 for FFmpeg by doxygen 1.8.17