1 /*
2 * DSP utils
3 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 /**
24 * @file
25 * DSP utils.
26 * Note: many functions in here may use MMX, which trashes the FPU state; it is
27 * absolutely necessary to call emms_c() between DSP and float/double code.
28 */
29
30 #ifndef AVCODEC_DSPUTIL_H
31 #define AVCODEC_DSPUTIL_H
32
36
37 /* encoding scans */
42
43 /* pixel operations */
44 #define MAX_NEG_CROP 1024 /* NOTE(review): listed under "pixel operations"; presumably the headroom of a pixel clipping table -- its users are not visible here, confirm */
45
46 /* temporary */
49
54
55 /* RV40 functions */
60
63
64 /* minimum alignment rules ;)
65 If you notice errors in the alignment rules, need more alignment for some ASM code
66 on some CPU, or need to use a function with less-aligned data, then send a mail
67 to the ffmpeg-devel mailing list.
68
69 !warning These alignments might not match reality (an attribute((align)) may be
70 missing somewhere).
71 I (Michael) did not check them; these are just the alignments which I think
72 could be reached easily.
73
74 !future video codecs might need functions with less strict alignment
75 */
76
77 /* add and put pixel (decoding) */
78 // block sizes for op_pixels_func are 8x4, 8x8, 16x8, 16x16
79 // h for op_pixels_func is limited to {width/2, width}, but is never larger than 16 and never smaller than 4
82
84
/**
 * Declare the three prototypes ff_put_<name>, ff_put_no_rnd_<name> and
 * ff_avg_<name> for one function name.
 * Each prototype returns void and takes a destination pointer (aligned to
 * the block width, 8 or 16), a source pointer with no alignment requirement
 * (align 1) and a ptrdiff_t stride.
 * The expansion already ends with a semicolon, so no trailing semicolon is
 * needed after the macro invocation.
 */
85 #define DEF_OLD_QPEL(name)\
86 void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);\
87 void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);\
88 void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);
89
102
103 /* motion estimation */
104 // h is limited to {width/2, width, 2*width}, but is never larger than 16 and never smaller than 2,
105 // although currently h < 4 is not used, as functions with width < 8 are neither used nor implemented
/**
 * Motion-estimation comparison callback type.
 *
 * @param s         opaque context; the original annotation names MpegEncContext
 * @param blk1      first block, aligned to the block width (8 or 16)
 * @param blk2      second block, no alignment requirement (align 1)
 * @param line_size NOTE(review): presumably the byte distance between rows -- confirm
 * @param h         block height; its limits are given in the comment just above
 * @return          NOTE(review): presumably the comparison score -- confirm
 *
 * A const function attribute is noted on the declaration but left disabled.
 */
106 typedef int (*me_cmp_func)(void /* MpegEncContext */ *s,
                               uint8_t *blk1 /* align width (8 or 16) */,
                               uint8_t *blk2 /* align 1 */,
                               int line_size,
                               int h) /* __attribute__ ((const)) */;
107
108 /**
109 * Scantable.
110 */
116
119 int idct_permutation_type);
120
121 /**
122 * DSPContext.
123 */
125 /* pixel ops : interface with DCT */
133 /**
134 * translational global motion compensation.
135 */
137 /**
138 * global motion compensation.
139 */
146 // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
147
162
169
172
173 /**
174 * Thirdpel motion compensation with rounding (a+b+1)>>1.
175 * This is an array[12] of motion compensation functions for the 9 thirdpel
176 * positions<br>
177 * *pixels_tab[ xthirdpel + 4*ythirdpel ]
178 * @param block destination where the result is stored
179 * @param pixels source
180 * @param line_size number of bytes in a horizontal line of block
181 * @param h height
182 */
185
190
192
193 /* huffyuv specific */
196 /**
197 * subtract huffyuv's variant of median prediction
198 * note, this might read from src1[-1], src2[-1]
199 */
204 /* this might write to dst[w] */
207
210
211 /* assume len is a multiple of 8, and arrays are 16-byte aligned */
213
214 /* (I)DCT */
217
218 /* IDCT really*/
220
221 /**
222 * block -> idct -> clip to unsigned 8 bit -> dest.
223 * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
224 * @param line_size size in bytes of a horizontal line of dest
225 */
227
228 /**
229 * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
230 * @param line_size size in bytes of a horizontal line of dest
231 */
233
234 /**
235 * idct input permutation.
236 * Several optimized IDCTs need a permuted input (relative to the normal order of the reference
237 * IDCT).
238 * This permutation must be performed before the idct_put/add. Note that normally this can be merged
239 * with the zigzag/alternate scan.<br>
240 * an example to avoid confusion:
241 * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
242 * - (x -> reference dct -> reference idct -> x)
243 * - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
244 * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
245 */
/* Identifiers for the kinds of IDCT input permutation (see the "idct input
 * permutation" description above).
 * NOTE(review): presumably the values carried by idct_permutation_type --
 * confirm against the code that reads these macros. */
248 #define FF_NO_IDCT_PERM 1
249 #define FF_LIBMPEG2_IDCT_PERM 2
250 #define FF_SIMPLE_IDCT_PERM 3
251 #define FF_TRANSPOSE_IDCT_PERM 4
252 #define FF_PARTTRANS_IDCT_PERM 5
253 #define FF_SSE2_IDCT_PERM 6
254
257 #define BASIS_SHIFT 16 /* NOTE(review): presumably a fixed-point shift count; its users are not visible in this excerpt -- confirm */
258 #define RECON_SHIFT 6 /* NOTE(review): presumably a fixed-point shift count; its users are not visible in this excerpt -- confirm */
259
261 #define EDGE_WIDTH 16 /* NOTE(review): presumably the width in pixels of the padded picture edge -- confirm against callers */
263 #define EDGE_BOTTOM 2 /* NOTE(review): presumably a flag selecting the bottom edge -- confirm against callers */
264
266
267 /**
268 * Calculate scalar product of two vectors.
269 * @param len length of vectors, should be multiple of 16
270 */
272 /* ape functions */
273 /**
274 * Calculate scalar product of v1 and v2,
275 * and v1[i] += v3[i] * mul
276 * @param len length of vectors, should be multiple of 16
277 */
279
280 /**
281 * Apply symmetric window in 16-bit fixed-point.
282 * @param output destination array
283 * constraints: 16-byte aligned
284 * @param input source array
285 * constraints: 16-byte aligned
286 * @param window window array
287 * constraints: 16-byte aligned, at least len/2 elements
288 * @param len full window length
289 * constraints: multiple of ? greater than zero
290 */
292 const int16_t *window,
unsigned int len);
293
294 /**
295 * Clip each element in an array of int32_t to a given minimum and maximum value.
296 * @param dst destination array
297 * constraints: 16-byte aligned
298 * @param src source array
299 * constraints: 16-byte aligned
300 * @param min minimum value
301 * constraints: must be in the range [-(1 << 24), 1 << 24]
302 * @param max maximum value
303 * constraints: must be in the range [-(1 << 24), 1 << 24]
304 * @param len number of elements in the array
305 * constraints: multiple of 32 greater than zero
306 */
309
312
317
319
321
329
331
332 #endif /* AVCODEC_DSPUTIL_H */