Go to the documentation of this file. 1 /*
2 * Copyright (C) 2004 The FFmpeg project
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /**
22 * @file
23 * Standard C DSP-oriented functions cribbed from the original VP3
24 * source code.
25 */
26
27 #include <string.h>
28
29 #include "config.h"
35
38
39 #define IdctAdjustBeforeShift 8
47
48 #define M(a, b) ((int)((SUINT)(a) * (b)) >> 16)
49
52 {
54
55 int A,
B,
C,
D, Ad, Bd, Cd, Dd,
E,
F,
G,
H;
56 int Ed, Gd, Add, Bdd, Fd, Hd;
57
59
60 /* Inverse DCT on the rows now */
61 for (
i = 0;
i < 8;
i++) {
62 /* Check for non-zero values */
63 if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
64 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) {
69
72
75
76 E =
M(
xC4S4, (ip[0 * 8] + ip[4 * 8]));
77 F =
M(
xC4S4, (ip[0 * 8] - ip[4 * 8]));
78
81
84
87
90
91 /* Final sequence of operations over-write original inputs. */
92 ip[0 * 8] = Gd + Cd;
93 ip[7 * 8] = Gd - Cd;
94
95 ip[1 * 8] = Add + Hd;
96 ip[2 * 8] = Add - Hd;
97
98 ip[3 * 8] = Ed + Dd;
99 ip[4 * 8] = Ed - Dd;
100
101 ip[5 * 8] = Fd + Bdd;
102 ip[6 * 8] = Fd - Bdd;
103 }
104
105 ip += 1; /* next row */
106 }
107
109
110 for (
i = 0;
i < 8;
i++) {
111 /* Check for non-zero values (bitwise or faster than ||) */
112 if (ip[1] | ip[2] | ip[3] |
113 ip[4] | ip[5] | ip[6] | ip[7]) {
118
121
124
125 E =
M(
xC4S4, (ip[0] + ip[4])) + 8;
126 F =
M(
xC4S4, (ip[0] - ip[4])) + 8;
127
128 if (
type == 1) {
// HACK
131 }
132
135
138
141
144
145 /* Final sequence of operations over-write original inputs. */
149
152
155
158 } else {
161
164
167
170 }
171 } else {
181 } else {
182 if (ip[0]) {
183 int v = (
xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20;
192 }
193 }
194 }
195
196 ip += 8; /* next column */
197 dst++;
198 }
199 }
200
203 {
205
206 int A,
B,
C,
D, Ad, Bd, Cd, Dd,
E,
F,
G,
H;
207 int Ed, Gd, Add, Bdd, Fd, Hd;
208
210
211 /* Inverse DCT on the rows now */
212 for (
i = 0;
i < 4;
i++) {
213 /* Check for non-zero values */
214 if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) {
219
222
225
228
231
234
237
240
241 /* Final sequence of operations over-write original inputs */
242 ip[0 * 8] = Gd + Cd;
243 ip[7 * 8] = Gd - Cd;
244
245 ip[1 * 8] = Add + Hd;
246 ip[2 * 8] = Add - Hd;
247
248 ip[3 * 8] = Ed + Dd;
249 ip[4 * 8] = Ed - Dd;
250
251 ip[5 * 8] = Fd + Bdd;
252 ip[6 * 8] = Fd - Bdd;
253
254 }
255
256 ip += 1;
257 }
258
260
261 for (
i = 0;
i < 8;
i++) {
262 /* Check for non-zero values (bitwise or faster than ||) */
263 if (ip[0] | ip[1] | ip[2] | ip[3]) {
268
271
274
279
282
285
288
291
292 Gd += 8;
293 Add += 8;
294 Ed += 8;
295 Fd += 8;
296
297 /* Final sequence of operations over-write original inputs. */
301
304
307
310 } else {
313
316
319
322 }
323 } else {
333 }
334 }
335
336 ip += 8;
337 dst++;
338 }
339 }
340
342 {
345 }
346
348 {
351 }
352
354 int16_t *
block /* align 16 */)
355 {
358 }
359
361 int16_t *
block /* align 16 */)
362 {
365 }
366
368 int16_t *
block /* align 16 */)
369 {
371
372 for (
i = 0;
i < 8;
i++) {
382 }
384 }
385
387 int *bounding_values, int count)
388 {
389 unsigned char *end;
391 const ptrdiff_t nstride = -
stride;
392
393 for (end = first_pixel + count; first_pixel < end; first_pixel++) {
395 (first_pixel[0] - first_pixel[nstride]) * 3;
397
400 }
401 }
402
404 int *bounding_values, int count)
405 {
406 unsigned char *end;
408
409 for (end = first_pixel + count *
stride; first_pixel != end; first_pixel +=
stride) {
411 (first_pixel[ 0] - first_pixel[-1]) * 3;
413
416 }
417 }
418
419 #define LOOP_FILTER(prefix, suffix, dim, count) \
420 void prefix##_##dim##_loop_filter_##count##suffix(uint8_t *first_pixel, ptrdiff_t stride, \
421 int *bounding_values) \
422 { \
423 vp3_##dim##_loop_filter_c(first_pixel, stride, bounding_values, count); \
424 }
425
430
431 static
void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *
src1,
433 {
435
436 for (
i = 0;
i <
h;
i++) {
438
445 }
446 }
447
449 {
450 c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2;
451
455 c->v_loop_filter =
c->v_loop_filter_unaligned = vp3_v_loop_filter_8_c;
456 c->h_loop_filter =
c->h_loop_filter_unaligned = vp3_h_loop_filter_8_c;
457
458 #if ARCH_ARM
460 #elif ARCH_PPC
462 #elif ARCH_X86
464 #elif ARCH_MIPS
466 #endif
467 }
468
469 /*
470 * This function initializes the loop filter boundary limits if the frame's
471 * quality index is different from the previous frame's.
472 *
473 * where sizeof(bounding_values_array) is 256 * sizeof(int)
474 *
475 * The filter_limit_values may not be larger than 127.
476 */
478 {
479 int *bounding_values = bounding_values_array + 127;
480 int x;
482
484
485 /* set up the bounding values */
486 memset(bounding_values_array, 0, 256 * sizeof(int));
487 for (x = 0; x < filter_limit; x++) {
488 bounding_values[-x] = -x;
489 bounding_values[x] = x;
490 }
492 bounding_values[ x] =
value;
493 bounding_values[-x] = -
value;
494 }
496 bounding_values[128] =
value;
497 bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202
U;
498 }
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... ..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]