libpostproc/postprocess.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
00003  *
00004  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 /*
00029  C MMX MMX2 3DNow AltiVec
00030 isVertDC Ec Ec Ec
00031 isVertMinMaxOk Ec Ec Ec
00032 doVertLowPass E e e Ec
00033 doVertDefFilter Ec Ec e e Ec
00034 isHorizDC Ec Ec Ec
00035 isHorizMinMaxOk a E Ec
00036 doHorizLowPass E e e Ec
00037 doHorizDefFilter Ec Ec e e Ec
00038 do_a_deblock Ec E Ec E
00039 deRing E e e* Ecp
00040 Vertical RKAlgo1 E a a
00041 Horizontal RKAlgo1 a a
00042 Vertical X1# a E E
00043 Horizontal X1# a E E
00044 LinIpolDeinterlace e E E*
00045 CubicIpolDeinterlace a e e*
00046 LinBlendDeinterlace e E E*
00047 MedianDeinterlace# E Ec Ec
00048 TempDeNoiser# E e e Ec
00049 
00050 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
00051 # more or less selfinvented filters so the exactness is not too meaningful
00052 E = Exact implementation
00053 e = almost exact implementation (slightly different rounding,...)
00054 a = alternative / approximate impl
00055 c = checked against the other implementations (-vo md5)
00056 p = partially optimized, still some work to do
00057 */
00058 
00059 /*
00060 TODO:
00061 reduce the time wasted on the mem transfer
00062 unroll stuff if instructions depend too much on the prior one
00063 move YScale thing to the end instead of fixing QP
00064 write a faster and higher quality deblocking filter :)
00065 make the mainloop more flexible (variable number of blocks at once
00066  (the if/else stuff per block is slowing things down)
00067 compare the quality & speed of all filters
00068 split this huge file
00069 optimize c versions
00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
00071 ...
00072 */
00073 
00074 //Changelog: use the Subversion log
00075 
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082 //#undef HAVE_MMX2
00083 //#define HAVE_AMD3DNOW
00084 //#undef HAVE_MMX
00085 //#undef ARCH_X86
00086 //#define DEBUG_BRIGHTNESS
00087 #include "postprocess.h"
00088 #include "postprocess_internal.h"
00089 
00090 unsigned postproc_version(void)
00091 {
00092 return LIBPOSTPROC_VERSION_INT;
00093 }
00094 
00095 #if HAVE_ALTIVEC_H
00096 #include <altivec.h>
00097 #endif
00098 
// Size in bytes of the scratch copy of the mode string that
// pp_get_mode_by_name_and_quality() tokenizes.
00099 #define GET_MODE_BUFFER_SIZE 500
// Capacity of the per-filter array of unrecognised option tokens; the parser
// stops collecting at OPTIONS_ARRAY_SIZE-1 so a NULL terminator always fits.
00100 #define OPTIONS_ARRAY_SIZE 10
// Number of lines / samples per line the C filters in this file walk over.
00101 #define BLOCK_SIZE 8
// Not referenced in the visible part of this file; presumably used by the
// included templates -- TODO confirm.
00102 #define TEMP_STRIDE 8
00103 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
00104 
// 64-bit constants, only defined for x86 builds. wNN repeats the 16-bit value
// 0xNN four times, bNN repeats the byte 0xNN eight times. They are not used
// in the visible part of this file; presumably the asm in the included
// templates reads them -- TODO confirm.
00105 #if ARCH_X86
00106 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00107 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00108 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00109 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00110 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00111 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00112 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00113 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00114 #endif
00115 
// Threshold constant for the deringing filter (judging by its name); defined
// for every architecture. Not used in the visible part of this file.
00116 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00117 
00118 
00119 static struct PPFilter filters[]=
00120 {
00121 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
00122 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
00123 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
00124  {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
00125 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
00126 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
00127 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
00128 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
00129 {"dr", "dering", 1, 5, 6, DERING},
00130 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
00131 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00132 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00133 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00134 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
00135 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
00136 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
00137 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
00138 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
00139 {NULL, NULL,0,0,0,0} //End Marker
00140 };
00141 
/*
 * Alias table used by pp_get_mode_by_name_and_quality().
 *
 * Entries come in pairs: element 2*i is an alias name and element 2*i+1 is
 * the filter list that is spliced into the mode string in its place. A NULL
 * in an alias slot ends the table. Neither the strings nor the pointer array
 * are ever written, so both levels are const.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
00151 
00152 
#if ARCH_X86
/*
 * Thin wrappers around the x86 prefetch instructions. Each one emits a
 * single instruction with the address p held in a general purpose register;
 * nothing is read or written from the C point of view.
 */

/** Emit "prefetchnta" for address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}

/** Emit "prefetcht0" for address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}

/** Emit "prefetcht1" for address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}

/** Emit "prefetcht2" for address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
#endif
00182 
00183 /* The horizontal functions exist only in C because the MMX
00184  * code is faster with vertical filters and transposing. */
00185 
00189 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00190 {
00191 int numEq= 0;
00192 int y;
00193 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00194 const int dcThreshold= dcOffset*2 + 1;
00195 
00196 for(y=0; y<BLOCK_SIZE; y++){
00197 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00198 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00199 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00200 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00201 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00202 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00203 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00204 src+= stride;
00205 }
00206 return numEq > c->ppMode.flatnessThreshold;
00207 }
00208 
00212 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00213 {
00214 int numEq= 0;
00215 int y;
00216 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00217 const int dcThreshold= dcOffset*2 + 1;
00218 
00219 src+= stride*4; // src points to begin of the 8x8 Block
00220 for(y=0; y<BLOCK_SIZE-1; y++){
00221 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00222 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00223 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00224 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00225 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00226 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00227 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00228 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00229 src+= stride;
00230 }
00231 return numEq > c->ppMode.flatnessThreshold;
00232 }
00233 
/**
 * Cheap range test over 8 lines of 8 samples.
 *
 * One pair of samples, five columns apart, is inspected on every line. The
 * pair rotates with period four lines: columns (0,5), (2,7), (4,1), (6,3).
 * The test fails as soon as one pair differs by more than 2*QP in either
 * direction.
 *
 * @param src    first sample of the first line
 * @param stride distance between consecutive lines
 * @param QP     quantizer the tolerance is derived from
 * @return 1 if every inspected pair is within tolerance, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int colA[4] = { 0, 2, 4, 6 };
    static const int colB[4] = { 5, 7, 1, 3 };
    const int      bias  = 2 * QP;
    const unsigned limit = 4 * QP;
    int row;

    for (row = 0; row < 8; row++) {
        const int k     = row & 3;
        const int delta = src[colA[k]] - src[colB[k]];

        /* unsigned compare: true when delta is outside [-2*QP, 2*QP] */
        if ((unsigned)(delta + bias) > limit)
            return 0;
        src += stride;
    }
    return 1;
}
00256 
00257 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00258 {
00259 #if 1
00260 #if 1
00261 int x;
00262 src+= stride*4;
00263 for(x=0; x<BLOCK_SIZE; x+=4){
00264 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
00265 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00266 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00267 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00268 }
00269 #else
00270 int x;
00271 src+= stride*3;
00272 for(x=0; x<BLOCK_SIZE; x++){
00273 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
00274 }
00275 #endif
00276 return 1;
00277 #else
00278 int x;
00279 src+= stride*4;
00280 for(x=0; x<BLOCK_SIZE; x++){
00281 int min=255;
00282 int max=0;
00283 int y;
00284 for(y=0; y<8; y++){
00285 int v= src[x + y*stride];
00286 if(v>max) max=v;
00287 if(v<min) min=v;
00288 }
00289 if(max-min > 2*QP) return 0;
00290 }
00291 return 1;
00292 #endif
00293 }
00294 
00295 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00296 {
00297 if( isHorizDC_C(src, stride, c) ){
00298 if( isHorizMinMaxOk_C(src, stride, c->QP) )
00299 return 1;
00300 else
00301 return 0;
00302 }else{
00303 return 2;
00304 }
00305 }
00306 
00307 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00308 {
00309 if( isVertDC_C(src, stride, c) ){
00310 if( isVertMinMaxOk_C(src, stride, c->QP) )
00311 return 1;
00312 else
00313 return 0;
00314 }else{
00315 return 2;
00316 }
00317 }
00318 
00319 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00320 {
00321 int y;
00322 for(y=0; y<BLOCK_SIZE; y++){
00323 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00324 
00325 if(FFABS(middleEnergy) < 8*c->QP){
00326 const int q=(dst[3] - dst[4])/2;
00327 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00328 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00329 
00330 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00331 d= FFMAX(d, 0);
00332 
00333 d= (5*d + 32) >> 6;
00334 d*= FFSIGN(-middleEnergy);
00335 
00336 if(q>0)
00337 {
00338 d= d<0 ? 0 : d;
00339 d= d>q ? q : d;
00340 }
00341 else
00342 {
00343 d= d>0 ? 0 : d;
00344 d= d<q ? q : d;
00345 }
00346 
00347 dst[3]-= d;
00348 dst[4]+= d;
00349 }
00350 dst+= stride;
00351 }
00352 }
00353 
00358 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00359 {
00360 int y;
00361 for(y=0; y<BLOCK_SIZE; y++){
00362 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00363 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00364 
00365 int sums[10];
00366 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00367 sums[1] = sums[0] - first + dst[3];
00368 sums[2] = sums[1] - first + dst[4];
00369 sums[3] = sums[2] - first + dst[5];
00370 sums[4] = sums[3] - first + dst[6];
00371 sums[5] = sums[4] - dst[0] + dst[7];
00372 sums[6] = sums[5] - dst[1] + last;
00373 sums[7] = sums[6] - dst[2] + last;
00374 sums[8] = sums[7] - dst[3] + last;
00375 sums[9] = sums[8] - dst[4] + last;
00376 
00377 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00378 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00379 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00380 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00381 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00382 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00383 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00384 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00385 
00386 dst+= stride;
00387 }
00388 }
00389 
00398 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00399 {
00400 int y;
00401 static uint64_t *lut= NULL;
00402 if(lut==NULL)
00403 {
00404 int i;
00405 lut = av_malloc(256*8);
00406 for(i=0; i<256; i++)
00407 {
00408 int v= i < 128 ? 2*i : 2*(i-256);
00409 /*
00410 //Simulate 112242211 9-Tap filter
00411  uint64_t a= (v/16) & 0xFF;
00412  uint64_t b= (v/8) & 0xFF;
00413  uint64_t c= (v/4) & 0xFF;
00414  uint64_t d= (3*v/8) & 0xFF;
00415 */
00416 //Simulate piecewise linear interpolation
00417 uint64_t a= (v/16) & 0xFF;
00418 uint64_t b= (v*3/16) & 0xFF;
00419 uint64_t c= (v*5/16) & 0xFF;
00420 uint64_t d= (7*v/16) & 0xFF;
00421 uint64_t A= (0x100 - a)&0xFF;
00422 uint64_t B= (0x100 - b)&0xFF;
00423 uint64_t C= (0x100 - c)&0xFF;
00424 uint64_t D= (0x100 - c)&0xFF;
00425 
00426 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00427 (D<<24) | (C<<16) | (B<<8) | (A);
00428 //lut[i] = (v<<32) | (v<<24);
00429 }
00430 }
00431 
00432 for(y=0; y<BLOCK_SIZE; y++){
00433 int a= src[1] - src[2];
00434 int b= src[3] - src[4];
00435 int c= src[5] - src[6];
00436 
00437 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00438 
00439 if(d < QP){
00440 int v = d * FFSIGN(-b);
00441 
00442 src[1] +=v/8;
00443 src[2] +=v/4;
00444 src[3] +=3*v/8;
00445 src[4] -=3*v/8;
00446 src[5] -=v/4;
00447 src[6] -=v/8;
00448 }
00449 src+=stride;
00450 }
00451 }
00452 
00456 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00457 int y;
00458 const int QP= c->QP;
00459 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00460 const int dcThreshold= dcOffset*2 + 1;
00461 //START_TIMER
00462 src+= step*4; // src points to begin of the 8x8 Block
00463 for(y=0; y<8; y++){
00464 int numEq= 0;
00465 
00466 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00467 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00468 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00469 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00470 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00471 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00472 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00473 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00474 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00475 if(numEq > c->ppMode.flatnessThreshold){
00476 int min, max, x;
00477 
00478 if(src[0] > src[step]){
00479 max= src[0];
00480 min= src[step];
00481 }else{
00482 max= src[step];
00483 min= src[0];
00484 }
00485 for(x=2; x<8; x+=2){
00486 if(src[x*step] > src[(x+1)*step]){
00487 if(src[x *step] > max) max= src[ x *step];
00488 if(src[(x+1)*step] < min) min= src[(x+1)*step];
00489 }else{
00490 if(src[(x+1)*step] > max) max= src[(x+1)*step];
00491 if(src[ x *step] < min) min= src[ x *step];
00492 }
00493 }
00494 if(max-min < 2*QP){
00495 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00496 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00497 
00498 int sums[10];
00499 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00500 sums[1] = sums[0] - first + src[3*step];
00501 sums[2] = sums[1] - first + src[4*step];
00502 sums[3] = sums[2] - first + src[5*step];
00503 sums[4] = sums[3] - first + src[6*step];
00504 sums[5] = sums[4] - src[0*step] + src[7*step];
00505 sums[6] = sums[5] - src[1*step] + last;
00506 sums[7] = sums[6] - src[2*step] + last;
00507 sums[8] = sums[7] - src[3*step] + last;
00508 sums[9] = sums[8] - src[4*step] + last;
00509 
00510 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00511 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00512 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00513 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00514 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00515 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00516 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00517 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00518 }
00519 }else{
00520 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00521 
00522 if(FFABS(middleEnergy) < 8*QP){
00523 const int q=(src[3*step] - src[4*step])/2;
00524 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00525 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00526 
00527 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00528 d= FFMAX(d, 0);
00529 
00530 d= (5*d + 32) >> 6;
00531 d*= FFSIGN(-middleEnergy);
00532 
00533 if(q>0){
00534 d= d<0 ? 0 : d;
00535 d= d>q ? q : d;
00536 }else{
00537 d= d>0 ? 0 : d;
00538 d= d<q ? q : d;
00539 }
00540 
00541 src[3*step]-= d;
00542 src[4*step]+= d;
00543 }
00544 }
00545 
00546 src += stride;
00547 }
00548 /*if(step==16){
00549  STOP_TIMER("step16")
00550 }else{
00551  STOP_TIMER("stepX")
00552 }*/
00553 }
00554 
// The filter implementations live in postprocess_template.c, which is
// included below once per enabled variant with RENAME() giving every function
// a variant-specific suffix (_C, _altivec, _MMX, _MMX2, _3DNow).
00555 // Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one.
00556 // Plain C version: built when neither MMX nor AltiVec is available, or always with runtime CPU detection.
00557 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
00558 #define COMPILE_C
00559 #endif
00560 
00561 #if HAVE_ALTIVEC
00562 #define COMPILE_ALTIVEC
00563 #endif //HAVE_ALTIVEC
00564 
00565 #if ARCH_X86
00566 
// Without runtime CPU detection exactly one x86 variant is selected
// (MMX2 takes precedence over 3DNow, which takes precedence over plain MMX);
// with runtime detection all three are compiled.
00567 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00568 #define COMPILE_MMX
00569 #endif
00570 
00571 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00572 #define COMPILE_MMX2
00573 #endif
00574 
00575 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00576 #define COMPILE_3DNOW
00577 #endif
00578 #endif /* ARCH_X86 */
00579 
// From here on the HAVE_* macros no longer describe the build configuration:
// they are reset to 0 and then switched on per variant so that each inclusion
// of the template sees only the features of the variant being generated.
00580 #undef HAVE_MMX
00581 #define HAVE_MMX 0
00582 #undef HAVE_MMX2
00583 #define HAVE_MMX2 0
00584 #undef HAVE_AMD3DNOW
00585 #define HAVE_AMD3DNOW 0
00586 #undef HAVE_ALTIVEC
00587 #define HAVE_ALTIVEC 0
00588 
// Plain C variant (all HAVE_* are 0 at this point)
00589 #ifdef COMPILE_C
00590 #define RENAME(a) a ## _C
00591 #include "postprocess_template.c"
00592 #endif
00593 
// AltiVec variant
00594 #ifdef COMPILE_ALTIVEC
00595 #undef RENAME
00596 #undef HAVE_ALTIVEC
00597 #define HAVE_ALTIVEC 1
00598 #define RENAME(a) a ## _altivec
00599 #include "postprocess_altivec_template.c"
00600 #include "postprocess_template.c"
00601 #endif
00602 
00603 // MMX variant
00604 #ifdef COMPILE_MMX
00605 #undef RENAME
00606 #undef HAVE_MMX
00607 #define HAVE_MMX 1
00608 #define RENAME(a) a ## _MMX
00609 #include "postprocess_template.c"
00610 #endif
00611 
00612 // MMX2 variant (MMX plus MMX2)
00613 #ifdef COMPILE_MMX2
00614 #undef RENAME
00615 #undef HAVE_MMX
00616 #undef HAVE_MMX2
00617 #define HAVE_MMX 1
00618 #define HAVE_MMX2 1
00619 #define RENAME(a) a ## _MMX2
00620 #include "postprocess_template.c"
00621 #endif
00622 
00623 // 3DNow variant (MMX plus 3DNow, MMX2 explicitly off)
00624 #ifdef COMPILE_3DNOW
00625 #undef RENAME
00626 #undef HAVE_MMX
00627 #undef HAVE_MMX2
00628 #undef HAVE_AMD3DNOW
00629 #define HAVE_MMX 1
00630 #define HAVE_MMX2 0
00631 #define HAVE_AMD3DNOW 1
00632 #define RENAME(a) a ## _3DNow
00633 #include "postprocess_template.c"
00634 #endif
00635 
00636 // Note: the HAVE_xyz macros hold whatever the last compiled variant set them to after this line, so do not rely on them below.
00637 
00638 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00639 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00640 {
00641 PPContext *c= (PPContext *)vc;
00642 PPMode *ppMode= (PPMode *)vm;
00643 c->ppMode= *ppMode; //FIXME
00644 
00645 // Using ifs here as they are faster than function pointers although the
00646 // difference would not be measurable here but it is much better because
00647 // someone might exchange the CPU whithout restarting MPlayer ;)
00648 #if CONFIG_RUNTIME_CPUDETECT
00649 #if ARCH_X86
00650 // ordered per speed fastest first
00651 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00652 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00653 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00654 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00655 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00656 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00657 else
00658 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00659 #else
00660 #if HAVE_ALTIVEC
00661 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00662 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00663 else
00664 #endif
00665 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00666 #endif
00667 #else //CONFIG_RUNTIME_CPUDETECT
00668 #if HAVE_MMX2
00669 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00670 #elif HAVE_AMD3DNOW
00671 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00672 #elif HAVE_MMX
00673 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00674 #elif HAVE_ALTIVEC
00675 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00676 #else
00677 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00678 #endif
00679 #endif 
00680 }
00681 
00682 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00683 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
00684 
00685 /* -pp Command line Help
00686 */
/*
 * Help text describing the filter names and options understood by
 * pp_get_mode_by_name_and_quality().
 *
 * The long names listed here must be spelled exactly as in the filters[]
 * table, because the parser compares them with strcmp(). The entries for
 * "ha", "va" and "fq" previously advertised "hadeblock", "vadeblock" and
 * "forceQuant", none of which the parser accepts.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00734 
00735 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00736 {
00737 char temp[GET_MODE_BUFFER_SIZE];
00738 char *p= temp;
00739 static const char filterDelimiters[] = ",/";
00740 static const char optionDelimiters[] = ":";
00741 struct PPMode *ppMode;
00742 char *filterToken;
00743 
00744 ppMode= av_malloc(sizeof(PPMode));
00745 
00746 ppMode->lumMode= 0;
00747 ppMode->chromMode= 0;
00748 ppMode->maxTmpNoise[0]= 700;
00749 ppMode->maxTmpNoise[1]= 1500;
00750 ppMode->maxTmpNoise[2]= 3000;
00751 ppMode->maxAllowedY= 234;
00752 ppMode->minAllowedY= 16;
00753 ppMode->baseDcDiff= 256/8;
00754 ppMode->flatnessThreshold= 56-16-1;
00755 ppMode->maxClippedThreshold= 0.01;
00756 ppMode->error=0;
00757 
00758 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
00759 
00760 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00761 
00762 for(;;){
00763 char *filterName;
00764 int q= 1000000; //PP_QUALITY_MAX;
00765 int chrom=-1;
00766 int luma=-1;
00767 char *option;
00768 char *options[OPTIONS_ARRAY_SIZE];
00769 int i;
00770 int filterNameOk=0;
00771 int numOfUnknownOptions=0;
00772 int enable=1; //does the user want us to enabled or disabled the filter
00773 
00774 filterToken= strtok(p, filterDelimiters);
00775 if(filterToken == NULL) break;
00776 p+= strlen(filterToken) + 1; // p points to next filterToken
00777 filterName= strtok(filterToken, optionDelimiters);
00778 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00779 
00780 if(*filterName == '-'){
00781 enable=0;
00782 filterName++;
00783 }
00784 
00785 for(;;){ //for all options
00786 option= strtok(NULL, optionDelimiters);
00787 if(option == NULL) break;
00788 
00789 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00790 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00791 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00792 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00793 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00794 else{
00795 options[numOfUnknownOptions] = option;
00796 numOfUnknownOptions++;
00797 }
00798 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00799 }
00800 options[numOfUnknownOptions] = NULL;
00801 
00802 /* replace stuff from the replace Table */
00803 for(i=0; replaceTable[2*i]!=NULL; i++){
00804 if(!strcmp(replaceTable[2*i], filterName)){
00805 int newlen= strlen(replaceTable[2*i + 1]);
00806 int plen;
00807 int spaceLeft;
00808 
00809 if(p==NULL) p= temp, *p=0; //last filter
00810 else p--, *p=','; //not last filter
00811 
00812 plen= strlen(p);
00813 spaceLeft= p - temp + plen;
00814 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
00815 ppMode->error++;
00816 break;
00817 }
00818 memmove(p + newlen, p, plen+1);
00819 memcpy(p, replaceTable[2*i + 1], newlen);
00820 filterNameOk=1;
00821 }
00822 }
00823 
00824 for(i=0; filters[i].shortName!=NULL; i++){
00825 if( !strcmp(filters[i].longName, filterName)
00826 || !strcmp(filters[i].shortName, filterName)){
00827 ppMode->lumMode &= ~filters[i].mask;
00828 ppMode->chromMode &= ~filters[i].mask;
00829 
00830 filterNameOk=1;
00831 if(!enable) break; // user wants to disable it
00832 
00833 if(q >= filters[i].minLumQuality && luma)
00834 ppMode->lumMode|= filters[i].mask;
00835 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00836 if(q >= filters[i].minChromQuality)
00837 ppMode->chromMode|= filters[i].mask;
00838 
00839 if(filters[i].mask == LEVEL_FIX){
00840 int o;
00841 ppMode->minAllowedY= 16;
00842 ppMode->maxAllowedY= 234;
00843 for(o=0; options[o]!=NULL; o++){
00844 if( !strcmp(options[o],"fullyrange")
00845 ||!strcmp(options[o],"f")){
00846 ppMode->minAllowedY= 0;
00847 ppMode->maxAllowedY= 255;
00848 numOfUnknownOptions--;
00849 }
00850 }
00851 }
00852 else if(filters[i].mask == TEMP_NOISE_FILTER)
00853 {
00854 int o;
00855 int numOfNoises=0;
00856 
00857 for(o=0; options[o]!=NULL; o++){
00858 char *tail;
00859 ppMode->maxTmpNoise[numOfNoises]=
00860 strtol(options[o], &tail, 0);
00861 if(tail!=options[o]){
00862 numOfNoises++;
00863 numOfUnknownOptions--;
00864 if(numOfNoises >= 3) break;
00865 }
00866 }
00867 }
00868 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
00869 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00870 int o;
00871 
00872 for(o=0; options[o]!=NULL && o<2; o++){
00873 char *tail;
00874 int val= strtol(options[o], &tail, 0);
00875 if(tail==options[o]) break;
00876 
00877 numOfUnknownOptions--;
00878 if(o==0) ppMode->baseDcDiff= val;
00879 else ppMode->flatnessThreshold= val;
00880 }
00881 }
00882 else if(filters[i].mask == FORCE_QUANT){
00883 int o;
00884 ppMode->forcedQuant= 15;
00885 
00886 for(o=0; options[o]!=NULL && o<1; o++){
00887 char *tail;
00888 int val= strtol(options[o], &tail, 0);
00889 if(tail==options[o]) break;
00890 
00891 numOfUnknownOptions--;
00892 ppMode->forcedQuant= val;
00893 }
00894 }
00895 }
00896 }
00897 if(!filterNameOk) ppMode->error++;
00898 ppMode->error += numOfUnknownOptions;
00899 }
00900 
00901 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00902 if(ppMode->error){
00903 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00904 av_free(ppMode);
00905 return NULL;
00906 }
00907 return ppMode;
00908 }
00909 
00910 void pp_free_mode(pp_mode *mode){
00911 av_free(mode);
00912 }
00913 
/**
 * Replace *p with a fresh zero-filled buffer of the given size.
 *
 * The old buffer is freed first and its contents are not preserved. The
 * alignment argument is accepted but not used; whatever alignment
 * av_mallocz() provides is what the caller gets.
 *
 * @param p         location of the buffer pointer to replace
 * @param alignment ignored
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
00918 
00919 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00920 int mbWidth = (width+15)>>4;
00921 int mbHeight= (height+15)>>4;
00922 int i;
00923 
00924 c->stride= stride;
00925 c->qpStride= qpStride;
00926 
00927 reallocAlign((void **)&c->tempDst, 8, stride*24);
00928 reallocAlign((void **)&c->tempSrc, 8, stride*24);
00929 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00930 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00931 for(i=0; i<256; i++)
00932 c->yHistogram[i]= width*height/64*15/256;
00933 
00934 for(i=0; i<3; i++){
00935 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
00936 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00937 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
00938 }
00939 
00940 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00941 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00942 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00943 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00944 }
00945 
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char kName[] = "postproc";

    (void)ptr;
    return kName;
}
00949 
00950 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00951 
00952 pp_context *pp_get_context(int width, int height, int cpuCaps){
00953 PPContext *c= av_malloc(sizeof(PPContext));
00954 int stride= (width+15)&(~15); //assumed / will realloc if needed
00955 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
00956 
00957 memset(c, 0, sizeof(PPContext));
00958 c->av_class = &av_codec_context_class;
00959 c->cpuCaps= cpuCaps;
00960 if(cpuCaps&PP_FORMAT){
00961 c->hChromaSubSample= cpuCaps&0x3;
00962 c->vChromaSubSample= (cpuCaps>>4)&0x3;
00963 }else{
00964 c->hChromaSubSample= 1;
00965 c->vChromaSubSample= 1;
00966 }
00967 
00968 reallocBuffers(c, width, height, stride, qpStride);
00969 
00970 c->frameNum=-1;
00971 
00972 return c;
00973 }
00974 
00975 void pp_free_context(void *vc){
00976 PPContext *c = (PPContext*)vc;
00977 int i;
00978 
00979 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00980 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00981 
00982 av_free(c->tempBlocks);
00983 av_free(c->yHistogram);
00984 av_free(c->tempDst);
00985 av_free(c->tempSrc);
00986 av_free(c->deintTemp);
00987 av_free(c->stdQPTable);
00988 av_free(c->nonBQPTable);
00989 av_free(c->forcedQPTable);
00990 
00991 memset(c, 0, sizeof(PPContext));
00992 
00993 av_free(c);
00994 }
00995 
00996 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
00997 uint8_t * dst[3], const int dstStride[3],
00998 int width, int height,
00999 const QP_STORE_T *QP_store, int QPStride,
01000 pp_mode *vm, void *vc, int pict_type)
01001 {
01002 int mbWidth = (width+15)>>4;
01003 int mbHeight= (height+15)>>4;
01004 PPMode *mode = (PPMode*)vm;
01005 PPContext *c = (PPContext*)vc;
01006 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01007 int absQPStride = FFABS(QPStride);
01008 
01009 // c->stride and c->QPStride are always positive
01010 if(c->stride < minStride || c->qpStride < absQPStride)
01011 reallocBuffers(c, width, height,
01012 FFMAX(minStride, c->stride),
01013 FFMAX(c->qpStride, absQPStride));
01014 
01015 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
01016 int i;
01017 QP_store= c->forcedQPTable;
01018 absQPStride = QPStride = 0;
01019 if(mode->lumMode & FORCE_QUANT)
01020 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01021 else
01022 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01023 }
01024 
01025 if(pict_type & PP_PICT_TYPE_QP2){
01026 int i;
01027 const int count= mbHeight * absQPStride;
01028 for(i=0; i<(count>>2); i++){
01029 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01030 }
01031 for(i<<=2; i<count; i++){
01032 c->stdQPTable[i] = QP_store[i]>>1;
01033 }
01034 QP_store= c->stdQPTable;
01035 QPStride= absQPStride;
01036 }
01037 
01038 if(0){
01039 int x,y;
01040 for(y=0; y<mbHeight; y++){
01041 for(x=0; x<mbWidth; x++){
01042 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01043 }
01044 av_log(c, AV_LOG_INFO, "\n");
01045 }
01046 av_log(c, AV_LOG_INFO, "\n");
01047 }
01048 
01049 if((pict_type&7)!=3){
01050 if (QPStride >= 0){
01051 int i;
01052 const int count= mbHeight * QPStride;
01053 for(i=0; i<(count>>2); i++){
01054 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01055 }
01056 for(i<<=2; i<count; i++){
01057 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01058 }
01059 } else {
01060 int i,j;
01061 for(i=0; i<mbHeight; i++) {
01062 for(j=0; j<absQPStride; j++) {
01063 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01064 }
01065 }
01066 }
01067 }
01068 
01069 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01070 mode->lumMode, mode->chromMode);
01071 
01072 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01073 width, height, QP_store, QPStride, 0, mode, c);
01074 
01075 width = (width )>>c->hChromaSubSample;
01076 height = (height)>>c->vChromaSubSample;
01077 
01078 if(mode->chromMode){
01079 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01080 width, height, QP_store, QPStride, 1, mode, c);
01081 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01082 width, height, QP_store, QPStride, 2, mode, c);
01083 }
01084 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01085 linecpy(dst[1], src[1], height, srcStride[1]);
01086 linecpy(dst[2], src[2], height, srcStride[2]);
01087 }else{
01088 int y;
01089 for(y=0; y<height; y++){
01090 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01091 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01092 }
01093 }
01094 }
01095
Generated on Fri Oct 26 02:35:41 2012 for FFmpeg by doxygen 1.5.8