00001 /* 00002 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) 00003 * 00004 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org> 00005 * 00006 * This file is part of FFmpeg. 00007 * 00008 * FFmpeg is free software; you can redistribute it and/or modify 00009 * it under the terms of the GNU General Public License as published by 00010 * the Free Software Foundation; either version 2 of the License, or 00011 * (at your option) any later version. 00012 * 00013 * FFmpeg is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 * GNU General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU General Public License 00019 * along with FFmpeg; if not, write to the Free Software 00020 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00021 */ 00022 00028 /* 00029 C MMX MMX2 3DNow AltiVec 00030 isVertDC Ec Ec Ec 00031 isVertMinMaxOk Ec Ec Ec 00032 doVertLowPass E e e Ec 00033 doVertDefFilter Ec Ec e e Ec 00034 isHorizDC Ec Ec Ec 00035 isHorizMinMaxOk a E Ec 00036 doHorizLowPass E e e Ec 00037 doHorizDefFilter Ec Ec e e Ec 00038 do_a_deblock Ec E Ec E 00039 deRing E e e* Ecp 00040 Vertical RKAlgo1 E a a 00041 Horizontal RKAlgo1 a a 00042 Vertical X1# a E E 00043 Horizontal X1# a E E 00044 LinIpolDeinterlace e E E* 00045 CubicIpolDeinterlace a e e* 00046 LinBlendDeinterlace e E E* 00047 MedianDeinterlace# E Ec Ec 00048 TempDeNoiser# E e e Ec 00049 00050 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work 00051 # more or less selfinvented filters so the exactness is not too meaningful 00052 E = Exact implementation 00053 e = almost exact implementation (slightly different rounding,...) 00054 a = alternative / approximate impl 00055 c = checked against the other implementations (-vo md5) 00056 p = partially optimized, still some work to do 00057 */ 00058 00059 /* 00060 TODO: 00061 reduce the time wasted on the mem transfer 00062 unroll stuff if instructions depend too much on the prior one 00063 move YScale thing to the end instead of fixing QP 00064 write a faster and higher quality deblocking filter :) 00065 make the mainloop more flexible (variable number of blocks at once 00066 (the if/else stuff per block is slowing things down) 00067 compare the quality & speed of all filters 00068 split this huge file 00069 optimize c versions 00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks 00071 ... 00072 */ 00073 00074 //Changelog: use the Subversion log 00075 00076 #include "config.h" 00077 #include "libavutil/avutil.h" 00078 #include <inttypes.h> 00079 #include <stdio.h> 00080 #include <stdlib.h> 00081 #include <string.h> 00082 //#undef HAVE_MMX2 00083 //#define HAVE_AMD3DNOW 00084 //#undef HAVE_MMX 00085 //#undef ARCH_X86 00086 //#define DEBUG_BRIGHTNESS 00087 #include "postprocess.h" 00088 #include "postprocess_internal.h" 00089 00090 unsigned postproc_version(void) 00091 { 00092 return LIBPOSTPROC_VERSION_INT; 00093 } 00094 00095 #if HAVE_ALTIVEC_H 00096 #include <altivec.h> 00097 #endif 00098 00099 #define GET_MODE_BUFFER_SIZE 500 00100 #define OPTIONS_ARRAY_SIZE 10 00101 #define BLOCK_SIZE 8 00102 #define TEMP_STRIDE 8 00103 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet 00104 00105 #if ARCH_X86 00106 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL; 00107 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL; 00108 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL; 00109 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL; 00110 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL; 00111 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL; 00112 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL; 00113 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL; 00114 #endif 00115 00116 DECLARE_ASM_CONST(8, int, deringThreshold)= 20; 00117 00118 00119 static struct PPFilter filters[]= 00120 { 00121 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, 00122 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, 00123 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, 00124 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ 00125 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, 00126 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, 00127 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, 00128 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, 00129 {"dr", "dering", 1, 5, 6, DERING}, 00130 {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, 00131 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, 00132 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, 00133 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, 00134 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, 00135 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, 00136 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, 00137 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, 00138 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, 00139 {NULL, NULL,0,0,0,0} //End Marker 00140 }; 00141 00142 static const char *replaceTable[]= 00143 { 00144 "default", "hb:a,vb:a,dr:a", 00145 "de", "hb:a,vb:a,dr:a", 00146 "fast", "h1:a,v1:a,dr:a", 00147 "fa", "h1:a,v1:a,dr:a", 00148 "ac", "ha:a:128:7,va:a,dr:a", 00149 NULL //End Marker 00150 }; 00151 00152 00153 #if ARCH_X86 00154 static inline void prefetchnta(void *p) 00155 { 00156 __asm__ volatile( "prefetchnta (%0)\n\t" 00157 : : "r" (p) 00158 ); 00159 } 00160 00161 static inline void prefetcht0(void *p) 00162 { 00163 __asm__ volatile( "prefetcht0 (%0)\n\t" 00164 : : "r" (p) 00165 ); 00166 } 00167 00168 static inline void prefetcht1(void *p) 00169 { 00170 __asm__ volatile( "prefetcht1 (%0)\n\t" 00171 : : "r" (p) 00172 ); 00173 } 00174 00175 static inline void prefetcht2(void *p) 00176 { 00177 __asm__ volatile( "prefetcht2 (%0)\n\t" 00178 : : "r" (p) 00179 ); 00180 } 00181 #endif 00182 00183 /* The horizontal functions exist only in C because the MMX 00184 * code is faster with vertical filters and transposing. */ 00185 00189 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) 00190 { 00191 int numEq= 0; 00192 int y; 00193 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 00194 const int dcThreshold= dcOffset*2 + 1; 00195 00196 for(y=0; y<BLOCK_SIZE; y++){ 00197 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; 00198 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; 00199 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; 00200 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; 00201 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; 00202 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; 00203 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; 00204 src+= stride; 00205 } 00206 return numEq > c->ppMode.flatnessThreshold; 00207 } 00208 00212 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c) 00213 { 00214 int numEq= 0; 00215 int y; 00216 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 00217 const int dcThreshold= dcOffset*2 + 1; 00218 00219 src+= stride*4; // src points to begin of the 8x8 Block 00220 for(y=0; y<BLOCK_SIZE-1; y++){ 00221 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; 00222 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; 00223 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; 00224 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; 00225 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; 00226 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; 00227 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; 00228 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; 00229 src+= stride; 00230 } 00231 return numEq > c->ppMode.flatnessThreshold; 00232 } 00233 00234 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP) 00235 { 00236 int i; 00237 #if 1 00238 for(i=0; i<2; i++){ 00239 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0; 00240 src += stride; 00241 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0; 00242 src += stride; 00243 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0; 00244 src += stride; 00245 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0; 00246 src += stride; 00247 } 00248 #else 00249 for(i=0; i<8; i++){ 00250 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0; 00251 src += stride; 00252 } 00253 #endif 00254 return 1; 00255 } 00256 00257 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) 00258 { 00259 #if 1 00260 #if 1 00261 int x; 00262 src+= stride*4; 00263 for(x=0; x<BLOCK_SIZE; x+=4){ 00264 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; 00265 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; 00266 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; 00267 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; 00268 } 00269 #else 00270 int x; 00271 src+= stride*3; 00272 for(x=0; x<BLOCK_SIZE; x++){ 00273 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; 00274 } 00275 #endif 00276 return 1; 00277 #else 00278 int x; 00279 src+= stride*4; 00280 for(x=0; x<BLOCK_SIZE; x++){ 00281 int min=255; 00282 int max=0; 00283 int y; 00284 for(y=0; y<8; y++){ 00285 int v= src[x + y*stride]; 00286 if(v>max) max=v; 00287 if(v<min) min=v; 00288 } 00289 if(max-min > 2*QP) return 0; 00290 } 00291 return 1; 00292 #endif 00293 } 00294 00295 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c) 00296 { 00297 if( isHorizDC_C(src, stride, c) ){ 00298 if( isHorizMinMaxOk_C(src, stride, c->QP) ) 00299 return 1; 00300 else 00301 return 0; 00302 }else{ 00303 return 2; 00304 } 00305 } 00306 00307 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c) 00308 { 00309 if( isVertDC_C(src, stride, c) ){ 00310 if( isVertMinMaxOk_C(src, stride, c->QP) ) 00311 return 1; 00312 else 00313 return 0; 00314 }else{ 00315 return 2; 00316 } 00317 } 00318 00319 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) 00320 { 00321 int y; 00322 for(y=0; y<BLOCK_SIZE; y++){ 00323 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); 00324 00325 if(FFABS(middleEnergy) < 8*c->QP){ 00326 const int q=(dst[3] - dst[4])/2; 00327 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); 00328 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); 00329 00330 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) ); 00331 d= FFMAX(d, 0); 00332 00333 d= (5*d + 32) >> 6; 00334 d*= FFSIGN(-middleEnergy); 00335 00336 if(q>0) 00337 { 00338 d= d<0 ? 0 : d; 00339 d= d>q ? q : d; 00340 } 00341 else 00342 { 00343 d= d>0 ? 0 : d; 00344 d= d<q ? q : d; 00345 } 00346 00347 dst[3]-= d; 00348 dst[4]+= d; 00349 } 00350 dst+= stride; 00351 } 00352 } 00353 00358 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) 00359 { 00360 int y; 00361 for(y=0; y<BLOCK_SIZE; y++){ 00362 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; 00363 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; 00364 00365 int sums[10]; 00366 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; 00367 sums[1] = sums[0] - first + dst[3]; 00368 sums[2] = sums[1] - first + dst[4]; 00369 sums[3] = sums[2] - first + dst[5]; 00370 sums[4] = sums[3] - first + dst[6]; 00371 sums[5] = sums[4] - dst[0] + dst[7]; 00372 sums[6] = sums[5] - dst[1] + last; 00373 sums[7] = sums[6] - dst[2] + last; 00374 sums[8] = sums[7] - dst[3] + last; 00375 sums[9] = sums[8] - dst[4] + last; 00376 00377 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; 00378 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; 00379 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; 00380 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; 00381 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; 00382 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; 00383 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; 00384 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; 00385 00386 dst+= stride; 00387 } 00388 } 00389 00398 static inline void horizX1Filter(uint8_t *src, int stride, int QP) 00399 { 00400 int y; 00401 static uint64_t *lut= NULL; 00402 if(lut==NULL) 00403 { 00404 int i; 00405 lut = av_malloc(256*8); 00406 for(i=0; i<256; i++) 00407 { 00408 int v= i < 128 ? 2*i : 2*(i-256); 00409 /* 00410 //Simulate 112242211 9-Tap filter 00411 uint64_t a= (v/16) & 0xFF; 00412 uint64_t b= (v/8) & 0xFF; 00413 uint64_t c= (v/4) & 0xFF; 00414 uint64_t d= (3*v/8) & 0xFF; 00415 */ 00416 //Simulate piecewise linear interpolation 00417 uint64_t a= (v/16) & 0xFF; 00418 uint64_t b= (v*3/16) & 0xFF; 00419 uint64_t c= (v*5/16) & 0xFF; 00420 uint64_t d= (7*v/16) & 0xFF; 00421 uint64_t A= (0x100 - a)&0xFF; 00422 uint64_t B= (0x100 - b)&0xFF; 00423 uint64_t C= (0x100 - c)&0xFF; 00424 uint64_t D= (0x100 - c)&0xFF; 00425 00426 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | 00427 (D<<24) | (C<<16) | (B<<8) | (A); 00428 //lut[i] = (v<<32) | (v<<24); 00429 } 00430 } 00431 00432 for(y=0; y<BLOCK_SIZE; y++){ 00433 int a= src[1] - src[2]; 00434 int b= src[3] - src[4]; 00435 int c= src[5] - src[6]; 00436 00437 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0); 00438 00439 if(d < QP){ 00440 int v = d * FFSIGN(-b); 00441 00442 src[1] +=v/8; 00443 src[2] +=v/4; 00444 src[3] +=3*v/8; 00445 src[4] -=3*v/8; 00446 src[5] -=v/4; 00447 src[6] -=v/8; 00448 } 00449 src+=stride; 00450 } 00451 } 00452 00456 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ 00457 int y; 00458 const int QP= c->QP; 00459 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; 00460 const int dcThreshold= dcOffset*2 + 1; 00461 //START_TIMER 00462 src+= step*4; // src points to begin of the 8x8 Block 00463 for(y=0; y<8; y++){ 00464 int numEq= 0; 00465 00466 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; 00467 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; 00468 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; 00469 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; 00470 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; 00471 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; 00472 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; 00473 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; 00474 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; 00475 if(numEq > c->ppMode.flatnessThreshold){ 00476 int min, max, x; 00477 00478 if(src[0] > src[step]){ 00479 max= src[0]; 00480 min= src[step]; 00481 }else{ 00482 max= src[step]; 00483 min= src[0]; 00484 } 00485 for(x=2; x<8; x+=2){ 00486 if(src[x*step] > src[(x+1)*step]){ 00487 if(src[x *step] > max) max= src[ x *step]; 00488 if(src[(x+1)*step] < min) min= src[(x+1)*step]; 00489 }else{ 00490 if(src[(x+1)*step] > max) max= src[(x+1)*step]; 00491 if(src[ x *step] < min) min= src[ x *step]; 00492 } 00493 } 00494 if(max-min < 2*QP){ 00495 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; 00496 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; 00497 00498 int sums[10]; 00499 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; 00500 sums[1] = sums[0] - first + src[3*step]; 00501 sums[2] = sums[1] - first + src[4*step]; 00502 sums[3] = sums[2] - first + src[5*step]; 00503 sums[4] = sums[3] - first + src[6*step]; 00504 sums[5] = sums[4] - src[0*step] + src[7*step]; 00505 sums[6] = sums[5] - src[1*step] + last; 00506 sums[7] = sums[6] - src[2*step] + last; 00507 sums[8] = sums[7] - src[3*step] + last; 00508 sums[9] = sums[8] - src[4*step] + last; 00509 00510 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; 00511 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; 00512 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; 00513 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; 00514 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; 00515 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; 00516 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; 00517 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; 00518 } 00519 }else{ 00520 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); 00521 00522 if(FFABS(middleEnergy) < 8*QP){ 00523 const int q=(src[3*step] - src[4*step])/2; 00524 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); 00525 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); 00526 00527 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) ); 00528 d= FFMAX(d, 0); 00529 00530 d= (5*d + 32) >> 6; 00531 d*= FFSIGN(-middleEnergy); 00532 00533 if(q>0){ 00534 d= d<0 ? 0 : d; 00535 d= d>q ? q : d; 00536 }else{ 00537 d= d>0 ? 0 : d; 00538 d= d<q ? q : d; 00539 } 00540 00541 src[3*step]-= d; 00542 src[4*step]+= d; 00543 } 00544 } 00545 00546 src += stride; 00547 } 00548 /*if(step==16){ 00549 STOP_TIMER("step16") 00550 }else{ 00551 STOP_TIMER("stepX") 00552 }*/ 00553 } 00554 00555 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one 00556 //Plain C versions 00557 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT 00558 #define COMPILE_C 00559 #endif 00560 00561 #if HAVE_ALTIVEC 00562 #define COMPILE_ALTIVEC 00563 #endif //HAVE_ALTIVEC 00564 00565 #if ARCH_X86 00566 00567 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT 00568 #define COMPILE_MMX 00569 #endif 00570 00571 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT 00572 #define COMPILE_MMX2 00573 #endif 00574 00575 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT 00576 #define COMPILE_3DNOW 00577 #endif 00578 #endif /* ARCH_X86 */ 00579 00580 #undef HAVE_MMX 00581 #define HAVE_MMX 0 00582 #undef HAVE_MMX2 00583 #define HAVE_MMX2 0 00584 #undef HAVE_AMD3DNOW 00585 #define HAVE_AMD3DNOW 0 00586 #undef HAVE_ALTIVEC 00587 #define HAVE_ALTIVEC 0 00588 00589 #ifdef COMPILE_C 00590 #define RENAME(a) a ## _C 00591 #include "postprocess_template.c" 00592 #endif 00593 00594 #ifdef COMPILE_ALTIVEC 00595 #undef RENAME 00596 #undef HAVE_ALTIVEC 00597 #define HAVE_ALTIVEC 1 00598 #define RENAME(a) a ## _altivec 00599 #include "postprocess_altivec_template.c" 00600 #include "postprocess_template.c" 00601 #endif 00602 00603 //MMX versions 00604 #ifdef COMPILE_MMX 00605 #undef RENAME 00606 #undef HAVE_MMX 00607 #define HAVE_MMX 1 00608 #define RENAME(a) a ## _MMX 00609 #include "postprocess_template.c" 00610 #endif 00611 00612 //MMX2 versions 00613 #ifdef COMPILE_MMX2 00614 #undef RENAME 00615 #undef HAVE_MMX 00616 #undef HAVE_MMX2 00617 #define HAVE_MMX 1 00618 #define HAVE_MMX2 1 00619 #define RENAME(a) a ## _MMX2 00620 #include "postprocess_template.c" 00621 #endif 00622 00623 //3DNOW versions 00624 #ifdef COMPILE_3DNOW 00625 #undef RENAME 00626 #undef HAVE_MMX 00627 #undef HAVE_MMX2 00628 #undef HAVE_AMD3DNOW 00629 #define HAVE_MMX 1 00630 #define HAVE_MMX2 0 00631 #define HAVE_AMD3DNOW 1 00632 #define RENAME(a) a ## _3DNow 00633 #include "postprocess_template.c" 00634 #endif 00635 00636 // minor note: the HAVE_xyz is messed up after that line so do not use it. 00637 00638 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, 00639 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc) 00640 { 00641 PPContext *c= (PPContext *)vc; 00642 PPMode *ppMode= (PPMode *)vm; 00643 c->ppMode= *ppMode; //FIXME 00644 00645 // Using ifs here as they are faster than function pointers although the 00646 // difference would not be measurable here but it is much better because 00647 // someone might exchange the CPU whithout restarting MPlayer ;) 00648 #if CONFIG_RUNTIME_CPUDETECT 00649 #if ARCH_X86 00650 // ordered per speed fastest first 00651 if(c->cpuCaps & PP_CPU_CAPS_MMX2) 00652 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00653 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) 00654 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00655 else if(c->cpuCaps & PP_CPU_CAPS_MMX) 00656 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00657 else 00658 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00659 #else 00660 #if HAVE_ALTIVEC 00661 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) 00662 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00663 else 00664 #endif 00665 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00666 #endif 00667 #else //CONFIG_RUNTIME_CPUDETECT 00668 #if HAVE_MMX2 00669 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00670 #elif HAVE_AMD3DNOW 00671 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00672 #elif HAVE_MMX 00673 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00674 #elif HAVE_ALTIVEC 00675 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00676 #else 00677 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); 00678 #endif 00679 #endif 00680 } 00681 00682 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, 00683 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); 00684 00685 /* -pp Command line Help 00686 */ 00687 #if LIBPOSTPROC_VERSION_INT < (52<<16) 00688 const char *const pp_help= 00689 #else 00690 const char pp_help[] = 00691 #endif 00692 "Available postprocessing filters:\n" 00693 "Filters Options\n" 00694 "short long name short long option Description\n" 00695 "* * a autoq CPU power dependent enabler\n" 00696 " c chrom chrominance filtering enabled\n" 00697 " y nochrom chrominance filtering disabled\n" 00698 " n noluma luma filtering disabled\n" 00699 "hb hdeblock (2 threshold) horizontal deblocking filter\n" 00700 " 1. difference factor: default=32, higher -> more deblocking\n" 00701 " 2. flatness threshold: default=39, lower -> more deblocking\n" 00702 " the h & v deblocking filters share these\n" 00703 " so you can't set different thresholds for h / v\n" 00704 "vb vdeblock (2 threshold) vertical deblocking filter\n" 00705 "ha hadeblock (2 threshold) horizontal deblocking filter\n" 00706 "va vadeblock (2 threshold) vertical deblocking filter\n" 00707 "h1 x1hdeblock experimental h deblock filter 1\n" 00708 "v1 x1vdeblock experimental v deblock filter 1\n" 00709 "dr dering deringing filter\n" 00710 "al autolevels automatic brightness / contrast\n" 00711 " f fullyrange stretch luminance to (0..255)\n" 00712 "lb linblenddeint linear blend deinterlacer\n" 00713 "li linipoldeint linear interpolating deinterlace\n" 00714 "ci cubicipoldeint cubic interpolating deinterlacer\n" 00715 "md mediandeint median deinterlacer\n" 00716 "fd ffmpegdeint ffmpeg deinterlacer\n" 00717 "l5 lowpass5 FIR lowpass deinterlacer\n" 00718 "de default hb:a,vb:a,dr:a\n" 00719 "fa fast h1:a,v1:a,dr:a\n" 00720 "ac ha:a:128:7,va:a,dr:a\n" 00721 "tn tmpnoise (3 threshold) temporal noise reducer\n" 00722 " 1. <= 2. <= 3. larger -> stronger filtering\n" 00723 "fq forceQuant <quantizer> force quantizer\n" 00724 "Usage:\n" 00725 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n" 00726 "long form example:\n" 00727 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n" 00728 "short form example:\n" 00729 "vb:a/hb:a/lb de,-vb\n" 00730 "more examples:\n" 00731 "tn:64:128:256\n" 00732 "\n" 00733 ; 00734 00735 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality) 00736 { 00737 char temp[GET_MODE_BUFFER_SIZE]; 00738 char *p= temp; 00739 static const char filterDelimiters[] = ",/"; 00740 static const char optionDelimiters[] = ":"; 00741 struct PPMode *ppMode; 00742 char *filterToken; 00743 00744 ppMode= av_malloc(sizeof(PPMode)); 00745 00746 ppMode->lumMode= 0; 00747 ppMode->chromMode= 0; 00748 ppMode->maxTmpNoise[0]= 700; 00749 ppMode->maxTmpNoise[1]= 1500; 00750 ppMode->maxTmpNoise[2]= 3000; 00751 ppMode->maxAllowedY= 234; 00752 ppMode->minAllowedY= 16; 00753 ppMode->baseDcDiff= 256/8; 00754 ppMode->flatnessThreshold= 56-16-1; 00755 ppMode->maxClippedThreshold= 0.01; 00756 ppMode->error=0; 00757 00758 strncpy(temp, name, GET_MODE_BUFFER_SIZE); 00759 00760 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name); 00761 00762 for(;;){ 00763 char *filterName; 00764 int q= 1000000; //PP_QUALITY_MAX; 00765 int chrom=-1; 00766 int luma=-1; 00767 char *option; 00768 char *options[OPTIONS_ARRAY_SIZE]; 00769 int i; 00770 int filterNameOk=0; 00771 int numOfUnknownOptions=0; 00772 int enable=1; //does the user want us to enabled or disabled the filter 00773 00774 filterToken= strtok(p, filterDelimiters); 00775 if(filterToken == NULL) break; 00776 p+= strlen(filterToken) + 1; // p points to next filterToken 00777 filterName= strtok(filterToken, optionDelimiters); 00778 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName); 00779 00780 if(*filterName == '-'){ 00781 enable=0; 00782 filterName++; 00783 } 00784 00785 for(;;){ //for all options 00786 option= strtok(NULL, optionDelimiters); 00787 if(option == NULL) break; 00788 00789 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option); 00790 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; 00791 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; 00792 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; 00793 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; 00794 else{ 00795 options[numOfUnknownOptions] = option; 00796 numOfUnknownOptions++; 00797 } 00798 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; 00799 } 00800 options[numOfUnknownOptions] = NULL; 00801 00802 /* replace stuff from the replace Table */ 00803 for(i=0; replaceTable[2*i]!=NULL; i++){ 00804 if(!strcmp(replaceTable[2*i], filterName)){ 00805 int newlen= strlen(replaceTable[2*i + 1]); 00806 int plen; 00807 int spaceLeft; 00808 00809 if(p==NULL) p= temp, *p=0; //last filter 00810 else p--, *p=','; //not last filter 00811 00812 plen= strlen(p); 00813 spaceLeft= p - temp + plen; 00814 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){ 00815 ppMode->error++; 00816 break; 00817 } 00818 memmove(p + newlen, p, plen+1); 00819 memcpy(p, replaceTable[2*i + 1], newlen); 00820 filterNameOk=1; 00821 } 00822 } 00823 00824 for(i=0; filters[i].shortName!=NULL; i++){ 00825 if( !strcmp(filters[i].longName, filterName) 00826 || !strcmp(filters[i].shortName, filterName)){ 00827 ppMode->lumMode &= ~filters[i].mask; 00828 ppMode->chromMode &= ~filters[i].mask; 00829 00830 filterNameOk=1; 00831 if(!enable) break; // user wants to disable it 00832 00833 if(q >= filters[i].minLumQuality && luma) 00834 ppMode->lumMode|= filters[i].mask; 00835 if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) 00836 if(q >= filters[i].minChromQuality) 00837 ppMode->chromMode|= filters[i].mask; 00838 00839 if(filters[i].mask == LEVEL_FIX){ 00840 int o; 00841 ppMode->minAllowedY= 16; 00842 ppMode->maxAllowedY= 234; 00843 for(o=0; options[o]!=NULL; o++){ 00844 if( !strcmp(options[o],"fullyrange") 00845 ||!strcmp(options[o],"f")){ 00846 ppMode->minAllowedY= 0; 00847 ppMode->maxAllowedY= 255; 00848 numOfUnknownOptions--; 00849 } 00850 } 00851 } 00852 else if(filters[i].mask == TEMP_NOISE_FILTER) 00853 { 00854 int o; 00855 int numOfNoises=0; 00856 00857 for(o=0; options[o]!=NULL; o++){ 00858 char *tail; 00859 ppMode->maxTmpNoise[numOfNoises]= 00860 strtol(options[o], &tail, 0); 00861 if(tail!=options[o]){ 00862 numOfNoises++; 00863 numOfUnknownOptions--; 00864 if(numOfNoises >= 3) break; 00865 } 00866 } 00867 } 00868 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK 00869 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){ 00870 int o; 00871 00872 for(o=0; options[o]!=NULL && o<2; o++){ 00873 char *tail; 00874 int val= strtol(options[o], &tail, 0); 00875 if(tail==options[o]) break; 00876 00877 numOfUnknownOptions--; 00878 if(o==0) ppMode->baseDcDiff= val; 00879 else ppMode->flatnessThreshold= val; 00880 } 00881 } 00882 else if(filters[i].mask == FORCE_QUANT){ 00883 int o; 00884 ppMode->forcedQuant= 15; 00885 00886 for(o=0; options[o]!=NULL && o<1; o++){ 00887 char *tail; 00888 int val= strtol(options[o], &tail, 0); 00889 if(tail==options[o]) break; 00890 00891 numOfUnknownOptions--; 00892 ppMode->forcedQuant= val; 00893 } 00894 } 00895 } 00896 } 00897 if(!filterNameOk) ppMode->error++; 00898 ppMode->error += numOfUnknownOptions; 00899 } 00900 00901 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); 00902 if(ppMode->error){ 00903 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); 00904 av_free(ppMode); 00905 return NULL; 00906 } 00907 return ppMode; 00908 } 00909 00910 void pp_free_mode(pp_mode *mode){ 00911 av_free(mode); 00912 } 00913 00914 static void reallocAlign(void **p, int alignment, int size){ 00915 av_free(*p); 00916 *p= av_mallocz(size); 00917 } 00918 00919 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){ 00920 int mbWidth = (width+15)>>4; 00921 int mbHeight= (height+15)>>4; 00922 int i; 00923 00924 c->stride= stride; 00925 c->qpStride= qpStride; 00926 00927 reallocAlign((void **)&c->tempDst, 8, stride*24); 00928 reallocAlign((void **)&c->tempSrc, 8, stride*24); 00929 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); 00930 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); 00931 for(i=0; i<256; i++) 00932 c->yHistogram[i]= width*height/64*15/256; 00933 00934 for(i=0; i<3; i++){ 00935 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end. 00936 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024); 00937 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size 00938 } 00939 00940 reallocAlign((void **)&c->deintTemp, 8, 2*width+32); 00941 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); 00942 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T)); 00943 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T)); 00944 } 00945 00946 static const char * context_to_name(void * ptr) { 00947 return "postproc"; 00948 } 00949 00950 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL }; 00951 00952 pp_context *pp_get_context(int width, int height, int cpuCaps){ 00953 PPContext *c= av_malloc(sizeof(PPContext)); 00954 int stride= (width+15)&(~15); //assumed / will realloc if needed 00955 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed 00956 00957 memset(c, 0, sizeof(PPContext)); 00958 c->av_class = &av_codec_context_class; 00959 c->cpuCaps= cpuCaps; 00960 if(cpuCaps&PP_FORMAT){ 00961 c->hChromaSubSample= cpuCaps&0x3; 00962 c->vChromaSubSample= (cpuCaps>>4)&0x3; 00963 }else{ 00964 c->hChromaSubSample= 1; 00965 c->vChromaSubSample= 1; 00966 } 00967 00968 reallocBuffers(c, width, height, stride, qpStride); 00969 00970 c->frameNum=-1; 00971 00972 return c; 00973 } 00974 00975 void pp_free_context(void *vc){ 00976 PPContext *c = (PPContext*)vc; 00977 int i; 00978 00979 for(i=0; i<3; i++) av_free(c->tempBlurred[i]); 00980 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]); 00981 00982 av_free(c->tempBlocks); 00983 av_free(c->yHistogram); 00984 av_free(c->tempDst); 00985 av_free(c->tempSrc); 00986 av_free(c->deintTemp); 00987 av_free(c->stdQPTable); 00988 av_free(c->nonBQPTable); 00989 av_free(c->forcedQPTable); 00990 00991 memset(c, 0, sizeof(PPContext)); 00992 00993 av_free(c); 00994 } 00995 00996 void pp_postprocess(const uint8_t * src[3], const int srcStride[3], 00997 uint8_t * dst[3], const int dstStride[3], 00998 int width, int height, 00999 const QP_STORE_T *QP_store, int QPStride, 01000 pp_mode *vm, void *vc, int pict_type) 01001 { 01002 int mbWidth = (width+15)>>4; 01003 int mbHeight= (height+15)>>4; 01004 PPMode *mode = (PPMode*)vm; 01005 PPContext *c = (PPContext*)vc; 01006 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0])); 01007 int absQPStride = FFABS(QPStride); 01008 01009 // c->stride and c->QPStride are always positive 01010 if(c->stride < minStride || c->qpStride < absQPStride) 01011 reallocBuffers(c, width, height, 01012 FFMAX(minStride, c->stride), 01013 FFMAX(c->qpStride, absQPStride)); 01014 01015 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){ 01016 int i; 01017 QP_store= c->forcedQPTable; 01018 absQPStride = QPStride = 0; 01019 if(mode->lumMode & FORCE_QUANT) 01020 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant; 01021 else 01022 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1; 01023 } 01024 01025 if(pict_type & PP_PICT_TYPE_QP2){ 01026 int i; 01027 const int count= mbHeight * absQPStride; 01028 for(i=0; i<(count>>2); i++){ 01029 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; 01030 } 01031 for(i<<=2; i<count; i++){ 01032 c->stdQPTable[i] = QP_store[i]>>1; 01033 } 01034 QP_store= c->stdQPTable; 01035 QPStride= absQPStride; 01036 } 01037 01038 if(0){ 01039 int x,y; 01040 for(y=0; y<mbHeight; y++){ 01041 for(x=0; x<mbWidth; x++){ 01042 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]); 01043 } 01044 av_log(c, AV_LOG_INFO, "\n"); 01045 } 01046 av_log(c, AV_LOG_INFO, "\n"); 01047 } 01048 01049 if((pict_type&7)!=3){ 01050 if (QPStride >= 0){ 01051 int i; 01052 const int count= mbHeight * QPStride; 01053 for(i=0; i<(count>>2); i++){ 01054 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F; 01055 } 01056 for(i<<=2; i<count; i++){ 01057 c->nonBQPTable[i] = QP_store[i] & 0x3F; 01058 } 01059 } else { 01060 int i,j; 01061 for(i=0; i<mbHeight; i++) { 01062 for(j=0; j<absQPStride; j++) { 01063 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; 01064 } 01065 } 01066 } 01067 } 01068 01069 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n", 01070 mode->lumMode, mode->chromMode); 01071 01072 postProcess(src[0], srcStride[0], dst[0], dstStride[0], 01073 width, height, QP_store, QPStride, 0, mode, c); 01074 01075 width = (width )>>c->hChromaSubSample; 01076 height = (height)>>c->vChromaSubSample; 01077 01078 if(mode->chromMode){ 01079 postProcess(src[1], srcStride[1], dst[1], dstStride[1], 01080 width, height, QP_store, QPStride, 1, mode, c); 01081 postProcess(src[2], srcStride[2], dst[2], dstStride[2], 01082 width, height, QP_store, QPStride, 2, mode, c); 01083 } 01084 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){ 01085 linecpy(dst[1], src[1], height, srcStride[1]); 01086 linecpy(dst[2], src[2], height, srcStride[2]); 01087 }else{ 01088 int y; 01089 for(y=0; y<height; y++){ 01090 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); 01091 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); 01092 } 01093 } 01094 } 01095