FFmpeg: libpostproc/postprocess.c Source File

FFmpeg

[フレーム]

libpostproc

postprocess.c

Go to the documentation of this file.

1 /*

3 *

5 *

6 * This file is part of FFmpeg.

7 *

8 * FFmpeg is free software; you can redistribute it and/or modify

9 * it under the terms of the GNU General Public License as published by

10 * the Free Software Foundation; either version 2 of the License, or

11 * (at your option) any later version.

12 *

13 * FFmpeg is distributed in the hope that it will be useful,

14 * but WITHOUT ANY WARRANTY; without even the implied warranty of

15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

16 * GNU General Public License for more details.

17 *

18 * You should have received a copy of the GNU General Public License

19 * along with FFmpeg; if not, write to the Free Software

20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

21 */

23 /**

24 * @file

25 * postprocessing.

26 */

28 /*

29 C MMX MMX2 3DNow AltiVec

30 isVertDC Ec Ec Ec

31 isVertMinMaxOk Ec Ec Ec

32 doVertLowPass E e e Ec

33 doVertDefFilter Ec Ec e e Ec

34 isHorizDC Ec Ec Ec

35 isHorizMinMaxOk a E Ec

36 doHorizLowPass E e e Ec

37 doHorizDefFilter Ec Ec e e Ec

38 do_a_deblock Ec E Ec E

39 deRing E e e* Ecp

40 Vertical RKAlgo1 E a a

41 Horizontal RKAlgo1 a a

42 Vertical X1# a E E

43 Horizontal X1# a E E

44 LinIpolDeinterlace e E E*

45 CubicIpolDeinterlace a e e*

46 LinBlendDeinterlace e E E*

47 MedianDeinterlace# E Ec Ec

48 TempDeNoiser# E e e Ec

50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work

51 # more or less selfinvented filters so the exactness is not too meaningful

52 E = Exact implementation

53 e = almost exact implementation (slightly different rounding,...)

54 a = alternative / approximate impl

55 c = checked against the other implementations (-vo md5)

56 p = partially optimized, still some work to do

57 */

59 /*

60 TODO:

61 reduce the time wasted on the mem transfer

62 unroll stuff if instructions depend too much on the prior one

63 move YScale thing to the end instead of fixing QP

64 write a faster and higher quality deblocking filter :)

65 make the mainloop more flexible (variable number of blocks at once)

66 (the if/else stuff per block is slowing things down)

67 compare the quality & speed of all filters

68 split this huge file

69 optimize c versions

70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks

71 ...

72 */

74 //Changelog: use git log

76 #include "config.h"

77 #include "libavutil/avutil.h"

78 #include "libavutil/avassert.h"

79 #include <inttypes.h>

80 #include <stdio.h>

81 #include <stdlib.h>

82 #include <string.h>

83 //#undef HAVE_MMXEXT_INLINE

84 //#define HAVE_AMD3DNOW_INLINE

85 //#undef HAVE_MMX_INLINE

86 //#undef ARCH_X86

87 //#define DEBUG_BRIGHTNESS

88 #include "postprocess.h"

89 #include "postprocess_internal.h"

90 #include "libavutil/avstring.h"

92 unsigned postproc_version(void)

93 {

94 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);

95 return LIBPOSTPROC_VERSION_INT;

96 }

98 const char *postproc_configuration(void)

99 {

100 return FFMPEG_CONFIGURATION;

101 }

102

103 const char *postproc_license(void)

104 {

105 #define LICENSE_PREFIX "libpostproc license: "

106 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;

107 }

108

109 #if HAVE_ALTIVEC_H

110 #include <altivec.h>

111 #endif

112

113 #define GET_MODE_BUFFER_SIZE 500

114 #define OPTIONS_ARRAY_SIZE 10

115 #define BLOCK_SIZE 8

116 #define TEMP_STRIDE 8

117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet

118

119 #if ARCH_X86 && HAVE_INLINE_ASM

120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;

121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;

122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;

123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;

124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;

125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;

126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;

127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;

128 #endif

129

130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;

131

132

133 static const struct PPFilter filters[]=

134 {

135 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},

136 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},

137 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},

138 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/

139 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},

140 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},

141 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},

142 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},

143 {"dr", "dering", 1, 5, 6, DERING},

144 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},

145 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},

146 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},

147 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},

148 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},

149 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},

150 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},

151 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},

152 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},

153 {"be", "bitexact", 1, 0, 0, BITEXACT},

154 {NULL, NULL,0,0,0,0} //End Marker

155 };

156

/*
 * Alias table for the mode-string parser: entries come in pairs of
 * (alias name, filter string spliced in for it); a NULL alias ends the list.
 */
static const char *replaceTable[]=
{
    "default", "hb:a,vb:a,dr:a",
    "de",      "hb:a,vb:a,dr:a",
    "fast",    "h1:a,v1:a,dr:a",
    "fa",      "h1:a,v1:a,dr:a",
    "ac",      "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};

166

167

#if ARCH_X86 && HAVE_INLINE_ASM
/** Issue a prefetchnta instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}

/** Issue a prefetcht0 instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}

/** Issue a prefetcht1 instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}

/** Issue a prefetcht2 instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
#endif

197

198 /* The horizontal functions exist only in C because the MMX

199 * code is faster with vertical filters and transposing. */

200

201 /**

202 * Check if the given 8x8 Block is mostly "flat"

203 */

204 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)

205 {

206 int numEq= 0;

207 int y;

208 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;

209 const int dcThreshold= dcOffset*2 + 1;

210

211 for(y=0; y<BLOCK_SIZE; y++){

212 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;

213 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;

214 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;

215 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;

216 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;

217 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;

218 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;

219 src+= stride;

220 }

221 return numEq > c->ppMode.flatnessThreshold;

222 }

223

224 /**

225 * Check if the middle 8x8 Block in the given 8x16 block is flat

226 */

227 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)

228 {

229 int numEq= 0;

230 int y;

231 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;

232 const int dcThreshold= dcOffset*2 + 1;

233

234 src+= stride*4; // src points to begin of the 8x8 Block

235 for(y=0; y<BLOCK_SIZE-1; y++){

236 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;

237 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;

238 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;

239 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;

240 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;

241 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;

242 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;

243 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;

244 src+= stride;

245 }

246 return numEq > c->ppMode.flatnessThreshold;

247 }

248

/**
 * Probe one sample pair in each of eight consecutive rows and check that
 * its difference lies within [-2*QP, 2*QP].
 *
 * The column pairs cycle through (0,5), (2,7), (4,1), (6,3) row by row.
 *
 * @param src    first sample of the first row
 * @param stride distance between two rows, in samples
 * @param QP     quantizer the allowed range is derived from
 * @return 1 if every probed pair is within range, 0 at the first one that is not
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    /* {minuend column, subtrahend column} for rows 0..3, repeated for 4..7 */
    static const uint8_t probe[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *pair = probe[row & 3];

        if ((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }
    return 1;
}

264

265 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)

266 {

267 int x;

268 src+= stride*4;

269 for(x=0; x<BLOCK_SIZE; x+=4){

270 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;

271 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;

272 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;

273 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;

274 }

275 return 1;

276 }

277

278 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)

279 {

280 if( isHorizDC_C(src, stride, c) ){

281 if( isHorizMinMaxOk_C(src, stride, c->QP) )

282 return 1;

283 else

284 return 0;

285 }else{

286 return 2;

287 }

288 }

289

290 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)

291 {

292 if( isVertDC_C(src, stride, c) ){

293 if( isVertMinMaxOk_C(src, stride, c->QP) )

294 return 1;

295 else

296 return 0;

297 }else{

298 return 2;

299 }

300 }

301

302 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)

303 {

304 int y;

305 for(y=0; y<BLOCK_SIZE; y++){

306 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);

307

308 if(FFABS(middleEnergy) < 8*c->QP){

309 const int q=(dst[3] - dst[4])/2;

310 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);

311 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

312

313 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );

314 d= FFMAX(d, 0);

315

316 d= (5*d + 32) >> 6;

317 d*= FFSIGN(-middleEnergy);

318

319 if(q>0)

320 {

321 d= d<0 ? 0 : d;

322 d= d>q ? q : d;

323 }

324 else

325 {

326 d= d>0 ? 0 : d;

327 d= d<q ? q : d;

328 }

329

330 dst[3]-= d;

331 dst[4]+= d;

332 }

333 dst+= stride;

334 }

335 }

336

337 /**

338 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)

339 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)

340 */

341 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)

342 {

343 int y;

344 for(y=0; y<BLOCK_SIZE; y++){

345 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];

346 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];

347

348 int sums[10];

349 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;

350 sums[1] = sums[0] - first + dst[3];

351 sums[2] = sums[1] - first + dst[4];

352 sums[3] = sums[2] - first + dst[5];

353 sums[4] = sums[3] - first + dst[6];

354 sums[5] = sums[4] - dst[0] + dst[7];

355 sums[6] = sums[5] - dst[1] + last;

356 sums[7] = sums[6] - dst[2] + last;

357 sums[8] = sums[7] - dst[3] + last;

358 sums[9] = sums[8] - dst[4] + last;

359

360 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;

361 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;

362 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;

363 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;

364 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;

365 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;

366 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;

367 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;

368

369 dst+= stride;

370 }

371 }

372

373 /**

374 * Experimental Filter 1 (Horizontal)

375 * will not damage linear gradients

376 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter

377 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)

378 * MMX2 version does correct clipping C version does not

379 * not identical with the vertical one

380 */

381 static inline void horizX1Filter(uint8_t *src, int stride, int QP)

382 {

383 int y;

384 static uint64_t lut[256];

385 if(!lut[255])

386 {

387 int i;

388 for(i=0; i<256; i++)

389 {

390 int v= i < 128 ? 2*i : 2*(i-256);

391 /*

392 //Simulate 112242211 9-Tap filter

393 uint64_t a= (v/16) & 0xFF;

394 uint64_t b= (v/8) & 0xFF;

395 uint64_t c= (v/4) & 0xFF;

396 uint64_t d= (3*v/8) & 0xFF;

397 */

398 //Simulate piecewise linear interpolation

399 uint64_t a= (v/16) & 0xFF;

400 uint64_t b= (v*3/16) & 0xFF;

401 uint64_t c= (v*5/16) & 0xFF;

402 uint64_t d= (7*v/16) & 0xFF;

403 uint64_t A= (0x100 - a)&0xFF;

404 uint64_t B= (0x100 - b)&0xFF;

405 uint64_t C= (0x100 - c)&0xFF;

406 uint64_t D= (0x100 - c)&0xFF;

407

408 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |

409 (D<<24) | (C<<16) | (B<<8) | (A);

410 //lut[i] = (v<<32) | (v<<24);

411 }

412 }

413

414 for(y=0; y<BLOCK_SIZE; y++){

415 int a= src[1] - src[2];

416 int b= src[3] - src[4];

417 int c= src[5] - src[6];

418

419 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);

420

421 if(d < QP){

422 int v = d * FFSIGN(-b);

423

424 src[1] +=v/8;

425 src[2] +=v/4;

426 src[3] +=3*v/8;

427 src[4] -=3*v/8;

428 src[5] -=v/4;

429 src[6] -=v/8;

430 }

431 src+=stride;

432 }

433 }

434

435 /**

436 * accurate deblock filter

437 */

438 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,

439 int stride, const PPContext *c)

440 {

441 int y;

442 const int QP= c->QP;

443 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;

444 const int dcThreshold= dcOffset*2 + 1;

445 //START_TIMER

446 src+= step*4; // src points to begin of the 8x8 Block

447 for(y=0; y<8; y++){

448 int numEq= 0;

449

450 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;

451 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;

452 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;

453 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;

454 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;

455 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;

456 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;

457 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;

458 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;

459 if(numEq > c->ppMode.flatnessThreshold){

460 int min, max, x;

461

462 if(src[0] > src[step]){

463 max= src[0];

464 min= src[step];

465 }else{

466 max= src[step];

467 min= src[0];

468 }

469 for(x=2; x<8; x+=2){

470 if(src[x*step] > src[(x+1)*step]){

471 if(src[x *step] > max) max= src[ x *step];

472 if(src[(x+1)*step] < min) min= src[(x+1)*step];

473 }else{

474 if(src[(x+1)*step] > max) max= src[(x+1)*step];

475 if(src[ x *step] < min) min= src[ x *step];

476 }

477 }

478 if(max-min < 2*QP){

479 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];

480 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];

481

482 int sums[10];

483 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;

484 sums[1] = sums[0] - first + src[3*step];

485 sums[2] = sums[1] - first + src[4*step];

486 sums[3] = sums[2] - first + src[5*step];

487 sums[4] = sums[3] - first + src[6*step];

488 sums[5] = sums[4] - src[0*step] + src[7*step];

489 sums[6] = sums[5] - src[1*step] + last;

490 sums[7] = sums[6] - src[2*step] + last;

491 sums[8] = sums[7] - src[3*step] + last;

492 sums[9] = sums[8] - src[4*step] + last;

493

494 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;

495 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;

496 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;

497 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;

498 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;

499 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;

500 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;

501 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;

502 }

503 }else{

504 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);

505

506 if(FFABS(middleEnergy) < 8*QP){

507 const int q=(src[3*step] - src[4*step])/2;

508 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);

509 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);

510

511 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );

512 d= FFMAX(d, 0);

513

514 d= (5*d + 32) >> 6;

515 d*= FFSIGN(-middleEnergy);

516

517 if(q>0){

518 d= d<0 ? 0 : d;

519 d= d>q ? q : d;

520 }else{

521 d= d>0 ? 0 : d;

522 d= d<q ? q : d;

523 }

524

525 src[3*step]-= d;

526 src[4*step]+= d;

527 }

528 }

529

530 src += stride;

531 }

532 /*if(step==16){

533 STOP_TIMER("step16")

534 }else{

535 STOP_TIMER("stepX")

536 }*/

537 }

538

539 //Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one

540 //Plain C versions

541 //we always compile C for testing which needs bitexactness

542 #define TEMPLATE_PP_C 1

543 #include "postprocess_template.c"

544

545 #if HAVE_ALTIVEC

546 # define TEMPLATE_PP_ALTIVEC 1

547 # include "postprocess_altivec_template.c"

548 # include "postprocess_template.c"

549 #endif

550

551 #if ARCH_X86 && HAVE_INLINE_ASM

552 # if CONFIG_RUNTIME_CPUDETECT

553 # define TEMPLATE_PP_MMX 1

554 # include "postprocess_template.c"

555 # define TEMPLATE_PP_MMXEXT 1

556 # include "postprocess_template.c"

557 # define TEMPLATE_PP_3DNOW 1

558 # include "postprocess_template.c"

559 # define TEMPLATE_PP_SSE2 1

560 # include "postprocess_template.c"

561 # else

562 # if HAVE_SSE2_INLINE

563 # define TEMPLATE_PP_SSE2 1

564 # include "postprocess_template.c"

565 # elif HAVE_MMXEXT_INLINE

566 # define TEMPLATE_PP_MMXEXT 1

567 # include "postprocess_template.c"

568 # elif HAVE_AMD3DNOW_INLINE

569 # define TEMPLATE_PP_3DNOW 1

570 # include "postprocess_template.c"

571 # elif HAVE_MMX_INLINE

572 # define TEMPLATE_PP_MMX 1

573 # include "postprocess_template.c"

574 # endif

575 # endif

576 #endif

577

578 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,

579 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);

580

581 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,

582 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)

583 {

584 pp_fn pp = postProcess_C;

585 PPContext *c= (PPContext *)vc;

586 PPMode *ppMode= (PPMode *)vm;

587 c->ppMode= *ppMode; //FIXME

588

589 if (!(ppMode->lumMode & BITEXACT)) {

590 #if CONFIG_RUNTIME_CPUDETECT

591 #if ARCH_X86 && HAVE_INLINE_ASM

592 // ordered per speed fastest first

593 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;

594 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;

595 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;

596 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;

597 #elif HAVE_ALTIVEC

598 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;

599 #endif

600 #else /* CONFIG_RUNTIME_CPUDETECT */

601 #if HAVE_SSE2_INLINE

602 pp = postProcess_SSE2;

603 #elif HAVE_MMXEXT_INLINE

604 pp = postProcess_MMX2;

605 #elif HAVE_AMD3DNOW_INLINE

606 pp = postProcess_3DNow;

607 #elif HAVE_MMX_INLINE

608 pp = postProcess_MMX;

609 #elif HAVE_ALTIVEC

610 pp = postProcess_altivec;

611 #endif

612 #endif /* !CONFIG_RUNTIME_CPUDETECT */

613 }

614

615 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);

616 }

617

618 /* -pp Command line Help

619 */

/*
 * -pp command line help.
 *
 * The long names listed here must be spelled exactly as in the filters[]
 * table, because the parser matches them with a case-sensitive strcmp():
 * the accurate deblockers are "ahdeblock"/"avdeblock" and the quantizer
 * override is "forcequant".
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;

663

664 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)

665 {

666 char temp[GET_MODE_BUFFER_SIZE];

667 char *p= temp;

668 static const char filterDelimiters[] = ",/";

669 static const char optionDelimiters[] = ":";

670 struct PPMode *ppMode;

671 char *filterToken;

672

673 if (!name) {

674 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");

675 return NULL;

676 }

677

678 if (!strcmp(name, "help")) {

679 const char *p;

680 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {

681 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));

682 av_log(NULL, AV_LOG_INFO, "%s", temp);

683 }

684 return NULL;

685 }

686

687 ppMode= av_malloc(sizeof(PPMode));

688

689 ppMode->lumMode= 0;

690 ppMode->chromMode= 0;

691 ppMode->maxTmpNoise[0]= 700;

692 ppMode->maxTmpNoise[1]= 1500;

693 ppMode->maxTmpNoise[2]= 3000;

694 ppMode->maxAllowedY= 234;

695 ppMode->minAllowedY= 16;

696 ppMode->baseDcDiff= 256/8;

697 ppMode->flatnessThreshold= 56-16-1;

698 ppMode->maxClippedThreshold= 0.01;

699 ppMode->error=0;

700

701 memset(temp, 0, GET_MODE_BUFFER_SIZE);

702 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);

703

704 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);

705

706 for(;;){

707 char *filterName;

708 int q= 1000000; //PP_QUALITY_MAX;

709 int chrom=-1;

710 int luma=-1;

711 char *option;

712 char *options[OPTIONS_ARRAY_SIZE];

713 int i;

714 int filterNameOk=0;

715 int numOfUnknownOptions=0;

716 int enable=1; //does the user want us to enabled or disabled the filter

717

718 filterToken= strtok(p, filterDelimiters);

719 if(filterToken == NULL) break;

720 p+= strlen(filterToken) + 1; // p points to next filterToken

721 filterName= strtok(filterToken, optionDelimiters);

722 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);

723

724 if(*filterName == '-'){

725 enable=0;

726 filterName++;

727 }

728

729 for(;;){ //for all options

730 option= strtok(NULL, optionDelimiters);

731 if(option == NULL) break;

732

733 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);

734 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;

735 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;

736 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;

737 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;

738 else{

739 options[numOfUnknownOptions] = option;

740 numOfUnknownOptions++;

741 }

742 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;

743 }

744 options[numOfUnknownOptions] = NULL;

745

746 /* replace stuff from the replace Table */

747 for(i=0; replaceTable[2*i]!=NULL; i++){

748 if(!strcmp(replaceTable[2*i], filterName)){

749 int newlen= strlen(replaceTable[2*i + 1]);

750 int plen;

751 int spaceLeft;

752

753 p--, *p=',';

754

755 plen= strlen(p);

756 spaceLeft= p - temp + plen;

757 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){

758 ppMode->error++;

759 break;

760 }

761 memmove(p + newlen, p, plen+1);

762 memcpy(p, replaceTable[2*i + 1], newlen);

763 filterNameOk=1;

764 }

765 }

766

767 for(i=0; filters[i].shortName!=NULL; i++){

768 if( !strcmp(filters[i].longName, filterName)

769 || !strcmp(filters[i].shortName, filterName)){

770 ppMode->lumMode &= ~filters[i].mask;

771 ppMode->chromMode &= ~filters[i].mask;

772

773 filterNameOk=1;

774 if(!enable) break; // user wants to disable it

775

776 if(q >= filters[i].minLumQuality && luma)

777 ppMode->lumMode|= filters[i].mask;

778 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))

779 if(q >= filters[i].minChromQuality)

780 ppMode->chromMode|= filters[i].mask;

781

782 if(filters[i].mask == LEVEL_FIX){

783 int o;

784 ppMode->minAllowedY= 16;

785 ppMode->maxAllowedY= 234;

786 for(o=0; options[o]!=NULL; o++){

787 if( !strcmp(options[o],"fullyrange")

788 ||!strcmp(options[o],"f")){

789 ppMode->minAllowedY= 0;

790 ppMode->maxAllowedY= 255;

791 numOfUnknownOptions--;

792 }

793 }

794 }

795 else if(filters[i].mask == TEMP_NOISE_FILTER)

796 {

797 int o;

798 int numOfNoises=0;

799

800 for(o=0; options[o]!=NULL; o++){

801 char *tail;

802 ppMode->maxTmpNoise[numOfNoises]=

803 strtol(options[o], &tail, 0);

804 if(tail!=options[o]){

805 numOfNoises++;

806 numOfUnknownOptions--;

807 if(numOfNoises >= 3) break;

808 }

809 }

810 }

811 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK

812 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){

813 int o;

814

815 for(o=0; options[o]!=NULL && o<2; o++){

816 char *tail;

817 int val= strtol(options[o], &tail, 0);

818 if(tail==options[o]) break;

819

820 numOfUnknownOptions--;

821 if(o==0) ppMode->baseDcDiff= val;

822 else ppMode->flatnessThreshold= val;

823 }

824 }

825 else if(filters[i].mask == FORCE_QUANT){

826 int o;

827 ppMode->forcedQuant= 15;

828

829 for(o=0; options[o]!=NULL && o<1; o++){

830 char *tail;

831 int val= strtol(options[o], &tail, 0);

832 if(tail==options[o]) break;

833

834 numOfUnknownOptions--;

835 ppMode->forcedQuant= val;

836 }

837 }

838 }

839 }

840 if(!filterNameOk) ppMode->error++;

841 ppMode->error += numOfUnknownOptions;

842 }

843

844 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);

845 if(ppMode->error){

846 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);

847 av_free(ppMode);

848 return NULL;

849 }

850 return ppMode;

851 }

852

853 void pp_free_mode(pp_mode *mode){

854 av_free(mode);

855 }

856

/**
 * Replace the buffer *p by a fresh one of the given size obtained from
 * av_mallocz(); the previous buffer is freed and its contents are not kept.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *old = *p;

    av_free(old);
    *p = av_mallocz(size);
}

861

862 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){

863 int mbWidth = (width+15)>>4;

864 int mbHeight= (height+15)>>4;

865 int i;

866

867 c->stride= stride;

868 c->qpStride= qpStride;

869

870 reallocAlign((void **)&c->tempDst, 8, stride*24+32);

871 reallocAlign((void **)&c->tempSrc, 8, stride*24);

872 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);

873 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

874 for(i=0; i<256; i++)

875 c->yHistogram[i]= width*height/64*15/256;

876

877 for(i=0; i<3; i++){

878 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.

879 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);

880 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size

881 }

882

883 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);

884 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));

885 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));

886 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));

887 }

888

889 static const char * context_to_name(void * ptr) {

890 return "postproc";

891 }

892

893 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };

894

895 pp_context *pp_get_context(int width, int height, int cpuCaps){

896 PPContext *c= av_malloc(sizeof(PPContext));

897 int stride= FFALIGN(width, 16); //assumed / will realloc if needed

898 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed

899

900 memset(c, 0, sizeof(PPContext));

901 c->av_class = &av_codec_context_class;

902 if(cpuCaps&PP_FORMAT){

903 c->hChromaSubSample= cpuCaps&0x3;

904 c->vChromaSubSample= (cpuCaps>>4)&0x3;

905 }else{

906 c->hChromaSubSample= 1;

907 c->vChromaSubSample= 1;

908 }

909 if (cpuCaps & PP_CPU_CAPS_AUTO) {

910 c->cpuCaps = av_get_cpu_flags();

911 } else {

912 c->cpuCaps = 0;

913 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;

914 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;

915 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;

916 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;

917 }

918

919 reallocBuffers(c, width, height, stride, qpStride);

920

921 c->frameNum=-1;

922

923 return c;

924 }

925

926 void pp_free_context(void *vc){

927 PPContext *c = (PPContext*)vc;

928 int i;

929

930 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);

931 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);

932

933 av_free(c->tempBlocks);

934 av_free(c->yHistogram);

935 av_free(c->tempDst);

936 av_free(c->tempSrc);

937 av_free(c->deintTemp);

938 av_free(c->stdQPTable);

939 av_free(c->nonBQPTable);

940 av_free(c->forcedQPTable);

941

942 memset(c, 0, sizeof(PPContext));

943

944 av_free(c);

945 }

946

947 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],

948 uint8_t * dst[3], const int dstStride[3],

949 int width, int height,

950 const QP_STORE_T *QP_store, int QPStride,

951 pp_mode *vm, void *vc, int pict_type)

952 {

953 int mbWidth = (width+15)>>4;

954 int mbHeight= (height+15)>>4;

955 PPMode *mode = (PPMode*)vm;

956 PPContext *c = (PPContext*)vc;

957 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));

958 int absQPStride = FFABS(QPStride);

959

960 // c->stride and c->QPStride are always positive

961 if(c->stride < minStride || c->qpStride < absQPStride)

962 reallocBuffers(c, width, height,

963 FFMAX(minStride, c->stride),

964 FFMAX(c->qpStride, absQPStride));

965

966 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){

967 int i;

968 QP_store= c->forcedQPTable;

969 absQPStride = QPStride = 0;

970 if(mode->lumMode & FORCE_QUANT)

971 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;

972 else

973 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;

974 }

975

976 if(pict_type & PP_PICT_TYPE_QP2){

977 int i;

978 const int count= mbHeight * absQPStride;

979 for(i=0; i<(count>>2); i++){

980 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;

981 }

982 for(i<<=2; i<count; i++){

983 c->stdQPTable[i] = QP_store[i]>>1;

984 }

985 QP_store= c->stdQPTable;

986 QPStride= absQPStride;

987 }

988

989 if(0){

990 int x,y;

991 for(y=0; y<mbHeight; y++){

992 for(x=0; x<mbWidth; x++){

993 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);

994 }

995 av_log(c, AV_LOG_INFO, "\n");

996 }

997 av_log(c, AV_LOG_INFO, "\n");

998 }

999

1000 if((pict_type&7)!=3){

1001 if (QPStride >= 0){

1002 int i;

1003 const int count= mbHeight * QPStride;

1004 for(i=0; i<(count>>2); i++){

1005 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;

1006 }

1007 for(i<<=2; i<count; i++){

1008 c->nonBQPTable[i] = QP_store[i] & 0x3F;

1009 }

1010 } else {

1011 int i,j;

1012 for(i=0; i<mbHeight; i++) {

1013 for(j=0; j<absQPStride; j++) {

1014 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;

1015 }

1016 }

1017 }

1018 }

1019

1020 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",

1021 mode->lumMode, mode->chromMode);

1022

1023 postProcess(src[0], srcStride[0], dst[0], dstStride[0],

1024 width, height, QP_store, QPStride, 0, mode, c);

1025

1026 width = (width )>>c->hChromaSubSample;

1027 height = (height)>>c->vChromaSubSample;

1028

1029 if(mode->chromMode){

1030 postProcess(src[1], srcStride[1], dst[1], dstStride[1],

1031 width, height, QP_store, QPStride, 1, mode, c);

1032 postProcess(src[2], srcStride[2], dst[2], dstStride[2],

1033 width, height, QP_store, QPStride, 2, mode, c);

1034 }

1035 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){

1036 linecpy(dst[1], src[1], height, srcStride[1]);

1037 linecpy(dst[2], src[2], height, srcStride[2]);

1038 }else{

1039 int y;

1040 for(y=0; y<height; y++){

1041 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);

1042 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);

1043 }

1044 }

1045 }

Generated on Sat May 25 2013 04:01:21 for FFmpeg by doxygen 1.8.2