FFmpeg: libavcodec/h264pred_template.c Source File

FFmpeg

[フレーム]

libavcodec

h264pred_template.c

Go to the documentation of this file.

1 /*

2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder

4 *

5 * This file is part of FFmpeg.

6 *

7 * FFmpeg is free software; you can redistribute it and/or

8 * modify it under the terms of the GNU Lesser General Public

9 * License as published by the Free Software Foundation; either

10 * version 2.1 of the License, or (at your option) any later version.

11 *

12 * FFmpeg is distributed in the hope that it will be useful,

13 * but WITHOUT ANY WARRANTY; without even the implied warranty of

14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

15 * Lesser General Public License for more details.

16 *

17 * You should have received a copy of the GNU Lesser General Public

18 * License along with FFmpeg; if not, write to the Free Software

19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

20 */

22 /**

23 * @file

24 * H.264 / AVC / MPEG4 part10 prediction functions.

25 * @author Michael Niedermayer <michaelni@gmx.at>

26 */

28 #include "mathops.h"

30 #include "bit_depth_template.c"

32 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,

33 ptrdiff_t _stride)

34 {

35 pixel *src = (pixel*)_src;

36 int stride = _stride>>(sizeof(pixel)-1);

37 const pixel4 a= AV_RN4PA(src-stride);

39 AV_WN4PA(src+0*stride, a);

40 AV_WN4PA(src+1*stride, a);

41 AV_WN4PA(src+2*stride, a);

42 AV_WN4PA(src+3*stride, a);

43 }

45 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,

46 ptrdiff_t _stride)

47 {

48 pixel *src = (pixel*)_src;

49 int stride = _stride>>(sizeof(pixel)-1);

50 AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));

51 AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));

52 AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));

53 AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));

54 }

56 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,

57 ptrdiff_t _stride)

58 {

59 pixel *src = (pixel*)_src;

60 int stride = _stride>>(sizeof(pixel)-1);

61 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]

62 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

63 const pixel4 a = PIXEL_SPLAT_X4(dc);

65 AV_WN4PA(src+0*stride, a);

66 AV_WN4PA(src+1*stride, a);

67 AV_WN4PA(src+2*stride, a);

68 AV_WN4PA(src+3*stride, a);

69 }

71 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,

72 ptrdiff_t _stride)

73 {

74 pixel *src = (pixel*)_src;

75 int stride = _stride>>(sizeof(pixel)-1);

76 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

77 const pixel4 a = PIXEL_SPLAT_X4(dc);

79 AV_WN4PA(src+0*stride, a);

80 AV_WN4PA(src+1*stride, a);

81 AV_WN4PA(src+2*stride, a);

82 AV_WN4PA(src+3*stride, a);

83 }

85 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,

86 ptrdiff_t _stride)

87 {

88 pixel *src = (pixel*)_src;

89 int stride = _stride>>(sizeof(pixel)-1);

90 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;

91 const pixel4 a = PIXEL_SPLAT_X4(dc);

93 AV_WN4PA(src+0*stride, a);

94 AV_WN4PA(src+1*stride, a);

95 AV_WN4PA(src+2*stride, a);

96 AV_WN4PA(src+3*stride, a);

97 }

99 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,

100 ptrdiff_t _stride)

101 {

102 pixel *src = (pixel*)_src;

103 int stride = _stride>>(sizeof(pixel)-1);

104 const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));

105

106 AV_WN4PA(src+0*stride, a);

107 AV_WN4PA(src+1*stride, a);

108 AV_WN4PA(src+2*stride, a);

109 AV_WN4PA(src+3*stride, a);

110 }

111

112 static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright,

113 ptrdiff_t _stride)

114 {

115 pixel *src = (pixel*)_src;

116 int stride = _stride>>(sizeof(pixel)-1);

117 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);

118

119 AV_WN4PA(src+0*stride, a);

120 AV_WN4PA(src+1*stride, a);

121 AV_WN4PA(src+2*stride, a);

122 AV_WN4PA(src+3*stride, a);

123 }

124

125 static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright,

126 ptrdiff_t _stride)

127 {

128 pixel *src = (pixel*)_src;

129 int stride = _stride>>(sizeof(pixel)-1);

130 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);

131

132 AV_WN4PA(src+0*stride, a);

133 AV_WN4PA(src+1*stride, a);

134 AV_WN4PA(src+2*stride, a);

135 AV_WN4PA(src+3*stride, a);

136 }

137

138

139 #define LOAD_TOP_RIGHT_EDGE\

140 const unsigned av_unused t4 = topright[0];\

141 const unsigned av_unused t5 = topright[1];\

142 const unsigned av_unused t6 = topright[2];\

143 const unsigned av_unused t7 = topright[3];\

144

145 #define LOAD_DOWN_LEFT_EDGE\

146 const unsigned av_unused l4 = src[-1+4*stride];\

147 const unsigned av_unused l5 = src[-1+5*stride];\

148 const unsigned av_unused l6 = src[-1+6*stride];\

149 const unsigned av_unused l7 = src[-1+7*stride];\

150

151 #define LOAD_LEFT_EDGE\

152 const unsigned av_unused l0 = src[-1+0*stride];\

153 const unsigned av_unused l1 = src[-1+1*stride];\

154 const unsigned av_unused l2 = src[-1+2*stride];\

155 const unsigned av_unused l3 = src[-1+3*stride];\

156

157 #define LOAD_TOP_EDGE\

158 const unsigned av_unused t0 = src[ 0-1*stride];\

159 const unsigned av_unused t1 = src[ 1-1*stride];\

160 const unsigned av_unused t2 = src[ 2-1*stride];\

161 const unsigned av_unused t3 = src[ 3-1*stride];\

162

163 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,

164 ptrdiff_t _stride)

165 {

166 pixel *src = (pixel*)_src;

167 int stride = _stride>>(sizeof(pixel)-1);

168 const int lt= src[-1-1*stride];

169 LOAD_TOP_EDGE

170 LOAD_LEFT_EDGE

171

172 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;

173 src[0+2*stride]=

174 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;

175 src[0+1*stride]=

176 src[1+2*stride]=

177 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;

178 src[0+0*stride]=

179 src[1+1*stride]=

180 src[2+2*stride]=

181 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;

182 src[1+0*stride]=

183 src[2+1*stride]=

184 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;

185 src[2+0*stride]=

186 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;

187 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;

188 }

189

190 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,

191 ptrdiff_t _stride)

192 {

193 pixel *src = (pixel*)_src;

194 const pixel *topright = (const pixel*)_topright;

195 int stride = _stride>>(sizeof(pixel)-1);

196 LOAD_TOP_EDGE

197 LOAD_TOP_RIGHT_EDGE

198 // LOAD_LEFT_EDGE

199

200 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;

201 src[1+0*stride]=

202 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;

203 src[2+0*stride]=

204 src[1+1*stride]=

205 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;

206 src[3+0*stride]=

207 src[2+1*stride]=

208 src[1+2*stride]=

209 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;

210 src[3+1*stride]=

211 src[2+2*stride]=

212 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;

213 src[3+2*stride]=

214 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;

215 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;

216 }

217

218 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,

219 const uint8_t *topright,

220 ptrdiff_t _stride)

221 {

222 pixel *src = (pixel*)_src;

223 int stride = _stride>>(sizeof(pixel)-1);

224 const int lt= src[-1-1*stride];

225 LOAD_TOP_EDGE

226 LOAD_LEFT_EDGE

227

228 src[0+0*stride]=

229 src[1+2*stride]=(lt + t0 + 1)>>1;

230 src[1+0*stride]=

231 src[2+2*stride]=(t0 + t1 + 1)>>1;

232 src[2+0*stride]=

233 src[3+2*stride]=(t1 + t2 + 1)>>1;

234 src[3+0*stride]=(t2 + t3 + 1)>>1;

235 src[0+1*stride]=

236 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;

237 src[1+1*stride]=

238 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;

239 src[2+1*stride]=

240 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;

241 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;

242 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;

243 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;

244 }

245

246 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,

247 const uint8_t *_topright,

248 ptrdiff_t _stride)

249 {

250 pixel *src = (pixel*)_src;

251 const pixel *topright = (const pixel*)_topright;

252 int stride = _stride>>(sizeof(pixel)-1);

253 LOAD_TOP_EDGE

254 LOAD_TOP_RIGHT_EDGE

255

256 src[0+0*stride]=(t0 + t1 + 1)>>1;

257 src[1+0*stride]=

258 src[0+2*stride]=(t1 + t2 + 1)>>1;

259 src[2+0*stride]=

260 src[1+2*stride]=(t2 + t3 + 1)>>1;

261 src[3+0*stride]=

262 src[2+2*stride]=(t3 + t4+ 1)>>1;

263 src[3+2*stride]=(t4 + t5+ 1)>>1;

264 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;

265 src[1+1*stride]=

266 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;

267 src[2+1*stride]=

268 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;

269 src[3+1*stride]=

270 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;

271 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;

272 }

273

274 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,

275 ptrdiff_t _stride)

276 {

277 pixel *src = (pixel*)_src;

278 int stride = _stride>>(sizeof(pixel)-1);

279 LOAD_LEFT_EDGE

280

281 src[0+0*stride]=(l0 + l1 + 1)>>1;

282 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;

283 src[2+0*stride]=

284 src[0+1*stride]=(l1 + l2 + 1)>>1;

285 src[3+0*stride]=

286 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;

287 src[2+1*stride]=

288 src[0+2*stride]=(l2 + l3 + 1)>>1;

289 src[3+1*stride]=

290 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;

291 src[3+2*stride]=

292 src[1+3*stride]=

293 src[0+3*stride]=

294 src[2+2*stride]=

295 src[2+3*stride]=

296 src[3+3*stride]=l3;

297 }

298

299 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,

300 const uint8_t *topright,

301 ptrdiff_t _stride)

302 {

303 pixel *src = (pixel*)_src;

304 int stride = _stride>>(sizeof(pixel)-1);

305 const int lt= src[-1-1*stride];

306 LOAD_TOP_EDGE

307 LOAD_LEFT_EDGE

308

309 src[0+0*stride]=

310 src[2+1*stride]=(lt + l0 + 1)>>1;

311 src[1+0*stride]=

312 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;

313 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;

314 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;

315 src[0+1*stride]=

316 src[2+2*stride]=(l0 + l1 + 1)>>1;

317 src[1+1*stride]=

318 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;

319 src[0+2*stride]=

320 src[2+3*stride]=(l1 + l2+ 1)>>1;

321 src[1+2*stride]=

322 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;

323 src[0+3*stride]=(l2 + l3 + 1)>>1;

324 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;

325 }

326

327 static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)

328 {

329 int i;

330 pixel *src = (pixel*)_src;

331 int stride = _stride>>(sizeof(pixel)-1);

332 const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);

333 const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);

334 const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);

335 const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);

336

337 for(i=0; i<16; i++){

338 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);

339 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);

340 AV_WN4PA(((pixel4*)(src+i*stride))+2, c);

341 AV_WN4PA(((pixel4*)(src+i*stride))+3, d);

342 }

343 }

344

345 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)

346 {

347 int i;

348 pixel *src = (pixel*)_src;

349 stride >>= sizeof(pixel)-1;

350

351 for(i=0; i<16; i++){

352 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);

353

354 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);

355 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);

356 AV_WN4PA(((pixel4*)(src+i*stride))+2, a);

357 AV_WN4PA(((pixel4*)(src+i*stride))+3, a);

358 }

359 }

360

361 #define PREDICT_16x16_DC(v)\

362 for(i=0; i<16; i++){\

363 AV_WN4PA(src+ 0, v);\

364 AV_WN4PA(src+ 4, v);\

365 AV_WN4PA(src+ 8, v);\

366 AV_WN4PA(src+12, v);\

367 src += stride;\

368 }

369

370 static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)

371 {

372 int i, dc=0;

373 pixel *src = (pixel*)_src;

374 pixel4 dcsplat;

375 stride >>= sizeof(pixel)-1;

376

377 for(i=0;i<16; i++){

378 dc+= src[-1+i*stride];

379 }

380

381 for(i=0;i<16; i++){

382 dc+= src[i-stride];

383 }

384

385 dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);

386 PREDICT_16x16_DC(dcsplat);

387 }

388

389 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)

390 {

391 int i, dc=0;

392 pixel *src = (pixel*)_src;

393 pixel4 dcsplat;

394 stride >>= sizeof(pixel)-1;

395

396 for(i=0;i<16; i++){

397 dc+= src[-1+i*stride];

398 }

399

400 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);

401 PREDICT_16x16_DC(dcsplat);

402 }

403

404 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)

405 {

406 int i, dc=0;

407 pixel *src = (pixel*)_src;

408 pixel4 dcsplat;

409 stride >>= sizeof(pixel)-1;

410

411 for(i=0;i<16; i++){

412 dc+= src[i-stride];

413 }

414

415 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);

416 PREDICT_16x16_DC(dcsplat);

417 }

418

419 #define PRED16x16_X(n, v) \

420 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\

421 {\

422 int i;\

423 pixel *src = (pixel*)_src;\

424 stride >>= sizeof(pixel)-1;\

425 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\

426 }

427

428 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)

429 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)

430 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)

431

432 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,

433 ptrdiff_t _stride,

434 const int svq3,

435 const int rv40)

436 {

437 int i, j, k;

438 int a;

439 INIT_CLIP

440 pixel *src = (pixel*)_src;

441 int stride = _stride>>(sizeof(pixel)-1);

442 const pixel * const src0 = src +7-stride;

443 const pixel * src1 = src +8*stride-1;

444 const pixel * src2 = src1-2*stride; // == src+6*stride-1;

445 int H = src0[1] - src0[-1];

446 int V = src1[0] - src2[ 0];

447 for(k=2; k<=8; ++k) {

448 src1 += stride; src2 -= stride;

449 H += k*(src0[k] - src0[-k]);

450 V += k*(src1[0] - src2[ 0]);

451 }

452 if(svq3){

453 H = ( 5*(H/4) ) / 16;

454 V = ( 5*(V/4) ) / 16;

455

456 /* required for 100% accuracy */

457 i = H; H = V; V = i;

458 }else if(rv40){

459 H = ( H + (H>>2) ) >> 4;

460 V = ( V + (V>>2) ) >> 4;

461 }else{

462 H = ( 5*H+32 ) >> 6;

463 V = ( 5*V+32 ) >> 6;

464 }

465

466 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);

467 for(j=16; j>0; --j) {

468 int b = a;

469 a += V;

470 for(i=-16; i<0; i+=4) {

471 src[16+i] = CLIP((b ) >> 5);

472 src[17+i] = CLIP((b+ H) >> 5);

473 src[18+i] = CLIP((b+2*H) >> 5);

474 src[19+i] = CLIP((b+3*H) >> 5);

475 b += 4*H;

476 }

477 src += stride;

478 }

479 }

480

481 static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)

482 {

483 FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);

484 }

485

486 static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)

487 {

488 int i;

489 pixel *src = (pixel*)_src;

490 int stride = _stride>>(sizeof(pixel)-1);

491 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);

492 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);

493

494 for(i=0; i<8; i++){

495 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);

496 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);

497 }

498 }

499

500 static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)

501 {

502 int i;

503 pixel *src = (pixel*)_src;

504 int stride = _stride>>(sizeof(pixel)-1);

505 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);

506 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);

507

508 for(i=0; i<16; i++){

509 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);

510 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);

511 }

512 }

513

514 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)

515 {

516 int i;

517 pixel *src = (pixel*)_src;

518 stride >>= sizeof(pixel)-1;

519

520 for(i=0; i<8; i++){

521 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);

522 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);

523 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);

524 }

525 }

526

527 static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)

528 {

529 int i;

530 pixel *src = (pixel*)_src;

531 stride >>= sizeof(pixel)-1;

532 for(i=0; i<16; i++){

533 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);

534 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);

535 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);

536 }

537 }

538

539 #define PRED8x8_X(n, v)\

540 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\

541 {\

542 int i;\

543 const pixel4 a = PIXEL_SPLAT_X4(v);\

544 pixel *src = (pixel*)_src;\

545 stride >>= sizeof(pixel)-1;\

546 for(i=0; i<8; i++){\

547 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\

548 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\

549 }\

550 }

551

552 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)

553 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)

554 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)

555

556 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)

557 {

558 FUNCC(pred8x8_128_dc)(_src, stride);

559 FUNCC(pred8x8_128_dc)(_src+8*stride, stride);

560 }

561

562 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)

563 {

564 int i;

565 int dc0, dc2;

566 pixel4 dc0splat, dc2splat;

567 pixel *src = (pixel*)_src;

568 stride >>= sizeof(pixel)-1;

569

570 dc0=dc2=0;

571 for(i=0;i<4; i++){

572 dc0+= src[-1+i*stride];

573 dc2+= src[-1+(i+4)*stride];

574 }

575 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);

576 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);

577

578 for(i=0; i<4; i++){

579 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);

580 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);

581 }

582 for(i=4; i<8; i++){

583 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);

584 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);

585 }

586 }

587

588 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)

589 {

590 FUNCC(pred8x8_left_dc)(_src, stride);

591 FUNCC(pred8x8_left_dc)(_src+8*stride, stride);

592 }

593

594 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)

595 {

596 int i;

597 int dc0, dc1;

598 pixel4 dc0splat, dc1splat;

599 pixel *src = (pixel*)_src;

600 stride >>= sizeof(pixel)-1;

601

602 dc0=dc1=0;

603 for(i=0;i<4; i++){

604 dc0+= src[i-stride];

605 dc1+= src[4+i-stride];

606 }

607 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);

608 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

609

610 for(i=0; i<4; i++){

611 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);

612 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);

613 }

614 for(i=4; i<8; i++){

615 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);

616 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);

617 }

618 }

619

620 static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)

621 {

622 int i;

623 int dc0, dc1;

624 pixel4 dc0splat, dc1splat;

625 pixel *src = (pixel*)_src;

626 stride >>= sizeof(pixel)-1;

627

628 dc0=dc1=0;

629 for(i=0;i<4; i++){

630 dc0+= src[i-stride];

631 dc1+= src[4+i-stride];

632 }

633 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);

634 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

635

636 for(i=0; i<16; i++){

637 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);

638 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);

639 }

640 }

641

642 static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)

643 {

644 int i;

645 int dc0, dc1, dc2;

646 pixel4 dc0splat, dc1splat, dc2splat, dc3splat;

647 pixel *src = (pixel*)_src;

648 stride >>= sizeof(pixel)-1;

649

650 dc0=dc1=dc2=0;

651 for(i=0;i<4; i++){

652 dc0+= src[-1+i*stride] + src[i-stride];

653 dc1+= src[4+i-stride];

654 dc2+= src[-1+(i+4)*stride];

655 }

656 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);

657 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

658 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);

659 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);

660

661 for(i=0; i<4; i++){

662 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);

663 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);

664 }

665 for(i=4; i<8; i++){

666 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);

667 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);

668 }

669 }

670

671 static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)

672 {

673 int i;

674 int dc0, dc1, dc2, dc3, dc4;

675 pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;

676 pixel *src = (pixel*)_src;

677 stride >>= sizeof(pixel)-1;

678

679 dc0=dc1=dc2=dc3=dc4=0;

680 for(i=0;i<4; i++){

681 dc0+= src[-1+i*stride] + src[i-stride];

682 dc1+= src[4+i-stride];

683 dc2+= src[-1+(i+4)*stride];

684 dc3+= src[-1+(i+8)*stride];

685 dc4+= src[-1+(i+12)*stride];

686 }

687 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);

688 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

689 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);

690 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);

691 dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);

692 dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);

693 dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);

694 dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);

695

696 for(i=0; i<4; i++){

697 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);

698 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);

699 }

700 for(i=4; i<8; i++){

701 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);

702 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);

703 }

704 for(i=8; i<12; i++){

705 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);

706 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);

707 }

708 for(i=12; i<16; i++){

709 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);

710 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);

711 }

712 }

713

714 //the following 4 function should not be optimized!

715 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)

716 {

717 FUNCC(pred8x8_top_dc)(src, stride);

718 FUNCC(pred4x4_dc)(src, NULL, stride);

719 }

720

721 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)

722 {

723 FUNCC(pred8x16_top_dc)(src, stride);

724 FUNCC(pred4x4_dc)(src, NULL, stride);

725 }

726

727 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)

728 {

729 FUNCC(pred8x8_dc)(src, stride);

730 FUNCC(pred4x4_top_dc)(src, NULL, stride);

731 }

732

733 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)

734 {

735 FUNCC(pred8x16_dc)(src, stride);

736 FUNCC(pred4x4_top_dc)(src, NULL, stride);

737 }

738

739 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)

740 {

741 FUNCC(pred8x8_left_dc)(src, stride);

742 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);

743 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);

744 }

745

746 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)

747 {

748 FUNCC(pred8x16_left_dc)(src, stride);

749 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);

750 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);

751 }

752

753 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)

754 {

755 FUNCC(pred8x8_left_dc)(src, stride);

756 FUNCC(pred4x4_128_dc)(src , NULL, stride);

757 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);

758 }

759

760 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)

761 {

762 FUNCC(pred8x16_left_dc)(src, stride);

763 FUNCC(pred4x4_128_dc)(src , NULL, stride);

764 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);

765 }

766

767 static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)

768 {

769 int j, k;

770 int a;

771 INIT_CLIP

772 pixel *src = (pixel*)_src;

773 int stride = _stride>>(sizeof(pixel)-1);

774 const pixel * const src0 = src +3-stride;

775 const pixel * src1 = src +4*stride-1;

776 const pixel * src2 = src1-2*stride; // == src+2*stride-1;

777 int H = src0[1] - src0[-1];

778 int V = src1[0] - src2[ 0];

779 for(k=2; k<=4; ++k) {

780 src1 += stride; src2 -= stride;

781 H += k*(src0[k] - src0[-k]);

782 V += k*(src1[0] - src2[ 0]);

783 }

784 H = ( 17*H+16 ) >> 5;

785 V = ( 17*V+16 ) >> 5;

786

787 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);

788 for(j=8; j>0; --j) {

789 int b = a;

790 a += V;

791 src[0] = CLIP((b ) >> 5);

792 src[1] = CLIP((b+ H) >> 5);

793 src[2] = CLIP((b+2*H) >> 5);

794 src[3] = CLIP((b+3*H) >> 5);

795 src[4] = CLIP((b+4*H) >> 5);

796 src[5] = CLIP((b+5*H) >> 5);

797 src[6] = CLIP((b+6*H) >> 5);

798 src[7] = CLIP((b+7*H) >> 5);

799 src += stride;

800 }

801 }

802

803 static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)

804 {

805 int j, k;

806 int a;

807 INIT_CLIP

808 pixel *src = (pixel*)_src;

809 int stride = _stride>>(sizeof(pixel)-1);

810 const pixel * const src0 = src +3-stride;

811 const pixel * src1 = src +8*stride-1;

812 const pixel * src2 = src1-2*stride; // == src+6*stride-1;

813 int H = src0[1] - src0[-1];

814 int V = src1[0] - src2[ 0];

815

816 for (k = 2; k <= 4; ++k) {

817 src1 += stride; src2 -= stride;

818 H += k*(src0[k] - src0[-k]);

819 V += k*(src1[0] - src2[ 0]);

820 }

821 for (; k <= 8; ++k) {

822 src1 += stride; src2 -= stride;

823 V += k*(src1[0] - src2[0]);

824 }

825

826 H = (17*H+16) >> 5;

827 V = (5*V+32) >> 6;

828

829 a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;

830 for(j=16; j>0; --j) {

831 int b = a;

832 a += V;

833 src[0] = CLIP((b ) >> 5);

834 src[1] = CLIP((b+ H) >> 5);

835 src[2] = CLIP((b+2*H) >> 5);

836 src[3] = CLIP((b+3*H) >> 5);

837 src[4] = CLIP((b+4*H) >> 5);

838 src[5] = CLIP((b+5*H) >> 5);

839 src[6] = CLIP((b+6*H) >> 5);

840 src[7] = CLIP((b+7*H) >> 5);

841 src += stride;

842 }

843 }

844

845 #define SRC(x,y) src[(x)+(y)*stride]

846 #define PL(y) \

847 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;

848 #define PREDICT_8x8_LOAD_LEFT \

849 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \

850 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \

851 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \

852 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

853

854 #define PT(x) \

855 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

856 #define PREDICT_8x8_LOAD_TOP \

857 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \

858 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \

859 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \

860 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \

861 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

862

863 #define PTR(x) \

864 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

865 #define PREDICT_8x8_LOAD_TOPRIGHT \

866 int t8, t9, t10, t11, t12, t13, t14, t15; \

867 if(has_topright) { \

868 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \

869 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \

870 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

871

872 #define PREDICT_8x8_LOAD_TOPLEFT \

873 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

874

875 #define PREDICT_8x8_DC(v) \

876 int y; \

877 for( y = 0; y < 8; y++ ) { \

878 AV_WN4PA(((pixel4*)src)+0, v); \

879 AV_WN4PA(((pixel4*)src)+1, v); \

880 src += stride; \

881 }

882

883 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,

884 int has_topright, ptrdiff_t _stride)

885 {

886 pixel *src = (pixel*)_src;

887 int stride = _stride>>(sizeof(pixel)-1);

888

889 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));

890 }

891 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,

892 int has_topright, ptrdiff_t _stride)

893 {

894 pixel *src = (pixel*)_src;

895 int stride = _stride>>(sizeof(pixel)-1);

896

897 PREDICT_8x8_LOAD_LEFT;

898 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);

899 PREDICT_8x8_DC(dc);

900 }

901 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,

902 int has_topright, ptrdiff_t _stride)

903 {

904 pixel *src = (pixel*)_src;

905 int stride = _stride>>(sizeof(pixel)-1);

906

907 PREDICT_8x8_LOAD_TOP;

908 const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);

909 PREDICT_8x8_DC(dc);

910 }

911 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,

912 int has_topright, ptrdiff_t _stride)

913 {

914 pixel *src = (pixel*)_src;

915 int stride = _stride>>(sizeof(pixel)-1);

916

917 PREDICT_8x8_LOAD_LEFT;

918 PREDICT_8x8_LOAD_TOP;

919 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7

920 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);

921 PREDICT_8x8_DC(dc);

922 }

923 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,

924 int has_topright, ptrdiff_t _stride)

925 {

926 pixel *src = (pixel*)_src;

927 int stride = _stride>>(sizeof(pixel)-1);

928 pixel4 a;

929

930 PREDICT_8x8_LOAD_LEFT;

931 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \

932 AV_WN4PA(src+y*stride, a); \

933 AV_WN4PA(src+y*stride+4, a);

934 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);

935 #undef ROW

936 }

937 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,

938 int has_topright, ptrdiff_t _stride)

939 {

940 int y;

941 pixel *src = (pixel*)_src;

942 int stride = _stride>>(sizeof(pixel)-1);

943 pixel4 a, b;

944

945 PREDICT_8x8_LOAD_TOP;

946 src[0] = t0;

947 src[1] = t1;

948 src[2] = t2;

949 src[3] = t3;

950 src[4] = t4;

951 src[5] = t5;

952 src[6] = t6;

953 src[7] = t7;

954 a = AV_RN4PA(((pixel4*)src)+0);

955 b = AV_RN4PA(((pixel4*)src)+1);

956 for( y = 1; y < 8; y++ ) {

957 AV_WN4PA(((pixel4*)(src+y*stride))+0, a);

958 AV_WN4PA(((pixel4*)(src+y*stride))+1, b);

959 }

960 }

961 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,

962 int has_topright, ptrdiff_t _stride)

963 {

964 pixel *src = (pixel*)_src;

965 int stride = _stride>>(sizeof(pixel)-1);

966 PREDICT_8x8_LOAD_TOP;

967 PREDICT_8x8_LOAD_TOPRIGHT;

968 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;

969 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;

970 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;

971 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;

972 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;

973 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;

974 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;

975 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;

976 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;

977 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;

978 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;

979 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;

980 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;

981 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;

982 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;

983 }

984 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,

985 int has_topright, ptrdiff_t _stride)

986 {

987 pixel *src = (pixel*)_src;

988 int stride = _stride>>(sizeof(pixel)-1);

989 PREDICT_8x8_LOAD_TOP;

990 PREDICT_8x8_LOAD_LEFT;

991 PREDICT_8x8_LOAD_TOPLEFT;

992 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;

993 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;

994 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;

995 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;

996 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;

997 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;

998 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;

999 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;

1000 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;

1001 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;

1002 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;

1003 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;

1004 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;

1005 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;

1006 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;

1007 }

1008 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,

1009 int has_topright, ptrdiff_t _stride)

1010 {

1011 pixel *src = (pixel*)_src;

1012 int stride = _stride>>(sizeof(pixel)-1);

1013 PREDICT_8x8_LOAD_TOP;

1014 PREDICT_8x8_LOAD_LEFT;

1015 PREDICT_8x8_LOAD_TOPLEFT;

1016 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;

1017 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;

1018 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;

1019 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;

1020 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;

1021 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;

1022 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;

1023 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;

1024 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;

1025 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;

1026 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;

1027 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;

1028 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;

1029 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;

1030 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;

1031 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;

1032 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;

1033 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;

1034 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;

1035 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;

1036 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;

1037 SRC(7,0)= (t6 + t7 + 1) >> 1;

1038 }

1039 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,

1040 int has_topright, ptrdiff_t _stride)

1041 {

1042 pixel *src = (pixel*)_src;

1043 int stride = _stride>>(sizeof(pixel)-1);

1044 PREDICT_8x8_LOAD_TOP;

1045 PREDICT_8x8_LOAD_LEFT;

1046 PREDICT_8x8_LOAD_TOPLEFT;

1047 SRC(0,7)= (l6 + l7 + 1) >> 1;

1048 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;

1049 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;

1050 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;

1051 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;

1052 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;

1053 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;

1054 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;

1055 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;

1056 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;

1057 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;

1058 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;

1059 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;

1060 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;

1061 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;

1062 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;

1063 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;

1064 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;

1065 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;

1066 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;

1067 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;

1068 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;

1069 }

1070 static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,

1071 int has_topright, ptrdiff_t _stride)

1072 {

1073 pixel *src = (pixel*)_src;

1074 int stride = _stride>>(sizeof(pixel)-1);

1075 PREDICT_8x8_LOAD_TOP;

1076 PREDICT_8x8_LOAD_TOPRIGHT;

1077 SRC(0,0)= (t0 + t1 + 1) >> 1;

1078 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;

1079 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;

1080 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;

1081 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;

1082 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;

1083 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;

1084 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;

1085 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;

1086 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;

1087 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;

1088 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;

1089 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;

1090 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;

1091 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;

1092 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;

1093 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;

1094 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;

1095 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;

1096 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;

1097 SRC(7,6)= (t10 + t11 + 1) >> 1;

1098 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;

1099 }

1100 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,

1101 int has_topright, ptrdiff_t _stride)

1102 {

1103 pixel *src = (pixel*)_src;

1104 int stride = _stride>>(sizeof(pixel)-1);

1105 PREDICT_8x8_LOAD_LEFT;

1106 SRC(0,0)= (l0 + l1 + 1) >> 1;

1107 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;

1108 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;

1109 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;

1110 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;

1111 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;

1112 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;

1113 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;

1114 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;

1115 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;

1116 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;

1117 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;

1118 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;

1119 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;

1120 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=

1121 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=

1122 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=

1123 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;

1124 }

1125 #undef PREDICT_8x8_LOAD_LEFT

1126 #undef PREDICT_8x8_LOAD_TOP

1127 #undef PREDICT_8x8_LOAD_TOPLEFT

1128 #undef PREDICT_8x8_LOAD_TOPRIGHT

1129 #undef PREDICT_8x8_DC

1130 #undef PTR

1131 #undef PT

1132 #undef PL

1133 #undef SRC

1134

1135 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,

1136 ptrdiff_t stride)

1137 {

1138 int i;

1139 pixel *pix = (pixel*)_pix;

1140 const dctcoef *block = (const dctcoef*)_block;

1141 stride >>= sizeof(pixel)-1;

1142 pix -= stride;

1143 for(i=0; i<4; i++){

1144 pixel v = pix[0];

1145 pix[1*stride]= v += block[0];

1146 pix[2*stride]= v += block[4];

1147 pix[3*stride]= v += block[8];

1148 pix[4*stride]= v + block[12];

1149 pix++;

1150 block++;

1151 }

1152

1153 memset(_block, 0, sizeof(dctcoef) * 16);

1154 }

1155

1156 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,

1157 ptrdiff_t stride)

1158 {

1159 int i;

1160 pixel *pix = (pixel*)_pix;

1161 const dctcoef *block = (const dctcoef*)_block;

1162 stride >>= sizeof(pixel)-1;

1163 for(i=0; i<4; i++){

1164 pixel v = pix[-1];

1165 pix[0]= v += block[0];

1166 pix[1]= v += block[1];

1167 pix[2]= v += block[2];

1168 pix[3]= v + block[3];

1169 pix+= stride;

1170 block+= 4;

1171 }

1172

1173 memset(_block, 0, sizeof(dctcoef) * 16);

1174 }

1175

1176 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,

1177 ptrdiff_t stride)

1178 {

1179 int i;

1180 pixel *pix = (pixel*)_pix;

1181 const dctcoef *block = (const dctcoef*)_block;

1182 stride >>= sizeof(pixel)-1;

1183 pix -= stride;

1184 for(i=0; i<8; i++){

1185 pixel v = pix[0];

1186 pix[1*stride]= v += block[0];

1187 pix[2*stride]= v += block[8];

1188 pix[3*stride]= v += block[16];

1189 pix[4*stride]= v += block[24];

1190 pix[5*stride]= v += block[32];

1191 pix[6*stride]= v += block[40];

1192 pix[7*stride]= v += block[48];

1193 pix[8*stride]= v + block[56];

1194 pix++;

1195 block++;

1196 }

1197

1198 memset(_block, 0, sizeof(dctcoef) * 64);

1199 }

1200

1201 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,

1202 ptrdiff_t stride)

1203 {

1204 int i;

1205 pixel *pix = (pixel*)_pix;

1206 const dctcoef *block = (const dctcoef*)_block;

1207 stride >>= sizeof(pixel)-1;

1208 for(i=0; i<8; i++){

1209 pixel v = pix[-1];

1210 pix[0]= v += block[0];

1211 pix[1]= v += block[1];

1212 pix[2]= v += block[2];

1213 pix[3]= v += block[3];

1214 pix[4]= v += block[4];

1215 pix[5]= v += block[5];

1216 pix[6]= v += block[6];

1217 pix[7]= v + block[7];

1218 pix+= stride;

1219 block+= 8;

1220 }

1221

1222 memset(_block, 0, sizeof(dctcoef) * 64);

1223 }

1224

1225 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,

1226 int16_t *block,

1227 ptrdiff_t stride)

1228 {

1229 int i;

1230 for(i=0; i<16; i++)

1231 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);

1232 }

1233

1234 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,

1235 const int *block_offset,

1236 int16_t *block,

1237 ptrdiff_t stride)

1238 {

1239 int i;

1240 for(i=0; i<16; i++)

1241 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);

1242 }

1243

1244 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,

1245 int16_t *block, ptrdiff_t stride)

1246 {

1247 int i;

1248 for(i=0; i<4; i++)

1249 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);

1250 }

1251

1252 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,

1253 int16_t *block, ptrdiff_t stride)

1254 {

1255 int i;

1256 for(i=0; i<4; i++)

1257 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);

1258 for(i=4; i<8; i++)

1259 FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);

1260 }

1261

1262 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,

1263 int16_t *block,

1264 ptrdiff_t stride)

1265 {

1266 int i;

1267 for(i=0; i<4; i++)

1268 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);

1269 }

1270

1271 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,

1272 const int *block_offset,

1273 int16_t *block, ptrdiff_t stride)

1274 {

1275 int i;

1276 for(i=0; i<4; i++)

1277 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);

1278 for(i=4; i<8; i++)

1279 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);

1280 }

Generated on Sat Jan 25 2014 19:51:48 for FFmpeg by doxygen 1.8.2