1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <assert.h>
22
30
/* pixeltmp expands to int16_t.
 * NOTE(review): presumably the type used for intermediate filter samples -
 * confirm against the code that uses it. */
31 #define pixeltmp int16_t
35
37 {
39 for (
int i = 0;
i <
h; ++
i) {
44 }
45 }
46
48 {
50 for (
int i = 0;
i <
w; ++
i) {
51 const int srcB =
src[-2*srcStride];
52 const int srcA =
src[-1*srcStride];
53 const int src0 =
src[0 *srcStride];
54 const int src1 =
src[1 *srcStride];
55 const int src2 =
src[2 *srcStride];
56 const int src3 =
src[3 *srcStride];
57 const int src4 =
src[4 *srcStride];
62 }
63 }
64
66 {
70 for (
int i = 0;
i <
h + 5; ++
i) {
75 }
76 tmp -= tmpStride*(
h+5-2);
77 for (
int i = 0;
i <
w; ++
i) {
78 const int tmpB =
tmp[-2*tmpStride];
79 const int tmpA =
tmp[-1*tmpStride];
80 const int tmp0 =
tmp[0 *tmpStride];
81 const int tmp1 =
tmp[1 *tmpStride];
82 const int tmp2 =
tmp[2 *tmpStride];
83 const int tmp3 =
tmp[3 *tmpStride];
84 const int tmp4 =
tmp[4 *tmpStride];
85 dst[0*dstStride] =
av_clip_uint8(((tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3) + 512) >> 10);
86 dst[1*dstStride] =
av_clip_uint8(((tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4) + 512) >> 10);
89 }
90 }
91
93
95 {
97 "put_h264_qpel_pixels_tab not at start of H264QpelContext");
99 s->put_snow_qpel_pixels_tab[3][0] = put_snow_qpel2_mc00_8_c;
100 s->put_snow_qpel_pixels_tab[3][1] = put_snow_qpel2_mc10_8_c;
101 s->put_snow_qpel_pixels_tab[3][2] = put_snow_qpel2_mc20_8_c;
102 s->put_snow_qpel_pixels_tab[3][3] = put_snow_qpel2_mc30_8_c;
103 s->put_snow_qpel_pixels_tab[3][4] = put_snow_qpel2_mc01_8_c;
104 s->put_snow_qpel_pixels_tab[3][5] = put_snow_qpel2_mc11_8_c;
105 s->put_snow_qpel_pixels_tab[3][6] = put_snow_qpel2_mc21_8_c;
106 s->put_snow_qpel_pixels_tab[3][7] = put_snow_qpel2_mc31_8_c;
107 s->put_snow_qpel_pixels_tab[3][8] = put_snow_qpel2_mc02_8_c;
108 s->put_snow_qpel_pixels_tab[3][9] = put_snow_qpel2_mc12_8_c;
109 s->put_snow_qpel_pixels_tab[3][10] = put_snow_qpel2_mc22_8_c;
110 s->put_snow_qpel_pixels_tab[3][11] = put_snow_qpel2_mc32_8_c;
111 s->put_snow_qpel_pixels_tab[3][12] = put_snow_qpel2_mc03_8_c;
112 s->put_snow_qpel_pixels_tab[3][13] = put_snow_qpel2_mc13_8_c;
113 s->put_snow_qpel_pixels_tab[3][14] = put_snow_qpel2_mc23_8_c;
114 s->put_snow_qpel_pixels_tab[3][15] = put_snow_qpel2_mc33_8_c;
115 }
116
118 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
119 int y, x;
121 for(y=0; y<b_h; y++){
122 //FIXME ugly misuse of obmc_stride
123 const uint8_t *obmc1= obmc + y*obmc_stride;
124 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
125 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
126 const uint8_t *
obmc4= obmc3+ (obmc_stride>>1);
128 for(x=0; x<b_w; x++){
129 int v= obmc1[x] *
block[3][x + y*src_stride]
130 +obmc2[x] *
block[2][x + y*src_stride]
131 +obmc3[x] *
block[1][x + y*src_stride]
133
137 }
138 if(add){
141 if(v&(~255)) v= ~(v>>31);
142 dst8[x + y*src_stride] = v;
143 }else{
145 }
146 }
147 }
148 }
149
151 int plane_index,
level, orientation;
152
153 for(plane_index=0; plane_index<3; plane_index++){
155 for(orientation=
level ? 1:0; orientation<4; orientation++){
156 memset(
s->plane[plane_index].band[
level][orientation].state,
MID_STATE,
sizeof(
s->plane[plane_index].band[
level][orientation].state));
157 }
158 }
159 }
160 memset(
s->header_state,
MID_STATE,
sizeof(
s->header_state));
161 memset(
s->block_state,
MID_STATE,
sizeof(
s->block_state));
162 }
163
167
170
172 s->block =
av_calloc(
w *
h,
sizeof(*
s->block) << (
s->block_max_depth*2));
175
176 return 0;
177 }
178
/* 8x8 lookup table, laid out row-major.
 * It is read as weight[(dx&7) + 8*(dy&7)], i.e. the low three bits of dx pick
 * the column and the low three bits of dy pick the row; the value fetched is
 * stored in the local `a`.
 * NOTE(review): `a` looks like a blend weight out of 8 between the two
 * selected source planes - confirm against the blending loop. */
180 static const uint8_t
weight[64]={
181 8,7,6,5,4,3,2,1,
182 7,7,0,0,0,0,0,1,
183 6,0,6,0,0,0,2,0,
184 5,0,0,5,0,3,0,0,
185 4,0,0,0,4,0,0,0,
186 3,0,0,5,0,3,0,0,
187 2,0,6,0,0,0,2,0,
188 1,7,0,0,0,0,0,1,
189 };
190
/* 16x16 lookup table, laid out row-major and read as brane[dx + 16*dy].
 * Every entry packs two 4-bit indices: the high nibble becomes `l`
 * (entry >> 4) and the low nibble becomes `r` (entry & 15).  Both nibbles are
 * then used to index needs[] and hpel[].
 * An entry of 0xcc decodes to l == r == 12, and needs[12] is 15.
 * NOTE(review): 0xcc presumably marks (dx,dy) positions that are not expected
 * on this path - confirm with the caller's constraints. */
191 static const uint8_t brane[256]={
192 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
193 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
194 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
195 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
196 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
197 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
198 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
199 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
200 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
201 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
202 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
203 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
204 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
205 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
206 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
207 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
208 };
209
/* Per-index flag table, indexed by the 4-bit values decoded from brane[].
 * The flags of the two selected indices are OR-ed together into `b`
 * (b = needs[l] | needs[r]), and MC_STRIDE() treats a non-zero entry as
 * "use stride 64".
 * Only 13 initializers are given; the remaining entries (13..15) are
 * implicitly zero.
 * NOTE(review): the individual bits presumably select which intermediate
 * planes have to be computed - confirm against the tests on `b`. */
210 static const uint8_t needs[16]={
211 0,1,0,0,
212 2,4,2,0,
213 0,1,0,0,
214 15
215 };
216
220 int16_t *tmpI= tmpIt;
221 uint8_t *tmp2= tmp2t[0];
222 const uint8_t *hpel[11];
224 r= brane[dx + 16*dy]&15;
225 l= brane[dx + 16*dy]>>4;
226
227 b= needs[l] | needs[
r];
230
233 for(x=0; x < b_w; x++){
242 int am=0;
243 if(!
p ||
p->fast_mc){
244 am= 20*(
a2+
a3) - 5*(
a1+a4) + (
a0+a5);
245 tmpI[x]= am;
246 am= (am+16)>>5;
247 }else{
248 am=
p->hcoeff[0]*(
a2+
a3) +
p->hcoeff[1]*(
a1+a4) +
p->hcoeff[2]*(
a0+a5) +
p->hcoeff[3]*(a_1+a6);
249 tmpI[x]= am;
250 am= (am+32)>>6;
251 }
252
253 if(am&(~255)) am= ~(am>>31);
254 tmp2[x]= am;
255 }
256 tmpI+= 64;
257 tmp2+= 64;
259 }
261 }
263 tmp2= tmp2t[1];
264
266 for(y=0; y < b_h; y++){
267 for(x=0; x < b_w+1; x++){
276 int am=0;
278 am= (20*(
a2+
a3) - 5*(
a1+a4) + (
a0+a5) + 16)>>5;
279 else
280 am= (
p->hcoeff[0]*(
a2+
a3) +
p->hcoeff[1]*(
a1+a4) +
p->hcoeff[2]*(
a0+a5) +
p->hcoeff[3]*(a_1+a6) + 32)>>6;
281
282 if(am&(~255)) am= ~(am>>31);
283 tmp2[x]= am;
284 }
286 tmp2+= 64;
287 }
289 }
291 tmp2= tmp2t[2];
292 tmpI= tmpIt;
294 for(y=0; y < b_h; y++){
295 for(x=0; x < b_w; x++){
304 int am=0;
306 am= (20*(
a2+
a3) - 5*(
a1+a4) + (
a0+a5) + 512)>>10;
307 else
308 am= (
p->hcoeff[0]*(
a2+
a3) +
p->hcoeff[1]*(
a1+a4) +
p->hcoeff[2]*(
a0+a5) +
p->hcoeff[3]*(a_1+a6) + 2048)>>12;
309 if(am&(~255)) am= ~(am>>31);
310 tmp2[x]= am;
311 }
312 tmpI+= 64;
313 tmp2+= 64;
314 }
315 }
316
320
321 hpel[ 4]= tmp2t[1];
322 hpel[ 5]= tmp2t[2];
323 hpel[ 6]= tmp2t[1] + 1;
324
326 hpel[ 9]= hpel[1] + 64;
327 hpel[10]= hpel[8] + 1;
328
/* Stride selector for index x: 64 when needs[x] is non-zero, otherwise the
 * surrounding function's `stride`.  Relies on `needs` and `stride` being in
 * scope at the point of use.  64 is the per-row step applied to the tmp2 and
 * tmpI buffers in this function (tmp2 += 64, tmpI += 64). */
329 #define MC_STRIDE(x) (needs[x] ? 64 : stride)
330
332 int dxy = dx / 8 + dy / 8 * 4;
333 const uint8_t *
src1 = hpel[dxy ];
334 const uint8_t *
src2 = hpel[dxy + 1];
335 const uint8_t *src3 = hpel[dxy + 4];
336 const uint8_t *src4 = hpel[dxy + 5];
341 dx&=7;
342 dy&=7;
343 for(y=0; y < b_h; y++){
344 for(x=0; x < b_w; x++){
345 dst[x]= ((8-dx)*(8-dy)*
src1[x] + dx*(8-dy)*
src2[x]+
346 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
347 }
350 src3+=stride3;
351 src4+=stride4;
353 }
354 }else{
355 const uint8_t *
src1= hpel[l];
356 const uint8_t *
src2= hpel[
r];
359 int a=
weight[((dx&7) + (8*(dy&7)))];
361 for(y=0; y < b_h; y++){
362 for(x=0; x < b_w; x++){
364 }
368 }
369 }
370 }
371
/**
 * Block prediction entry point for one plane of one block.
 *
 * One branch works from the block's per-plane color
 * (block->color[plane_index]) and dispatches on the block width: 32, 16, 8
 * and 4 each have a dedicated row loop, any other width is handled pixel by
 * pixel.  The other branch works from the reference picture selected by
 * block->ref and dispatches to the put_snow_qpel_pixels_tab functions or to a
 * fallback, depending on the block shape and the motion vector bits.
 *
 * @param s           codec context; last_picture[], mv_scale, the chroma
 *                    shifts, plane[].fast_mc and put_snow_qpel_pixels_tab
 *                    are read here
 * @param dst         destination pixels
 * @param tmp         NOTE(review): presumably scratch space - confirm
 * @param stride      line stride, forwarded to the qpel functions
 * @param sx          NOTE(review): presumably block x position - confirm
 * @param sy          NOTE(review): presumably block y position - confirm
 * @param b_w         block width
 * @param b_h         block height
 * @param block       block descriptor supplying color[] and ref
 * @param plane_index plane being processed; a non-zero index selects the
 *                    chroma-shifted motion vector scale
 * @param w           NOTE(review): presumably plane width - confirm
 * @param h           NOTE(review): presumably plane height - confirm
 */
372 void ff_snow_pred_block(
SnowContext *
s, uint8_t *
dst, uint8_t *
tmp, ptrdiff_t
stride,
int sx,
int sy,
int b_w,
int b_h,
const BlockNode *
block,
int plane_index,
int w,
int h){
374 int x, y;
375 const unsigned color =
block->color[plane_index];
/* color multiplied by 0x01010101: for a value below 256 this repeats the
 * byte in all four bytes of a 32-bit word. */
376 const unsigned color4 =
color*0x01010101;
/* Width-specialised row loops: 32, 16, 8, 4, then the generic per-pixel
 * loop for every other width. */
377 if(b_w==32){
378 for(y=0; y < b_h; y++){
387 }
388 }else if(b_w==16){
389 for(y=0; y < b_h; y++){
394 }
395 }else if(b_w==8){
396 for(y=0; y < b_h; y++){
399 }
400 }else if(b_w==4){
401 for(y=0; y < b_h; y++){
403 }
404 }else{
405 for(y=0; y < b_h; y++){
406 for(x=0; x < b_w; x++){
408 }
409 }
410 }
411 }else{
/* Reference-picture branch: source plane of the frame chosen by block->ref. */
412 const uint8_t *
src =
s->last_picture[
block->ref]->data[plane_index];
/* Plane 0 uses 2*mv_scale; other planes shift that right by chroma_h_shift. */
413 const int scale= plane_index ? (2*
s->mv_scale)>>
s->chroma_h_shift : 2*
s->mv_scale;
/* Maps b_w 16 -> 0, 8 -> 1, 4 -> 2.  For b_w == 32 the expression yields -3,
 * which is why the assertion below exempts that width. */
418 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
429 }
430
431 av_assert2(
s->chroma_h_shift ==
s->chroma_v_shift);
// only one mv_scale
432
433 av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
/* First case: taken when any of the low two bits of dx or dy is set, when
 * the block is neither square nor 1:2 nor 2:1, when b_w is not a power of
 * two, when either dimension is 1, or when the plane's fast_mc flag is
 * clear. */
434 if( (dx&3) || (dy&3)
435 || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
436 || (b_w&(b_w-1))
437 || b_w == 1
438 || b_h == 1
439 || !
s->plane[plane_index].fast_mc )
/* 32-wide blocks are walked in steps of 16 rows. */
441 else if(b_w==32){
442 int y;
443 for(y=0; y<b_h; y+=16){
446 }
447 }else if(b_w==b_h)
/* Block twice as wide as tall: this call uses the next table row
 * (tab_index+1) and starts b_h pixels to the right in both dst and src;
 * src is additionally offset by 3 columns and 3 rows. */
449 else if(b_w==2*b_h){
451 s->put_snow_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](
dst+b_h,
src + 3 + b_h + 3*
stride,
stride);
452 }else{
456 }
457 }
458 }
459
/* mca(dx, dy, b_w) defines a static function named mc_block_hpel<dx><dy><b_w>
 * with the signature (uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
 * int h).  The generated function asserts h == b_w (av_assert2) and forwards
 * to mc_block() for a b_w x b_w block with the given dx/dy, passing NULL as
 * the first argument and a source pointer moved back by (HTAPS_MAX/2-1)
 * columns and (HTAPS_MAX/2-1) rows.
 * NOTE(review): the NULL first argument presumably selects mc_block()'s
 * fixed-coefficient path (its `!p || p->fast_mc` test) - confirm against
 * mc_block()'s parameter list. */
460 #define mca(dx,dy,b_w)\
461 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
462 av_assert2(h==b_w);\
463 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
464 }
465
474
/* File-local, av_cold function that takes no arguments and returns nothing.
 * NOTE(review): presumably a one-time initialiser for static data - confirm
 * against its body and its caller. */
475 static
av_cold void snow_static_init(
void)
476 {
480 }
481
487
489 s->max_ref_frames=1;
//just make sure it's not an invalid value in case of no initial keyframe
490 s->spatial_decomposition_count = 1;
491
494
496
/* mcfh(dx, dy) installs the functions generated by mca() into s->hdsp.
 * For table row 0 it stores mc_block_hpel<dx><dy>16, and for row 1 it stores
 * mc_block_hpel<dx><dy>8, in both put_pixels_tab and put_no_rnd_pixels_tab,
 * at column dy/4 + dx/8.  Requires `s` to be in scope at the point of use.
 * The expansion ends in a ';', so call sites need no trailing semicolon. */
497 #define mcfh(dx,dy)\
498 s->hdsp.put_pixels_tab [0][dy/4+dx/8]=\
499 s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
500 mc_block_hpel ## dx ## dy ## 16;\
501 s->hdsp.put_pixels_tab [1][dy/4+dx/8]=\
502 s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
503 mc_block_hpel ## dx ## dy ## 8;
504
509
510 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
511
514
521
524 if (!
s->last_picture[
i])
526 }
527
530 if (!
s->mconly_picture || !
s->current_picture)
532
534
535 return 0;
536 }
537
540 int plane_index,
level, orientation;
541
543 int emu_buf_size;
548 }
549
550 for(plane_index=0; plane_index <
s->nb_planes; plane_index++){
551 int w=
s->avctx->width;
552 int h=
s->avctx->height;
553
554 if(plane_index){
557 }
558 s->plane[plane_index].width =
w;
559 s->plane[plane_index].height=
h;
560
562 for(orientation=
level ? 1 : 0; orientation<4; orientation++){
564
565 b->buf=
s->spatial_dwt_buffer;
567 b->stride=
s->plane[plane_index].width << (
s->spatial_decomposition_count -
level);
568 b->width = (
w + !(orientation&1))>>1;
569 b->height= (
h + !(orientation>1))>>1;
570
571 b->stride_line = 1 << (
s->spatial_decomposition_count -
level);
574
575 if(orientation&1){
577 b->buf_x_offset = (
w+1)>>1;
578 }
579 if(orientation>1){
580 b->buf +=
b->stride>>1;
581 b->buf_y_offset =
b->stride_line >> 1;
582 }
583 b->ibuf=
s->spatial_idwt_buffer + (
b->buf -
s->spatial_dwt_buffer);
584
586 b->parent= &
s->plane[plane_index].band[
level-1][orientation];
587 //FIXME avoid this realloc
590 sizeof(*
b->x_coeff));
593 }
596 }
597 }
598
599 return 0;
600 }
601
603 {
605
606 tmp=
s->last_picture[
s->max_ref_frames-1];
607 for (
int i =
s->max_ref_frames - 1;
i > 0;
i--)
608 s->last_picture[
i] =
s->last_picture[
i-1];
609 s->last_picture[0] =
s->current_picture;
610 s->current_picture =
tmp;
611
613
617 }else{
619 for(
i=0;
i<
s->max_ref_frames &&
s->last_picture[
i]->data[0];
i++)
621 break;
623 if(
s->ref_frames==0){
626 }
628 }
629
630 return 0;
631 }
632
634 {
635 int plane_index,
level, orientation,
i;
636
642
646
649 }
650
651 for(plane_index=0; plane_index <
MAX_PLANES; plane_index++){
653 for(orientation=
level ? 1 : 0; orientation<4; orientation++){
655
657 }
658 }
659 }
662 }