1 /*
2 * HEVC video decoder
3 *
4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
25
28
29
32 {
35
37
38 for (y = 0; y <
height; y++) {
39 for (x = 0; x <
width; x++)
42 }
43 }
44
47 {
50
52
53 for (y = 0; y <
size; y++) {
54 for (x = 0; x <
size; x++) {
56 coeffs++;
57 }
59 }
60 }
61
64 {
66 }
67
70 {
72 }
73
76 {
78 }
79
82 {
84 }
85
86
88 {
89 int16_t *coeffs = (int16_t *) _coeffs;
91 int size = 1 << log2_size;
92
95 for (y = 0; y < size - 1; y++) {
96 for (x = 0; x <
size; x++)
97 coeffs[x] += coeffs[x - size];
99 }
100 } else {
101 for (y = 0; y <
size; y++) {
102 for (x = 1; x <
size; x++)
103 coeffs[x] += coeffs[x - 1];
105 }
106 }
107 }
108
110 {
113 int size = 1 << log2_size;
114 int16_t *coeffs = _coeffs;
115
116
117 if (shift > 0) {
118 int offset = 1 << (shift - 1);
119 for (y = 0; y <
size; y++) {
120 for (x = 0; x <
size; x++) {
121 *coeffs = (*coeffs +
offset) >> shift;
122 coeffs++;
123 }
124 }
125 } else {
126 for (y = 0; y <
size; y++) {
127 for (x = 0; x <
size; x++) {
128 *coeffs = *coeffs << -
shift;
129 coeffs++;
130 }
131 }
132 }
133 }
134
/*
 * Store policies handed to the TR_* transform macros as their `assign`
 * argument.  Each expands to a bare assignment expression (no trailing
 * semicolon).  SCALE and ADD_AND_SCALE read `add` and `shift` from the
 * scope in which they are expanded; neither is a macro parameter.
 */
/* SET: store x in dst unchanged (no rounding, no clipping). */
135 #define SET(dst, x) (dst) = (x)
/* SCALE: add `add`, shift right by `shift`, pass through av_clip_int16(). */
136 #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
/*
 * ADD_AND_SCALE: compute the same scaled value as SCALE, add it to the
 * current contents of dst, and pass the sum through av_clip_pixel().
 * dst is therefore both read and written.
 */
137 #define ADD_AND_SCALE(dst, x) \
138 (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
139
/*
 * Four-point transform of one line, using only the multipliers 29, 55
 * and 74.
 *
 *   dst, src - arrays accessed at offsets 0, step, 2*step and 3*step
 *              (the same step is used for reading and for writing)
 *   step     - element distance between consecutive samples of the line
 *   assign   - store policy invoked as assign(lvalue, value), e.g. SET,
 *              SCALE or ADD_AND_SCALE
 *
 * The partial sums c0..c3 are captured before any store, and the first
 * store (dst[2 * step]) takes its operands straight from src before
 * anything has been written.  Provided `assign` only stores to its first
 * argument, no location is read after it has been overwritten, even when
 * dst and src name the same array.
 *
 * The arguments are pasted unparenthesized and evaluated several times,
 * so they must be simple, side-effect-free expressions.
 */
140 #define TR_4x4_LUMA(dst, src, step, assign) \
141 do { \
142 int c0 = src[0 * step] + src[2 * step]; \
143 int c1 = src[2 * step] + src[3 * step]; \
144 int c2 = src[0 * step] - src[3 * step]; \
145 int c3 = 74 * src[1 * step]; \
146 \
147 assign(dst[2 * step], 74 * (src[0 * step] - \
148 src[2 * step] + \
149 src[3 * step])); \
150 assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \
151 assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \
152 assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \
153 } while (0)
154
156 {
157 int i;
159 int add = 1 << (shift - 1);
160 int16_t *
src = coeffs;
161
162 for (i = 0; i < 4; i++) {
164 src++;
165 }
166
168 add = 1 << (shift - 1);
169 for (i = 0; i < 4; i++) {
171 coeffs += 4;
172 }
173 }
174
175 #undef TR_4x4_LUMA
176
/*
 * Four-point butterfly transform of one line.
 *
 *   dst, dstep - output array and the element distance between outputs
 *   src, sstep - input array and the element distance between inputs
 *   assign     - store policy invoked as assign(lvalue, value)
 *   end        - accepted so that all TR_* macros share one parameter
 *                list; it is never expanded in this body
 *
 * The even half (e0, e1) is built from inputs 0 and 2 with weight 64,
 * the odd half (o0, o1) from inputs 1 and 3 with weights 83 and 36.
 * Outputs 0/3 are e0 +/- o0 and outputs 1/2 are e1 +/- o1.
 *
 * All four inputs are read into the const locals before the first store,
 * so the stores cannot disturb the inputs even if dst aliases src.
 */
177 #define TR_4(dst, src, dstep, sstep, assign, end) \
178 do { \
179 const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
180 const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
181 const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
182 const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
183 \
184 assign(dst[0 * dstep], e0 + o0); \
185 assign(dst[1 * dstep], e1 + o1); \
186 assign(dst[2 * dstep], e1 - o1); \
187 assign(dst[3 * dstep], e0 - o0); \
188 } while (0)
189
/*
 * Eight-point transform of one line, built on top of TR_4.
 *
 *   dst, dstep - output array and the element distance between outputs
 *   src, sstep - input array and the element distance between inputs
 *   assign     - store policy invoked as assign(lvalue, value)
 *   end        - exclusive upper bound on the odd input indices that are
 *                accumulated; inputs at odd index >= end are not read
 *
 * Odd half:  o_8[i] sums transform[4 * j][i] * src[j * sstep] over the odd
 *            j in [1, end).  `transform` is a coefficient table that must
 *            be visible where the macro is expanded.
 * Even half: TR_4 applied to every second input (stride 2 * sstep), with
 *            its results stored unscaled into e_8 via SET.
 * Output:    dst[i] = e_8[i] + o_8[i] and dst[7 - i] = e_8[i] - o_8[i].
 *
 * Both halves are complete before the first store, so dst may alias src.
 * The body declares its own i and j inside the do-block; when this macro
 * is expanded inside TR_16 they shadow the outer pair.
 */
190 #define TR_8(dst, src, dstep, sstep, assign, end) \
191 do { \
192 int i, j; \
193 int e_8[4]; \
194 int o_8[4] = { 0 }; \
195 for (i = 0; i < 4; i++) \
196 for (j = 1; j < end; j += 2) \
197 o_8[i] += transform[4 * j][i] * src[j * sstep]; \
198 TR_4(e_8, src, 1, 2 * sstep, SET, 4); \
199 \
200 for (i = 0; i < 4; i++) { \
201 assign(dst[i * dstep], e_8[i] + o_8[i]); \
202 assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \
203 } \
204 } while (0)
205
/*
 * Sixteen-point transform of one line, built on top of TR_8.
 *
 *   dst, dstep - output array and the element distance between outputs
 *   src, sstep - input array and the element distance between inputs
 *   assign     - store policy invoked as assign(lvalue, value)
 *   end        - exclusive upper bound on the odd input indices that are
 *                accumulated into the odd half
 *
 * Odd half:  o_16[i] sums transform[2 * j][i] * src[j * sstep] over the
 *            odd j in [1, end).
 * Even half: TR_8 on every second input, stored unscaled into e_16.  Note
 *            that the nested TR_8 is always given end = 8, i.e. the bound
 *            passed to this macro limits only the 16-point odd half.
 * Output:    dst[i] = e_16[i] + o_16[i], dst[15 - i] = e_16[i] - o_16[i].
 *
 * Both halves are complete before the first store, so dst may alias src.
 */
206 #define TR_16(dst, src, dstep, sstep, assign, end) \
207 do { \
208 int i, j; \
209 int e_16[8]; \
210 int o_16[8] = { 0 }; \
211 for (i = 0; i < 8; i++) \
212 for (j = 1; j < end; j += 2) \
213 o_16[i] += transform[2 * j][i] * src[j * sstep]; \
214 TR_8(e_16, src, 1, 2 * sstep, SET, 8); \
215 \
216 for (i = 0; i < 8; i++) { \
217 assign(dst[i * dstep], e_16[i] + o_16[i]); \
218 assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \
219 } \
220 } while (0)
221
/*
 * Thirty-two-point transform of one line, built on top of TR_16.
 *
 *   dst, dstep - output array and the element distance between outputs
 *   src, sstep - input array and the element distance between inputs
 *   assign     - store policy invoked as assign(lvalue, value)
 *   end        - exclusive upper bound on the odd input indices that are
 *                accumulated into the odd half
 *
 * Odd half:  o_32[i] sums transform[j][i] * src[j * sstep] over the odd j
 *            in [1, end).
 * Even half: TR_16 on every second input, stored unscaled into e_32; the
 *            nested call receives end/2 as its own bound.
 * Output:    dst[i] = e_32[i] + o_32[i], dst[31 - i] = e_32[i] - o_32[i].
 *
 * `end` is pasted unparenthesized into `j < end` and `end/2`, so callers
 * should pass a plain identifier or constant.
 */
222 #define TR_32(dst, src, dstep, sstep, assign, end) \
223 do { \
224 int i, j; \
225 int e_32[16]; \
226 int o_32[16] = { 0 }; \
227 for (i = 0; i < 16; i++) \
228 for (j = 1; j < end; j += 2) \
229 o_32[i] += transform[j][i] * src[j * sstep]; \
230 TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \
231 \
232 for (i = 0; i < 16; i++) { \
233 assign(dst[i * dstep], e_32[i] + o_32[i]); \
234 assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \
235 } \
236 } while (0)
237
/*
 * Local-variable declarations for the size-specific inverse transforms.
 * Each macro expands to declarations WITHOUT a trailing semicolon and
 * reads `col_limit` from the scope in which it is expanded.
 *
 *   limit  = FFMIN(col_limit, H)
 *   limit2 = FFMIN(col_limit + 4, H)
 *
 * IDCT_VAR4 declares only limit2; IDCT_VAR8 declares both, and the 16- and
 * 32-point variants simply forward to IDCT_VAR8.
 * NOTE(review): the 4-point variant can presumably omit `limit` because
 * TR_4 never expands its `end` argument - confirm at the expansion site.
 */
238 #define IDCT_VAR4(H) \
239 int limit2 = FFMIN(col_limit + 4, H)
240 #define IDCT_VAR8(H) \
241 int limit = FFMIN(col_limit, H); \
242 int limit2 = FFMIN(col_limit + 4, H)
243 #define IDCT_VAR16(H) IDCT_VAR8(H)
244 #define IDCT_VAR32(H) IDCT_VAR8(H)
245
247 static void FUNC(idct_##H ##x ##H )( \
248 int16_t *coeffs, int col_limit) { \
249 int i; \
250 int shift = 7; \
251 int add = 1 << (shift - 1); \
252 int16_t *src = coeffs; \
253 IDCT_VAR ##H(H); \
254 \
255 for (i = 0; i < H; i++) { \
256 TR_ ## H(src, src, H, H, SCALE, limit2); \
257 if (limit2 < H && i%4 == 0 && !!i) \
258 limit2 -= 4; \
259 src++; \
260 } \
261 \
262 shift = 20 - BIT_DEPTH; \
263 add = 1 << (shift - 1); \
264 for (i = 0; i < H; i++) { \
265 TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \
266 coeffs += H; \
267 } \
268 }
269
271 static void FUNC(idct_##H ##x ##H ##_dc)( \
272 int16_t *coeffs) { \
273 int i, j; \
274 int shift = 14 - BIT_DEPTH; \
275 int add = 1 << (shift - 1); \
276 int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \
277 \
278 for (j = 0; j < H; j++) { \
279 for (i = 0; i < H; i++) { \
280 coeffs[i+j*H] = coeff; \
281 } \
282 } \
283 }
284
289
294
295 #undef TR_4
296 #undef TR_8
297 #undef TR_16
298 #undef TR_32
299
300 #undef SET
301 #undef SCALE
302 #undef ADD_AND_SCALE
303
305 ptrdiff_t stride_dst, ptrdiff_t stride_src,
306 int16_t *sao_offset_val, int sao_left_class,
308 {
314
315 stride_dst /=
sizeof(
pixel);
316 stride_src /=
sizeof(
pixel);
317
318 for (k = 0; k < 4; k++)
319 offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
320 for (y = 0; y <
height; y++) {
321 for (x = 0; x <
width; x++)
322 dst[x] =
av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
323 dst += stride_dst;
324 src += stride_src;
325 }
326 }
327
/*
 * Three-way comparison: yields 1 when a > b, -1 when a < b and 0 when the
 * two are equal.  Each argument is evaluated twice, so pass expressions
 * without side effects.
 */
328 #define CMP(a, b) (((a) > (b)) - ((a) < (b)))
329
332
/*
 * edge_idx: five-entry remapping table.  The statement that indexes it is
 * not visible in this view - presumably it is indexed by 2 + diff0 + diff1
 * (each diff is a CMP result in -1..1); TODO confirm.
 */
333 static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
/*
 * pos[eo][k] holds the { dx, dy } offset of neighbour k (0 or 1) for
 * edge-offset class eo.  The code below turns an entry into a linear
 * offset as dx + dy * stride_src, so dx moves along a row and dy moves
 * between rows; the two neighbours of every class are mirror images.
 *
 * NOTE(review): the corner handling later in this file associates
 * SAO_EO_45D with the upper-right / lower-left corners, which is the
 * diagonal labelled "135 degree" here (and vice versa).  Confirm which
 * naming the eo class enum follows before relying on these labels.
 */
334 static const int8_t pos[4][2][2] = {
335 { { -1, 0 }, { 1, 0 } }, // horizontal
336 { { 0, -1 }, { 0, 1 } }, // vertical
337 { { -1, -1 }, { 1, 1 } }, // 45 degree
338 { { 1, -1 }, { -1, 1 } }, // 135 degree
339 };
342 int a_stride, b_stride;
345 stride_dst /=
sizeof(
pixel);
346
347 a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
348 b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
349 for (y = 0; y <
height; y++) {
350 for (x = 0; x <
width; x++) {
351 int diff0 =
CMP(src[x], src[x + a_stride]);
352 int diff1 =
CMP(src[x], src[x + b_stride]);
355 }
356 src += stride_src;
357 dst += stride_dst;
358 }
359 }
360
362 ptrdiff_t stride_dst, ptrdiff_t stride_src,
SAOParams *sao,
363 int *borders, int _width, int _height,
366 {
370 int16_t *sao_offset_val = sao->offset_val[c_idx];
371 int sao_eo_class = sao->eo_class[c_idx];
372 int init_x = 0, width = _width,
height = _height;
373
374 stride_dst /=
sizeof(
pixel);
375 stride_src /=
sizeof(
pixel);
376
378 if (borders[0]) {
380 for (y = 0; y <
height; y++) {
381 dst[y * stride_dst] =
av_clip_pixel(src[y * stride_src] + offset_val);
382 }
383 init_x = 1;
384 }
385 if (borders[2]) {
388 for (x = 0; x <
height; x++) {
390 }
391 width--;
392 }
393 }
395 if (borders[1]) {
397 for (x = init_x; x <
width; x++)
399 }
400 if (borders[3]) {
402 int y_stride_dst = stride_dst * (height - 1);
403 int y_stride_src = stride_src * (height - 1);
404 for (x = init_x; x <
width; x++)
405 dst[x + y_stride_dst] =
av_clip_pixel(src[x + y_stride_src] + offset_val);
406 height--;
407 }
408 }
409 }
410
412 ptrdiff_t stride_dst, ptrdiff_t stride_src,
SAOParams *sao,
413 int *borders, int _width, int _height,
416 {
420 int16_t *sao_offset_val = sao->offset_val[c_idx];
421 int sao_eo_class = sao->eo_class[c_idx];
422 int init_x = 0, init_y = 0, width = _width,
height = _height;
423
424 stride_dst /=
sizeof(
pixel);
425 stride_src /=
sizeof(
pixel);
426
428 if (borders[0]) {
430 for (y = 0; y <
height; y++) {
431 dst[y * stride_dst] =
av_clip_pixel(src[y * stride_src] + offset_val);
432 }
433 init_x = 1;
434 }
435 if (borders[2]) {
438 for (x = 0; x <
height; x++) {
440 }
441 width--;
442 }
443 }
445 if (borders[1]) {
447 for (x = init_x; x <
width; x++)
449 init_y = 1;
450 }
451 if (borders[3]) {
453 int y_stride_dst = stride_dst * (height - 1);
454 int y_stride_src = stride_src * (height - 1);
455 for (x = init_x; x <
width; x++)
456 dst[x + y_stride_dst] =
av_clip_pixel(src[x + y_stride_src] + offset_val);
457 height--;
458 }
459 }
460
461 {
462 int save_upper_left = !diag_edge[0] && sao_eo_class ==
SAO_EO_135D && !borders[0] && !borders[1];
463 int save_upper_right = !diag_edge[1] && sao_eo_class ==
SAO_EO_45D && !borders[1] && !borders[2];
464 int save_lower_right = !diag_edge[2] && sao_eo_class ==
SAO_EO_135D && !borders[2] && !borders[3];
465 int save_lower_left = !diag_edge[3] && sao_eo_class ==
SAO_EO_45D && !borders[0] && !borders[3];
466
467 // Restore pixels that can't be modified
469 for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
470 dst[y*stride_dst] = src[y*stride_src];
471 }
473 for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
474 dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
475 }
476
478 for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
479 dst[x] = src[x];
480 }
482 for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
483 dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
484 }
486 dst[0] = src[0];
487 if(diag_edge[1] && sao_eo_class ==
SAO_EO_45D)
488 dst[width-1] = src[width-1];
490 dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
491 if(diag_edge[3] && sao_eo_class ==
SAO_EO_45D)
492 dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
493
494 }
495 }
496
497 #undef CMP
498
499 ////////////////////////////////////////////////////////////////////////////////
500 //
501 ////////////////////////////////////////////////////////////////////////////////
503 uint8_t *_src, ptrdiff_t _srcstride,
505 {
508 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
509
510 for (y = 0; y <
height; y++) {
511 for (x = 0; x <
width; x++)
513 src += srcstride;
515 }
516 }
517
520 {
523 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
525 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
526
527 for (y = 0; y <
height; y++) {
528 memcpy(dst, src, width *
sizeof(
pixel));
529 src += srcstride;
530 dst += dststride;
531 }
532 }
533
535 int16_t *src2,
537 {
540 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
542 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
543
545 #if BIT_DEPTH < 14
546 int offset = 1 << (shift - 1);
547 #else
548 int offset = 0;
549 #endif
550
551 for (y = 0; y <
height; y++) {
552 for (x = 0; x <
width; x++)
554 src += srcstride;
555 dst += dststride;
557 }
558 }
559
561 int height,
int denom,
int wx,
int ox, intptr_t mx, intptr_t my,
int width)
562 {
565 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
567 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
569 #if BIT_DEPTH < 14
570 int offset = 1 << (shift - 1);
571 #else
572 int offset = 0;
573 #endif
574
575 ox = ox * (1 << (BIT_DEPTH - 8));
576 for (y = 0; y <
height; y++) {
577 for (x = 0; x <
width; x++)
578 dst[x] =
av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
579 src += srcstride;
580 dst += dststride;
581 }
582 }
583
585 int16_t *src2,
586 int height,
int denom,
int wx0,
int wx1,
587 int ox0,
int ox1, intptr_t mx, intptr_t my,
int width)
588 {
591 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
593 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
594
596 int log2Wd = denom + shift - 1;
597
600 for (y = 0; y <
height; y++) {
601 for (x = 0; x <
width; x++) {
602 dst[x] =
av_clip_pixel(( (src[x] << (14 -
BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
603 }
604 src += srcstride;
605 dst += dststride;
607 }
608 }
609
610 ////////////////////////////////////////////////////////////////////////////////
611 //
612 ////////////////////////////////////////////////////////////////////////////////
/*
 * Eight-tap weighted sum around sample x of src.
 *
 *   src    - array being filtered
 *   stride - element distance between consecutive taps (1 would step
 *            along a row; a row stride would step between rows)
 *
 * Taps filter[0]..filter[7] are applied to src[x - 3 * stride] through
 * src[x + 4 * stride], so three samples before and four samples after x
 * must be readable.  `filter` and `x` are taken from the scope in which
 * the macro is expanded.  The result is the raw sum: no rounding, shift
 * or clipping is applied here.
 *
 * `src` and `stride` are pasted unparenthesized; pass plain identifiers.
 */
613 #define QPEL_FILTER(src, stride) \
614 (filter[0] * src[x - 3 * stride] + \
615 filter[1] * src[x - 2 * stride] + \
616 filter[2] * src[x - stride] + \
617 filter[3] * src[x ] + \
618 filter[4] * src[x + stride] + \
619 filter[5] * src[x + 2 * stride] + \
620 filter[6] * src[x + 3 * stride] + \
621 filter[7] * src[x + 4 * stride])
622
624 uint8_t *_src, ptrdiff_t _srcstride,
626 {
629 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
631 for (y = 0; y <
height; y++) {
632 for (x = 0; x <
width; x++)
634 src += srcstride;
636 }
637 }
638
640 uint8_t *_src, ptrdiff_t _srcstride,
642 {
645 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
647 for (y = 0; y <
height; y++) {
648 for (x = 0; x <
width; x++)
650 src += srcstride;
652 }
653 }
654
657 ptrdiff_t _srcstride,
659 intptr_t my,
int width)
660 {
664 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
666 int16_t *tmp = tmp_array;
667
671 for (x = 0; x <
width; x++)
673 src += srcstride;
675 }
676
679 for (y = 0; y <
height; y++) {
680 for (x = 0; x <
width; x++)
684 }
685 }
686
688 uint8_t *_src, ptrdiff_t _srcstride,
690 {
693 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
695 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
698
699 #if BIT_DEPTH < 14
700 int offset = 1 << (shift - 1);
701 #else
702 int offset = 0;
703 #endif
704
705 for (y = 0; y <
height; y++) {
706 for (x = 0; x <
width; x++)
708 src += srcstride;
709 dst += dststride;
710 }
711 }
712
714 int16_t *src2,
716 {
719 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
721 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
722
724
726 #if BIT_DEPTH < 14
727 int offset = 1 << (shift - 1);
728 #else
729 int offset = 0;
730 #endif
731
732 for (y = 0; y <
height; y++) {
733 for (x = 0; x <
width; x++)
735 src += srcstride;
736 dst += dststride;
738 }
739 }
740
742 uint8_t *_src, ptrdiff_t _srcstride,
744 {
747 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
749 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
752
753 #if BIT_DEPTH < 14
754 int offset = 1 << (shift - 1);
755 #else
756 int offset = 0;
757 #endif
758
759 for (y = 0; y <
height; y++) {
760 for (x = 0; x <
width; x++)
762 src += srcstride;
763 dst += dststride;
764 }
765 }
766
767
769 int16_t *src2,
771 {
774 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
776 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
777
779
781 #if BIT_DEPTH < 14
782 int offset = 1 << (shift - 1);
783 #else
784 int offset = 0;
785 #endif
786
787 for (y = 0; y <
height; y++) {
788 for (x = 0; x <
width; x++)
790 src += srcstride;
791 dst += dststride;
793 }
794 }
795
797 uint8_t *_src, ptrdiff_t _srcstride,
799 {
803 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
805 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
807 int16_t *tmp = tmp_array;
809
810 #if BIT_DEPTH < 14
811 int offset = 1 << (shift - 1);
812 #else
813 int offset = 0;
814 #endif
815
819 for (x = 0; x <
width; x++)
821 src += srcstride;
823 }
824
827
828 for (y = 0; y <
height; y++) {
829 for (x = 0; x <
width; x++)
832 dst += dststride;
833 }
834 }
835
837 int16_t *src2,
839 {
843 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
845 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
847 int16_t *tmp = tmp_array;
849 #if BIT_DEPTH < 14
850 int offset = 1 << (shift - 1);
851 #else
852 int offset = 0;
853 #endif
854
858 for (x = 0; x <
width; x++)
860 src += srcstride;
862 }
863
866
867 for (y = 0; y <
height; y++) {
868 for (x = 0; x <
width; x++)
871 dst += dststride;
873 }
874 }
875
877 uint8_t *_src, ptrdiff_t _srcstride,
878 int height,
int denom,
int wx,
int ox,
879 intptr_t mx, intptr_t my,
int width)
880 {
883 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
885 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
888 #if BIT_DEPTH < 14
889 int offset = 1 << (shift - 1);
890 #else
891 int offset = 0;
892 #endif
893
894 ox = ox * (1 << (BIT_DEPTH - 8));
895 for (y = 0; y <
height; y++) {
896 for (x = 0; x <
width; x++)
898 src += srcstride;
899 dst += dststride;
900 }
901 }
902
904 int16_t *src2,
905 int height,
int denom,
int wx0,
int wx1,
906 int ox0,
int ox1, intptr_t mx, intptr_t my,
int width)
907 {
910 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
912 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
913
915
917 int log2Wd = denom + shift - 1;
918
921 for (y = 0; y <
height; y++) {
922 for (x = 0; x <
width; x++)
924 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
925 src += srcstride;
926 dst += dststride;
928 }
929 }
930
932 uint8_t *_src, ptrdiff_t _srcstride,
933 int height,
int denom,
int wx,
int ox,
934 intptr_t mx, intptr_t my,
int width)
935 {
938 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
940 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
943 #if BIT_DEPTH < 14
944 int offset = 1 << (shift - 1);
945 #else
946 int offset = 0;
947 #endif
948
949 ox = ox * (1 << (BIT_DEPTH - 8));
950 for (y = 0; y <
height; y++) {
951 for (x = 0; x <
width; x++)
953 src += srcstride;
954 dst += dststride;
955 }
956 }
957
959 int16_t *src2,
960 int height,
int denom,
int wx0,
int wx1,
961 int ox0,
int ox1, intptr_t mx, intptr_t my,
int width)
962 {
965 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
967 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
968
970
972 int log2Wd = denom + shift - 1;
973
976 for (y = 0; y <
height; y++) {
977 for (x = 0; x <
width; x++)
979 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
980 src += srcstride;
981 dst += dststride;
983 }
984 }
985
987 uint8_t *_src, ptrdiff_t _srcstride,
988 int height,
int denom,
int wx,
int ox,
989 intptr_t mx, intptr_t my,
int width)
990 {
994 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
996 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
998 int16_t *tmp = tmp_array;
1000 #if BIT_DEPTH < 14
1001 int offset = 1 << (shift - 1);
1002 #else
1003 int offset = 0;
1004 #endif
1005
1009 for (x = 0; x <
width; x++)
1011 src += srcstride;
1013 }
1014
1017
1018 ox = ox * (1 << (BIT_DEPTH - 8));
1019 for (y = 0; y <
height; y++) {
1020 for (x = 0; x <
width; x++)
1023 dst += dststride;
1024 }
1025 }
1026
1028 int16_t *src2,
1029 int height,
int denom,
int wx0,
int wx1,
1030 int ox0,
int ox1, intptr_t mx, intptr_t my,
int width)
1031 {
1035 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1037 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1039 int16_t *tmp = tmp_array;
1041 int log2Wd = denom + shift - 1;
1042
1046 for (x = 0; x <
width; x++)
1048 src += srcstride;
1050 }
1051
1054
1057 for (y = 0; y <
height; y++) {
1058 for (x = 0; x <
width; x++)
1060 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1062 dst += dststride;
1064 }
1065 }
1066
1067 ////////////////////////////////////////////////////////////////////////////////
1068 //
1069 ////////////////////////////////////////////////////////////////////////////////
/*
 * Four-tap weighted sum around sample x of src.
 *
 *   src    - array being filtered
 *   stride - element distance between consecutive taps
 *
 * Taps filter[0]..filter[3] are applied to src[x - stride] through
 * src[x + 2 * stride], so one sample before and two samples after x must
 * be readable.  `filter` and `x` are taken from the scope in which the
 * macro is expanded.  The result is the raw sum: no rounding, shift or
 * clipping is applied here.
 *
 * `src` and `stride` are pasted unparenthesized; pass plain identifiers.
 */
1070 #define EPEL_FILTER(src, stride) \
1071 (filter[0] * src[x - stride] + \
1072 filter[1] * src[x] + \
1073 filter[2] * src[x + stride] + \
1074 filter[3] * src[x + 2 * stride])
1075
1077 uint8_t *_src, ptrdiff_t _srcstride,
1079 {
1082 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1084 for (y = 0; y <
height; y++) {
1085 for (x = 0; x <
width; x++)
1087 src += srcstride;
1089 }
1090 }
1091
1093 uint8_t *_src, ptrdiff_t _srcstride,
1095 {
1098 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1100
1101 for (y = 0; y <
height; y++) {
1102 for (x = 0; x <
width; x++)
1104 src += srcstride;
1106 }
1107 }
1108
1110 uint8_t *_src, ptrdiff_t _srcstride,
1112 {
1115 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1118 int16_t *tmp = tmp_array;
1119
1121
1123 for (x = 0; x <
width; x++)
1125 src += srcstride;
1127 }
1128
1131
1132 for (y = 0; y <
height; y++) {
1133 for (x = 0; x <
width; x++)
1137 }
1138 }
1139
1142 {
1145 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1147 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1150 #if BIT_DEPTH < 14
1151 int offset = 1 << (shift - 1);
1152 #else
1153 int offset = 0;
1154 #endif
1155
1156 for (y = 0; y <
height; y++) {
1157 for (x = 0; x <
width; x++)
1159 src += srcstride;
1160 dst += dststride;
1161 }
1162 }
1163
1165 int16_t *src2,
1167 {
1170 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1172 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1175 #if BIT_DEPTH < 14
1176 int offset = 1 << (shift - 1);
1177 #else
1178 int offset = 0;
1179 #endif
1180
1181 for (y = 0; y <
height; y++) {
1182 for (x = 0; x <
width; x++) {
1184 }
1185 dst += dststride;
1186 src += srcstride;
1188 }
1189 }
1190
1193 {
1196 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1198 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1201 #if BIT_DEPTH < 14
1202 int offset = 1 << (shift - 1);
1203 #else
1204 int offset = 0;
1205 #endif
1206
1207 for (y = 0; y <
height; y++) {
1208 for (x = 0; x <
width; x++)
1210 src += srcstride;
1211 dst += dststride;
1212 }
1213 }
1214
1216 int16_t *src2,
1218 {
1221 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1224 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1226 #if BIT_DEPTH < 14
1227 int offset = 1 << (shift - 1);
1228 #else
1229 int offset = 0;
1230 #endif
1231
1232 for (y = 0; y <
height; y++) {
1233 for (x = 0; x <
width; x++)
1235 dst += dststride;
1236 src += srcstride;
1238 }
1239 }
1240
1243 {
1246 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1248 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1251 int16_t *tmp = tmp_array;
1253 #if BIT_DEPTH < 14
1254 int offset = 1 << (shift - 1);
1255 #else
1256 int offset = 0;
1257 #endif
1258
1260
1262 for (x = 0; x <
width; x++)
1264 src += srcstride;
1266 }
1267
1270
1271 for (y = 0; y <
height; y++) {
1272 for (x = 0; x <
width; x++)
1275 dst += dststride;
1276 }
1277 }
1278
1280 int16_t *src2,
1282 {
1285 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1287 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1290 int16_t *tmp = tmp_array;
1292 #if BIT_DEPTH < 14
1293 int offset = 1 << (shift - 1);
1294 #else
1295 int offset = 0;
1296 #endif
1297
1299
1301 for (x = 0; x <
width; x++)
1303 src += srcstride;
1305 }
1306
1309
1310 for (y = 0; y <
height; y++) {
1311 for (x = 0; x <
width; x++)
1314 dst += dststride;
1316 }
1317 }
1318
1320 int height,
int denom,
int wx,
int ox, intptr_t mx, intptr_t my,
int width)
1321 {
1324 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1326 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1329 #if BIT_DEPTH < 14
1330 int offset = 1 << (shift - 1);
1331 #else
1332 int offset = 0;
1333 #endif
1334
1335 ox = ox * (1 << (BIT_DEPTH - 8));
1336 for (y = 0; y <
height; y++) {
1337 for (x = 0; x <
width; x++) {
1339 }
1340 dst += dststride;
1341 src += srcstride;
1342 }
1343 }
1344
1346 int16_t *src2,
1347 int height,
int denom,
int wx0,
int wx1,
1348 int ox0,
int ox1, intptr_t mx, intptr_t my,
int width)
1349 {
1352 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1354 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1357 int log2Wd = denom + shift - 1;
1358
1361 for (y = 0; y <
height; y++) {
1362 for (x = 0; x <
width; x++)
1364 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1365 src += srcstride;
1366 dst += dststride;
1368 }
1369 }
1370
1372 int height,
int denom,
int wx,
int ox, intptr_t mx, intptr_t my,
int width)
1373 {
1376 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1378 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1381 #if BIT_DEPTH < 14
1382 int offset = 1 << (shift - 1);
1383 #else
1384 int offset = 0;
1385 #endif
1386
1387 ox = ox * (1 << (BIT_DEPTH - 8));
1388 for (y = 0; y <
height; y++) {
1389 for (x = 0; x <
width; x++) {
1391 }
1392 dst += dststride;
1393 src += srcstride;
1394 }
1395 }
1396
1398 int16_t *src2,
1399 int height,
int denom,
int wx0,
int wx1,
1400 int ox0,
int ox1, intptr_t mx, intptr_t my,
int width)
1401 {
1404 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1407 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1409 int log2Wd = denom + shift - 1;
1410
1413 for (y = 0; y <
height; y++) {
1414 for (x = 0; x <
width; x++)
1416 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1417 src += srcstride;
1418 dst += dststride;
1420 }
1421 }
1422
1424 int height,
int denom,
int wx,
int ox, intptr_t mx, intptr_t my,
int width)
1425 {
1428 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1430 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1433 int16_t *tmp = tmp_array;
1435 #if BIT_DEPTH < 14
1436 int offset = 1 << (shift - 1);
1437 #else
1438 int offset = 0;
1439 #endif
1440
1442
1444 for (x = 0; x <
width; x++)
1446 src += srcstride;
1448 }
1449
1452
1453 ox = ox * (1 << (BIT_DEPTH - 8));
1454 for (y = 0; y <
height; y++) {
1455 for (x = 0; x <
width; x++)
1458 dst += dststride;
1459 }
1460 }
1461
1463 int16_t *src2,
1464 int height,
int denom,
int wx0,
int wx1,
1465 int ox0,
int ox1, intptr_t mx, intptr_t my,
int width)
1466 {
1469 ptrdiff_t srcstride = _srcstride /
sizeof(
pixel);
1471 ptrdiff_t dststride = _dststride /
sizeof(
pixel);
1474 int16_t *tmp = tmp_array;
1476 int log2Wd = denom + shift - 1;
1477
1479
1481 for (x = 0; x <
width; x++)
1483 src += srcstride;
1485 }
1486
1489
1492 for (y = 0; y <
height; y++) {
1493 for (x = 0; x <
width; x++)
1495 ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1497 dst += dststride;
1499 }
1500 }// line zero
/*
 * Accessors for the eight samples that straddle the position `pix` on the
 * current line.  P0..P3 sit at negative multiples of xstride (P0 nearest,
 * P3 farthest); Q0..Q3 sit at zero and positive multiples (Q0 is pix[0]).
 * Both `pix` and `xstride` are taken from the scope in which the macros
 * are expanded, and every macro expands to an lvalue, so it can be read
 * or assigned.
 */
1501 #define P3 pix[-4 * xstride]
1502 #define P2 pix[-3 * xstride]
1503 #define P1 pix[-2 * xstride]
1504 #define P0 pix[-1 * xstride]
1505 #define Q0 pix[0 * xstride]
1506 #define Q1 pix[1 * xstride]
1507 #define Q2 pix[2 * xstride]
1508 #define Q3 pix[3 * xstride]
1509
1510 // Line three: the same eight sample positions, displaced by 3 * ystride. Used only for the deblocking decision.
/*
 * TPn / TQn mirror Pn / Qn but add 3 * ystride to the index.  `pix`,
 * `xstride` and `ystride` are taken from the expansion scope.
 */
1511 #define TP3 pix[-4 * xstride + 3 * ystride]
1512 #define TP2 pix[-3 * xstride + 3 * ystride]
1513 #define TP1 pix[-2 * xstride + 3 * ystride]
1514 #define TP0 pix[-1 * xstride + 3 * ystride]
1515 #define TQ0 pix[0 * xstride + 3 * ystride]
1516 #define TQ1 pix[1 * xstride + 3 * ystride]
1517 #define TQ2 pix[2 * xstride + 3 * ystride]
1518 #define TQ3 pix[3 * xstride + 3 * ystride]
1519
1521 ptrdiff_t _xstride, ptrdiff_t _ystride,
1522 int beta, int *_tc,
1524 {
1525 int d, j;
1527 ptrdiff_t xstride = _xstride /
sizeof(
pixel);
1528 ptrdiff_t ystride = _ystride /
sizeof(
pixel);
1529
1531
1532 for (j = 0; j < 2; j++) {
1533 const int dp0 = abs(
P2 - 2 *
P1 +
P0);
1534 const int dq0 = abs(
Q2 - 2 *
Q1 +
Q0);
1535 const int dp3 = abs(
TP2 - 2 *
TP1 +
TP0);
1536 const int dq3 = abs(
TQ2 - 2 *
TQ1 +
TQ0);
1537 const int d0 = dp0 + dq0;
1538 const int d3 = dp3 + dq3;
1540 const int no_p = _no_p[j];
1541 const int no_q = _no_q[j];
1542
1543 if (d0 + d3 >= beta) {
1544 pix += 4 * ystride;
1545 continue;
1546 } else {
1547 const int beta_3 = beta >> 3;
1548 const int beta_2 = beta >> 2;
1549 const int tc25 = ((tc * 5 + 1) >> 1);
1550
1551 if (abs(
P3 -
P0) + abs(
Q3 -
Q0) < beta_3 && abs(
P0 -
Q0) < tc25 &&
1553 (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1554 // strong filtering
1555 const int tc2 = tc << 1;
1556 for (d = 0; d < 4; d++) {
1565 if (!no_p) {
1566 P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1567 P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1568 P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1569 }
1570 if (!no_q) {
1571 Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1572 Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1573 Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1574 }
1575 pix += ystride;
1576 }
1577 } else { // normal filtering
1578 int nd_p = 1;
1579 int nd_q = 1;
1580 const int tc_2 = tc >> 1;
1581 if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1582 nd_p = 2;
1583 if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1584 nd_q = 2;
1585
1586 for (d = 0; d < 4; d++) {
1593 int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1594 if (abs(delta0) < 10 *
tc) {
1595 delta0 = av_clip(delta0, -tc, tc);
1596 if (!no_p)
1598 if (!no_q)
1600 if (!no_p && nd_p > 1) {
1601 const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1603 }
1604 if (!no_q && nd_q > 1) {
1605 const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1607 }
1608 }
1609 pix += ystride;
1610 }
1611 }
1612 }
1613 }
1614 }
1615
1617 ptrdiff_t _ystride, int *_tc,
1619 {
1620 int d, j, no_p, no_q;
1622 ptrdiff_t xstride = _xstride /
sizeof(
pixel);
1623 ptrdiff_t ystride = _ystride /
sizeof(
pixel);
1624
1625 for (j = 0; j < 2; j++) {
1627 if (tc <= 0) {
1628 pix += 4 * ystride;
1629 continue;
1630 }
1631 no_p = _no_p[j];
1632 no_q = _no_q[j];
1633
1634 for (d = 0; d < 4; d++) {
1635 int delta0;
1640 delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1641 if (!no_p)
1643 if (!no_q)
1645 pix += ystride;
1646 }
1647 }
1648 }
1649
1653 {
1655 }
1656
1660 {
1662 }
1663
1667 {
1669 beta, tc, no_p, no_q);
1670 }
1671
1675 {
1677 beta, tc, no_p, no_q);
1678 }
1679
1680 #undef P3
1681 #undef P2
1682 #undef P1
1683 #undef P0
1684 #undef Q0
1685 #undef Q1
1686 #undef Q2
1687 #undef Q3
1688
1689 #undef TP3
1690 #undef TP2
1691 #undef TP1
1692 #undef TP0
1693 #undef TQ0
1694 #undef TQ1
1695 #undef TQ2
1696 #undef TQ3