1 /*
2 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <string.h>
24
32 #include "config.h"
35
37 { 36, 68, 60, 92, 34, 66, 58, 90, },
38 { 100, 4, 124, 28, 98, 2, 122, 26, },
39 { 52, 84, 44, 76, 50, 82, 42, 74, },
40 { 116, 20, 108, 12, 114, 18, 106, 10, },
41 { 32, 64, 56, 88, 38, 70, 62, 94, },
42 { 96, 0, 120, 24, 102, 6, 126, 30, },
43 { 48, 80, 40, 72, 54, 86, 46, 78, },
44 { 112, 16, 104, 8, 118, 22, 110, 14, },
45 { 36, 68, 60, 92, 34, 66, 58, 90, },
46 };
47
49 64, 64, 64, 64, 64, 64, 64, 64
50 };
51
54 {
56 uint8_t *ptr = plane +
stride * y;
60 }
61 }
62
64 const uint8_t *_src,
const int16_t *
filter,
65 const int32_t *filterPos,
int filterSize)
66 {
70 const uint16_t *
src = (
const uint16_t *) _src;
73
75 sh = 9;
77 sh = 16 - 1 - 4;
78 }
79
80 for (
i = 0;
i < dstW;
i++) {
81 int j;
82 int srcPos = filterPos[
i];
84
85 for (j = 0; j < filterSize; j++) {
87 }
88 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
90 }
91 }
92
94 const uint8_t *_src,
const int16_t *
filter,
95 const int32_t *filterPos,
int filterSize)
96 {
99 const uint16_t *
src = (
const uint16_t *) _src;
100 int sh =
desc->comp[0].depth - 1;
101
102 if (sh<15) {
105 sh = 16 - 1;
106 }
107
108 for (
i = 0;
i < dstW;
i++) {
109 int j;
110 int srcPos = filterPos[
i];
112
113 for (j = 0; j < filterSize; j++) {
115 }
116 // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
117 dst[
i] =
FFMIN(
val >> sh, (1 << 15) - 1);
118 }
119 }
120
121 // bilinear / bicubic scaling
123 const uint8_t *
src,
const int16_t *
filter,
124 const int32_t *filterPos,
int filterSize)
125 {
127 for (
i = 0;
i < dstW;
i++) {
128 int j;
129 int srcPos = filterPos[
i];
131 for (j = 0; j < filterSize; j++) {
133 }
134 dst[
i] =
FFMIN(
val >> 7, (1 << 15) - 1);
// the cubic equation does overflow ...
135 }
136 }
137
139 const uint8_t *
src,
const int16_t *
filter,
140 const int32_t *filterPos,
int filterSize)
141 {
144 for (
i = 0;
i < dstW;
i++) {
145 int j;
146 int srcPos = filterPos[
i];
148 for (j = 0; j < filterSize; j++) {
150 }
151 dst[
i] =
FFMIN(
val >> 3, (1 << 19) - 1);
// the cubic equation does overflow ...
152 }
153 }
154
155 // FIXME all pal and rgb srcFormats could do this conversion as well
156 // FIXME all scalers more complex than bilinear could do half of this transform
158 {
161 dstU[
i] = (
FFMIN(dstU[
i], 30775) * 4663 - 9289992) >> 12;
// -264
162 dstV[
i] = (
FFMIN(dstV[
i], 30775) * 4663 - 9289992) >> 12;
// -264
163 }
164 }
165
167 {
170 dstU[
i] = (dstU[
i] * 1799 + 4081085) >> 11;
// 1469
171 dstV[
i] = (dstV[
i] * 1799 + 4081085) >> 11;
// 1469
172 }
173 }
174
176 {
179 dst[
i] = (
FFMIN(dst[
i], 30189) * 19077 - 39057361) >> 14;
180 }
181
183 {
186 dst[
i] = (dst[
i] * 14071 + 33561947) >> 14;
187 }
188
190 {
195 dstU[
i] = (
FFMIN(dstU[
i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12;
// -264
196 dstV[
i] = (
FFMIN(dstV[
i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12;
// -264
197 }
198 }
199
201 {
206 dstU[
i] = (dstU[
i] * 1799 + (4081085 << 4)) >> 11;
// 1469
207 dstV[
i] = (dstV[
i] * 1799 + (4081085 << 4)) >> 11;
// 1469
208 }
209 }
210
212 {
216 dst[
i] = ((
int)(
FFMIN(dst[
i], 30189 << 4) * 4769
U - (39057361 << 2))) >> 12;
217 }
218 }
219
221 {
225 dst[
i] = (dst[
i]*(14071/4) + (33561947<<4)/4)>>12;
226 }
227
228
/* Set to 1 to enable the DEBUG_BUFFERS() trace messages below. */
#define DEBUG_SWSCALE_BUFFERS 0

/**
 * Emit a trace message through av_log() at AV_LOG_DEBUG level when
 * DEBUG_SWSCALE_BUFFERS is non-zero; otherwise do nothing.
 *
 * The arguments are a printf-style format string followed by its values.
 * The macro expects a variable named `c` to be in scope at the call site;
 * it is passed to av_log() as the logging context.
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement. With a bare `if`, a caller writing
 *     if (cond) DEBUG_BUFFERS(...); else other();
 * would have its `else` captured by the macro's hidden `if`.
 */
#define DEBUG_BUFFERS(...)                          \
    do {                                            \
        if (DEBUG_SWSCALE_BUFFERS)                  \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);   \
    } while (0)
233
235 int srcStride[],
int srcSliceY,
int srcSliceH,
236 uint8_t *dst[], int dstStride[],
237 int dstSliceY, int dstSliceH)
238 {
239 const int scale_dst = dstSliceY > 0 || dstSliceH <
c->dstH;
240
241 /* load a few things into local vars to make the code more readable?
242 * and faster */
243 const int dstW =
c->dstW;
245
247 const int flags =
c->flags;
248 int32_t *vLumFilterPos =
c->vLumFilterPos;
249 int32_t *vChrFilterPos =
c->vChrFilterPos;
250
251 const int vLumFilterSize =
c->vLumFilterSize;
252 const int vChrFilterSize =
c->vChrFilterSize;
253
261 const int chrSrcSliceY = srcSliceY >>
c->chrSrcVSubSample;
263 int should_dither =
isNBPS(
c->srcFormat) ||
265 int lastDstY;
266
267 /* vars which will change and which we need to store back in the context */
269 int lastInLumBuf =
c->lastInLumBuf;
270 int lastInChrBuf =
c->lastInChrBuf;
271
272 int lumStart = 0;
273 int lumEnd =
c->descIndex[0];
274 int chrStart = lumEnd;
275 int chrEnd =
c->descIndex[1];
276 int vStart = chrEnd;
277 int vEnd =
c->numDesc;
278 SwsSlice *src_slice = &
c->slice[lumStart];
279 SwsSlice *hout_slice = &
c->slice[
c->numSlice-2];
280 SwsSlice *vout_slice = &
c->slice[
c->numSlice-1];
282
283 int needAlpha =
c->needAlpha;
284
285 int hasLumHoles = 1;
286 int hasChrHoles = 1;
287
292 srcStride[1] =
293 srcStride[2] =
294 srcStride[3] = srcStride[0];
295 }
296 srcStride[1] *= 1 <<
c->vChrDrop;
297 srcStride[2] *= 1 <<
c->vChrDrop;
298
299 DEBUG_BUFFERS(
"swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
300 src[0], srcStride[0],
src[1], srcStride[1],
301 src[2], srcStride[2],
src[3], srcStride[3],
302 dst[0], dstStride[0], dst[1], dstStride[1],
303 dst[2], dstStride[2], dst[3], dstStride[3]);
304 DEBUG_BUFFERS(
"srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
307 vLumFilterSize, vChrFilterSize);
308
309 if (dstStride[0]&15 || dstStride[1]&15 ||
310 dstStride[2]&15 || dstStride[3]&15) {
315 "Warning: dstStride is not aligned!\n"
316 " ->cannot do aligned memory accesses anymore\n");
317 }
318 }
319
320 #if ARCH_X86
321 if ( (uintptr_t)dst[0]&15 || (uintptr_t)dst[1]&15 || (uintptr_t)dst[2]&15
322 || (uintptr_t)
src[0]&15 || (uintptr_t)
src[1]&15 || (uintptr_t)
src[2]&15
323 || dstStride[0]&15 || dstStride[1]&15 || dstStride[2]&15 || dstStride[3]&15
324 || srcStride[0]&15 || srcStride[1]&15 || srcStride[2]&15 || srcStride[3]&15
325 ) {
331 }
332 }
333 #endif
334
335 if (scale_dst) {
336 dstY = dstSliceY;
337 dstH = dstY + dstSliceH;
338 lastInLumBuf = -1;
339 lastInChrBuf = -1;
340 } else if (srcSliceY == 0) {
341 /* Note the user might start scaling the picture in the middle so this
342 * will not get executed. This is not really intended but works
343 * currently, so people might do it. */
344 dstY = 0;
345 lastInLumBuf = -1;
346 lastInChrBuf = -1;
347 }
348
349 if (!should_dither) {
351 }
352 lastDstY = dstY;
353
355 yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX,
c->use_mmx_vfilter);
356
358 srcSliceY,
srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);
359
361 dstY, dstSliceH, dstY >>
c->chrDstVSubSample,
363 if (srcSliceY == 0) {
368
373 hout_slice->
width = dstW;
374 }
375
376 for (; dstY < dstH; dstY++) {
377 const int chrDstY = dstY >>
c->chrDstVSubSample;
378 int use_mmx_vfilter=
c->use_mmx_vfilter;
379
380 // First line needed as input
381 const int firstLumSrcY =
FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
382 const int firstLumSrcY2 =
FFMAX(1 - vLumFilterSize, vLumFilterPos[
FFMIN(dstY | ((1 <<
c->chrDstVSubSample) - 1),
c->dstH - 1)]);
383 // First line needed as input
384 const int firstChrSrcY =
FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
385
386 // Last line needed as input
387 int lastLumSrcY =
FFMIN(
c->srcH, firstLumSrcY + vLumFilterSize) - 1;
388 int lastLumSrcY2 =
FFMIN(
c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
389 int lastChrSrcY =
FFMIN(
c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
390 int enough_lines;
391
393 int posY, cPosY, firstPosY, lastPosY, firstCPosY, lastCPosY;
394
395 // handle holes (FAST_BILINEAR & weird filters)
396 if (firstLumSrcY > lastInLumBuf) {
397
398 hasLumHoles = lastInLumBuf != firstLumSrcY - 1;
399 if (hasLumHoles) {
404 }
405
406 lastInLumBuf = firstLumSrcY - 1;
407 }
408 if (firstChrSrcY > lastInChrBuf) {
409
410 hasChrHoles = lastInChrBuf != firstChrSrcY - 1;
411 if (hasChrHoles) {
416 }
417
418 lastInChrBuf = firstChrSrcY - 1;
419 }
420
422 DEBUG_BUFFERS(
"\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
423 firstLumSrcY, lastLumSrcY, lastInLumBuf);
424 DEBUG_BUFFERS(
"\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
425 firstChrSrcY, lastChrSrcY, lastInChrBuf);
426
427 // Do we have enough lines in this slice to output the dstY line
428 enough_lines = lastLumSrcY2 < srcSliceY +
srcSliceH &&
430
431 if (!enough_lines) {
433 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
434 DEBUG_BUFFERS(
"buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
435 lastLumSrcY, lastChrSrcY);
436 }
437
440
441
443 if (posY <= lastLumSrcY && !hasLumHoles) {
444 firstPosY =
FFMAX(firstLumSrcY, posY);
446 } else {
447 firstPosY = posY;
448 lastPosY = lastLumSrcY;
449 }
450
452 if (cPosY <= lastChrSrcY && !hasChrHoles) {
453 firstCPosY =
FFMAX(firstChrSrcY, cPosY);
455 } else {
456 firstCPosY = cPosY;
457 lastCPosY = lastChrSrcY;
458 }
459
461
462 if (posY < lastLumSrcY + 1) {
463 for (
i = lumStart;
i < lumEnd; ++
i)
465 }
466
467 lastInLumBuf = lastLumSrcY;
468
469 if (cPosY < lastChrSrcY + 1) {
470 for (
i = chrStart;
i < chrEnd; ++
i)
472 }
473
474 lastInChrBuf = lastChrSrcY;
475
476 if (!enough_lines)
477 break; // we can't output a dstY line so let's try with the next slice
478
479 #if HAVE_MMX_INLINE
481 #endif
482 if (should_dither) {
485 }
486 if (dstY >=
c->dstH - 2) {
487 /* hmm looks like we can't use MMX here without overwriting
488 * this array's tail */
490 &yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX);
491 use_mmx_vfilter= 0;
493 yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, use_mmx_vfilter);
494 }
495
496 for (
i = vStart;
i < vEnd; ++
i)
498 }
500 int offset = lastDstY - dstSliceY;
501 int length = dstW;
502 int height = dstY - lastDstY;
503
507 1,
desc->comp[3].depth,
509 }
else if (
is32BPS(dstFormat)) {
512 1,
desc->comp[3].depth,
514 } else
516 }
517
518 #if HAVE_MMXEXT_INLINE
520 __asm__ volatile (
"sfence" :::
"memory");
521 #endif
522 emms_c();
523
524 /* store changed local vars back in the context */
526 c->lastInLumBuf = lastInLumBuf;
527 c->lastInChrBuf = lastInChrBuf;
528
529 return dstY - lastDstY;
530 }
531
533 {
534 c->lumConvertRange =
NULL;
535 c->chrConvertRange =
NULL;
536 if (
c->srcRange !=
c->dstRange && !
isAnyRGB(
c->dstFormat)) {
537 if (
c->dstBpc <= 14) {
541 } else {
544 }
545 } else {
549 } else {
552 }
553 }
554 }
555 }
556
558 {
560
562 &
c->yuv2nv12cX, &
c->yuv2packed1,
563 &
c->yuv2packed2, &
c->yuv2packedX, &
c->yuv2anyX);
564
566
567 if (
c->srcBpc == 8) {
568 if (
c->dstBpc <= 14) {
573 }
574 } else {
576 }
577 } else {
580 }
581
583
586 c->needs_hcscale = 1;
587 }
588
590 {
592
593 #if ARCH_PPC
595 #elif ARCH_X86
597 #elif ARCH_AARCH64
599 #elif ARCH_ARM
601 #elif ARCH_LOONGARCH64
603 #endif
604 }
605
607 {
612
615 }
616 }
617
619 const int linesizes[4])
620 {
623
625
626 for (
i = 0;
i < 4;
i++) {
627 int plane =
desc->comp[
i].plane;
628 if (!
data[plane] || !linesizes[plane])
629 return 0;
630 }
631
632 return 1;
633 }
634
637 {
638 int xp,yp;
640
641 for (yp=0; yp<
h; yp++) {
642 for (xp=0; xp+2<
stride; xp+=3) {
643 int x, y, z,
r,
g,
b;
644
649 } else {
653 }
654
655 x =
c->xyzgamma[x>>4];
656 y =
c->xyzgamma[y>>4];
657 z =
c->xyzgamma[z>>4];
658
659 // convert from XYZlinear to sRGBlinear
660 r =
c->xyz2rgb_matrix[0][0] * x +
661 c->xyz2rgb_matrix[0][1] * y +
662 c->xyz2rgb_matrix[0][2] * z >> 12;
663 g =
c->xyz2rgb_matrix[1][0] * x +
664 c->xyz2rgb_matrix[1][1] * y +
665 c->xyz2rgb_matrix[1][2] * z >> 12;
666 b =
c->xyz2rgb_matrix[2][0] * x +
667 c->xyz2rgb_matrix[2][1] * y +
668 c->xyz2rgb_matrix[2][2] * z >> 12;
669
670 // limit values to 12-bit depth
674
675 // convert from sRGBlinear to RGB and scale from 12bit to 16bit
677 AV_WB16(dst + xp + 0,
c->rgbgamma[
r] << 4);
678 AV_WB16(dst + xp + 1,
c->rgbgamma[
g] << 4);
679 AV_WB16(dst + xp + 2,
c->rgbgamma[
b] << 4);
680 } else {
681 AV_WL16(dst + xp + 0,
c->rgbgamma[
r] << 4);
682 AV_WL16(dst + xp + 1,
c->rgbgamma[
g] << 4);
683 AV_WL16(dst + xp + 2,
c->rgbgamma[
b] << 4);
684 }
685 }
688 }
689 }
690
693 {
694 int xp,yp;
696
697 for (yp=0; yp<
h; yp++) {
698 for (xp=0; xp+2<
stride; xp+=3) {
699 int x, y, z,
r,
g,
b;
700
705 } else {
709 }
710
711 r =
c->rgbgammainv[
r>>4];
712 g =
c->rgbgammainv[
g>>4];
713 b =
c->rgbgammainv[
b>>4];
714
715 // convert from sRGBlinear to XYZlinear
716 x =
c->rgb2xyz_matrix[0][0] *
r +
717 c->rgb2xyz_matrix[0][1] *
g +
718 c->rgb2xyz_matrix[0][2] *
b >> 12;
719 y =
c->rgb2xyz_matrix[1][0] *
r +
720 c->rgb2xyz_matrix[1][1] *
g +
721 c->rgb2xyz_matrix[1][2] *
b >> 12;
722 z =
c->rgb2xyz_matrix[2][0] *
r +
723 c->rgb2xyz_matrix[2][1] *
g +
724 c->rgb2xyz_matrix[2][2] *
b >> 12;
725
726 // limit values to 12-bit depth
730
731 // convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit
733 AV_WB16(dst + xp + 0,
c->xyzgammainv[x] << 4);
734 AV_WB16(dst + xp + 1,
c->xyzgammainv[y] << 4);
735 AV_WB16(dst + xp + 2,
c->xyzgammainv[z] << 4);
736 } else {
737 AV_WL16(dst + xp + 0,
c->xyzgammainv[x] << 4);
738 AV_WL16(dst + xp + 1,
c->xyzgammainv[y] << 4);
739 AV_WL16(dst + xp + 2,
c->xyzgammainv[z] << 4);
740 }
741 }
744 }
745 }
746
748 {
749 for (
int i = 0;
i < 256;
i++) {
750 int r,
g,
b, y,
u, v,
a = 0xff;
753 a = (p >> 24) & 0xFF;
754 r = (p >> 16) & 0xFF;
759 g = ((
i >> 2) & 7) * 36;
763 g = ((
i >> 3) & 7) * 36;
766 r = (
i >> 3 ) * 255;
767 g = ((
i >> 1) & 3) * 85;
771 } else {
773 b = (
i >> 3 ) * 255;
774 g = ((
i >> 1) & 3) * 85;
776 }
/* Fixed-point RGB -> YUV coefficients.
 *
 * Each macro takes a floating-point weight, scales it by 219/255 (the *Y
 * macros) or 224/255 (the *U and *V macros) and by 1 << RGB2YUV_SHIFT, and
 * adds 0.5 before the cast to int so the truncation rounds to nearest.
 * BV, GV, GU and RU negate the rounded magnitude; the others are positive.
 *
 * NOTE(review): the names read as <input channel><output component>
 * (e.g. BY = blue's contribution to Y), and 219 / 224 look like the
 * limited-range luma / chroma spans -- confirm against the code that
 * consumes these macros, which is not visible in this span. */
777 #define RGB2YUV_SHIFT 15
778 #define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
779 #define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
780 #define BU ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
781 #define GY ( (int) (0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
782 #define GV (-(int) (0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
783 #define GU (-(int) (0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
784 #define RY ( (int) (0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
785 #define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
786 #define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
787
791 c->pal_yuv[
i]= y + (
u<<8) + (v<<16) + ((unsigned)
a<<24);
792
793 switch (
c->dstFormat) {
795 #if !HAVE_BIGENDIAN
797 #endif
798 c->pal_rgb[
i]=
r + (
g<<8) + (
b<<16) + ((unsigned)
a<<24);
799 break;
801 #if HAVE_BIGENDIAN
803 #endif
804 c->pal_rgb[
i]=
a + (
r<<8) + (
g<<16) + ((unsigned)
b<<24);
805 break;
807 #if HAVE_BIGENDIAN
809 #endif
810 c->pal_rgb[
i]=
a + (
b<<8) + (
g<<16) + ((unsigned)
r<<24);
811 break;
813 #if !HAVE_BIGENDIAN
815 #endif
816 default:
817 c->pal_rgb[
i]=
b + (
g<<8) + (
r<<16) + ((unsigned)
a<<24);
818 }
819 }
820 }
821
823 const uint8_t * const srcSlice[], const int srcStride[],
825 uint8_t *const dstSlice[], const int dstStride[],
826 int dstSliceY, int dstSliceH);
827
829 const uint8_t * const srcSlice[], const int srcStride[],
831 uint8_t * const dstSlice[], const int dstStride[],
832 int dstSliceY, int dstSliceH)
833 {
835 srcSlice, srcStride, srcSliceY,
srcSliceH,
836 c->cascaded_tmp,
c->cascaded_tmpStride, 0,
c->srcH);
837
840
841 if (
c->cascaded_context[2])
844 c->cascaded1_tmp,
c->cascaded1_tmpStride, 0,
c->dstH);
845 else
848 dstSlice, dstStride, dstSliceY, dstSliceH);
849
852
853 if (
c->cascaded_context[2]) {
855 c->cascaded1_tmpStride,
c->cascaded_context[1]->dstY -
ret,
856 c->cascaded_context[1]->dstY,
857 dstSlice, dstStride, dstSliceY, dstSliceH);
858 }
860 }
861
863 const uint8_t * const srcSlice[], const int srcStride[],
865 uint8_t * const dstSlice[], const int dstStride[],
866 int dstSliceY, int dstSliceH)
867 {
869 srcSlice, srcStride, srcSliceY,
srcSliceH,
870 c->cascaded_tmp,
c->cascaded_tmpStride,
871 0,
c->cascaded_context[0]->dstH);
875 (
const uint8_t *
const * )
c->cascaded_tmp,
c->cascaded_tmpStride,
876 0,
c->cascaded_context[0]->dstH,
877 dstSlice, dstStride, dstSliceY, dstSliceH);
879 }
880
882 const uint8_t * const srcSlice[], const int srcStride[],
884 uint8_t *const dstSlice[], const int dstStride[],
885 int dstSliceY, int dstSliceH)
886 {
887 const int scale_dst = dstSliceY > 0 || dstSliceH <
c->dstH;
890 const uint8_t *
src2[4];
891 uint8_t *dst2[4];
892 int macro_height_src =
isBayer(
c->srcFormat) ? 2 : (1 <<
c->chrSrcVSubSample);
893 int macro_height_dst =
isBayer(
c->dstFormat) ? 2 : (1 <<
c->chrDstVSubSample);
894 // copy strides, so they can safely be modified
895 int srcStride2[4];
896 int dstStride2[4];
897 int srcSliceY_internal = srcSliceY;
898
899 if (!srcStride || !dstStride || !dstSlice || !srcSlice) {
900 av_log(
c,
AV_LOG_ERROR,
"One of the input parameters to sws_scale() is NULL, please check the calling code\n");
902 }
903
904 if ((srcSliceY & (macro_height_src - 1)) ||
909 }
910
911 if ((dstSliceY & (macro_height_dst - 1)) ||
912 ((dstSliceH & (macro_height_dst - 1)) && dstSliceY + dstSliceH !=
c->dstH) ||
913 dstSliceY + dstSliceH >
c->dstH) {
916 }
917
921 }
925 }
926
927 // do not mess up sliceDir if we have a "trailing" 0-size slice
929 return 0;
930
931 if (
c->gamma_flag &&
c->cascaded_context[0])
933 dstSlice, dstStride, dstSliceY, dstSliceH);
934
935 if (
c->cascaded_context[0] && srcSliceY == 0 &&
srcSliceH ==
c->cascaded_context[0]->srcH)
937 dstSlice, dstStride, dstSliceY, dstSliceH);
938
940 for (
i = 0;
i < 4;
i++)
941 memset(
c->dither_error[
i], 0,
sizeof(
c->dither_error[0][0]) * (
c->dstW+2));
942
945
946 memcpy(
src2, srcSlice,
sizeof(
src2));
947 memcpy(dst2, dstSlice, sizeof(dst2));
948 memcpy(srcStride2, srcStride, sizeof(srcStride2));
949 memcpy(dstStride2, dstStride, sizeof(dstStride2));
950
952 if (srcSliceY != 0 && srcSliceY +
srcSliceH !=
c->srcH) {
955 }
956
957 c->sliceDir = (srcSliceY == 0) ? 1 : -1;
958 } else if (scale_dst)
960
961 if (
c->src0Alpha && !
c->dst0Alpha &&
isALPHA(
c->dstFormat)) {
963 int x,y;
964
967 if (!
c->rgb0_scratch)
969
970 base = srcStride[0] < 0 ?
c->rgb0_scratch - srcStride[0] * (
srcSliceH-1) :
973 memcpy(
base + srcStride[0]*y,
src2[0] + srcStride[0]*y, 4*
c->srcW);
974 for (x=
c->src0Alpha-1; x<4*c->srcW; x+=4) {
975 base[ srcStride[0]*y + x] = 0xFF;
976 }
977 }
979 }
980
981 if (
c->srcXYZ && !(
c->dstXYZ &&
c->srcW==
c->dstW &&
c->srcH==
c->dstH)) {
983
988
989 base = srcStride[0] < 0 ?
c->xyz_scratch - srcStride[0] * (
srcSliceH-1) :
991
994 }
995
996 if (
c->sliceDir != 1) {
997 // slices go from bottom to top => we flip the image internally
998 for (
i=0;
i<4;
i++) {
1000 dstStride2[
i] *= -1;
1001 }
1002
1005 src2[1] += ((
srcSliceH >>
c->chrSrcVSubSample) - 1) * srcStride[1];
1006 src2[2] += ((
srcSliceH >>
c->chrSrcVSubSample) - 1) * srcStride[2];
1008 dst2[0] += (
c->dstH - 1) * dstStride[0];
1009 dst2[1] += ((
c->dstH >>
c->chrDstVSubSample) - 1) * dstStride[1];
1010 dst2[2] += ((
c->dstH >>
c->chrDstVSubSample) - 1) * dstStride[2];
1011 dst2[3] += (
c->dstH - 1) * dstStride[3];
1012
1013 srcSliceY_internal =
c->srcH-srcSliceY-
srcSliceH;
1014 }
1017
1018 if (
c->convert_unscaled) {
1019 int offset = srcSliceY_internal;
1021
1022 // for dst slice scaling, offset the pointers to match the unscaled API
1023 if (scale_dst) {
1025 for (
i = 0;
i < 4 &&
src2[
i];
i++) {
1027 break;
1028 src2[
i] += (dstSliceY >> ((
i == 1 ||
i == 2) ?
c->chrSrcVSubSample : 0)) * srcStride2[
i];
1029 }
1030
1031 for (
i = 0;
i < 4 && dst2[
i];
i++) {
1032 if (!dst2[
i] || (
i > 0 &&
usePal(
c->dstFormat)))
1033 break;
1034 dst2[
i] -= (dstSliceY >> ((
i == 1 ||
i == 2) ?
c->chrDstVSubSample : 0)) * dstStride2[
i];
1035 }
1037 slice_h = dstSliceH;
1038 }
1039
1041 dst2, dstStride2);
1042 if (scale_dst)
1043 dst2[0] += dstSliceY * dstStride2[0];
1044 } else {
1046 dst2, dstStride2, dstSliceY, dstSliceH);
1047 }
1048
1049 if (
c->dstXYZ && !(
c->srcXYZ &&
c->srcW==
c->dstW &&
c->srcH==
c->dstH)) {
1050 uint16_t *dst16;
1051
1052 if (scale_dst) {
1053 dst16 = (uint16_t *)dst2[0];
1054 } else {
1055 int dstY =
c->dstY ?
c->dstY : srcSliceY +
srcSliceH;
1056
1060 dst16 = (uint16_t*)(dst2[0] + (dstY -
ret) * dstStride2[0]);
1061 }
1062
1063 /* replace on the same data */
1065 }
1066
1067 /* reset slice direction at end of frame */
1068 if ((srcSliceY_internal +
srcSliceH ==
c->srcH) || scale_dst)
1070
1072 }
1073
1075 {
1078 c->src_ranges.nb_ranges = 0;
1079 }
1080
1082 {
1083 int ret, allocated = 0;
1084
1088
1093
1097 allocated = 1;
1098 }
1099
1102 if (allocated)
1104
1106 }
1107
1108 return 0;
1109 }
1110
1112 unsigned int slice_height)
1113 {
1115
1119
1120 return 0;
1121 }
1122
1124 {
1126 return c->slice_ctx[0]->dst_slice_align;
1127
1128 return c->dst_slice_align;
1129 }
1130
1132 unsigned int slice_height)
1133 {
1135 uint8_t *dst[4];
1136
1137 /* wait until complete input has been received */
1138 if (!(
c->src_ranges.nb_ranges == 1 &&
1139 c->src_ranges.ranges[0].start == 0 &&
1140 c->src_ranges.ranges[0].len ==
c->srcH))
1142
1143 if ((slice_start > 0 || slice_height < c->dstH) &&
1144 (slice_start %
align || slice_height %
align)) {
1146 "Incorrectly aligned output: %u/%u not multiples of %u\n",
1147 slice_start, slice_height,
align);
1149 }
1150
1151 if (
c->slicethread) {
1152 int nb_jobs =
c->slice_ctx[0]->dither ==
SWS_DITHER_ED ? 1 :
c->nb_slice_ctx;
1154
1155 c->dst_slice_start = slice_start;
1156 c->dst_slice_height = slice_height;
1157
1159
1160 for (
int i = 0;
i <
c->nb_slice_ctx;
i++) {
1161 if (
c->slice_err[
i] < 0) {
1162 ret =
c->slice_err[
i];
1163 break;
1164 }
1165 }
1166
1167 memset(
c->slice_err, 0,
c->nb_slice_ctx *
sizeof(*
c->slice_err));
1168
1170 }
1171
1173 ptrdiff_t
offset =
c->frame_dst->linesize[
i] * (slice_start >>
c->chrDstVSubSample);
1175 }
1176
1178 c->frame_src->linesize, 0,
c->srcH,
1179 dst,
c->frame_dst->linesize, slice_start, slice_height);
1180 }
1181
1183 {
1185
1189
1193
1195
1197 }
1198
1199 /**
1200 * swscale wrapper, so we don't need to export the SwsContext.
1201 * Assumes planar YUV to be in YUV order instead of YVU.
1202 */
1204 const uint8_t * const srcSlice[],
1205 const int srcStride[], int srcSliceY,
1207 const int dstStride[])
1208 {
1209 if (
c->nb_slice_ctx)
1210 c =
c->slice_ctx[0];
1211
1213 dst, dstStride, 0,
c->dstH);
1214 }
1215
1217 int nb_jobs, int nb_threads)
1218 {
1221
1223 c->dst_slice_align);
1224 const int slice_start = jobnr * slice_height;
1226 int err = 0;
1227
1229 uint8_t *dst[4] = {
NULL };
1230
1232 const int vshift = (
i == 1 ||
i == 2) ?
c->chrDstVSubSample : 0;
1235
1237 }
1238
1243 }
1244
1246 }