/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"

#define _SVID_SOURCE // needed for MAP_ANONYMOUS
#define _DARWIN_C_SOURCE // needed for MAP_ANON
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
#if HAVE_VIRTUALALLOC
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

unsigned swscale_version(void)
{
    return LIBSWSCALE_VERSION_INT;
}

const char *swscale_configuration(void)
{
    return FFMPEG_CONFIGURATION;
}

const char *swscale_license(void)
{
#define LICENSE_PREFIX "libswscale license: "
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}

#define RET 0xC3 // near return opcode for x86

};

int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
{
    return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
           format_entries[pix_fmt].is_supported_in : 0;
}

int sws_isSupportedOutput(enum AVPixelFormat pix_fmt)
{
    return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
           format_entries[pix_fmt].is_supported_out : 0;
}

int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt)
{
    return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
           format_entries[pix_fmt].is_supported_endianness : 0;
}

#if FF_API_SWS_FORMAT_NAME
const char *sws_format_name(enum AVPixelFormat format)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
    if (desc)
        return desc->name;
    else
        return "Unknown format";
}
#endif

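/* Evaluate the cubic a + b*x + c*x^2 + d*x^3 at x = dist via Horner's rule;
 * for dist > 1 recurse into the following spline segment with rebased
 * coefficients. */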
static double getSplineCoeff(double a, double b, double c, double d,
                             double dist)
{
    if (dist <= 1.0)
        return ((d * dist + c) * dist + b) * dist + a;
    else
        return getSplineCoeff(0.0,
                              b + 2.0 * c + 3.0 * d,
                              c + 3.0 * d,
                              -b - 3.0 * c - 6.0 * d,
                              dist - 1.0);
}

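/* Chroma sample positions are expressed in 1/256 pixel units relative to the
 * luma grid; a negative pos selects the default position for the given
 * subsampling factor. */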
static int get_local_pos(SwsContext *s, int chr_subsample, int pos, int dir)
{
    if (pos < 0) {
        pos = (128 << chr_subsample) - 128;
    }
    pos += 128; // relative to ideal left edge
    return pos >> chr_subsample;
}

static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
                              int *outFilterSize, int xInc, int srcW,
                              int dstW, int filterAlign, int one,
                              int flags, int cpu_flags,
                              SwsVector *srcFilter, SwsVector *dstFilter,
                              double param[2], int srcPos, int dstPos)
{
    int i;
    int filterSize;
    int filter2Size;
    int minFilterSize;
    int64_t *filter    = NULL;
    int64_t *filter2   = NULL;
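    /* fone is the fixed-point representation of 1.0 for the 64-bit
     * intermediate coefficients; fewer fractional bits are used for large
     * downscale factors so that summing many taps cannot overflow. */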
    const int64_t fone = 1LL << (54 - FFMIN(av_log2(srcW / dstW), 8));
    int ret            = -1;

    emms_c(); // FIXME should not be required but IS (even for non-MMX versions)

    // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
    FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW + 3) * sizeof(**filterPos), fail);

    if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled
        int i;
        filterSize = 1;
        FF_ALLOCZ_OR_GOTO(NULL, filter,
                          dstW * sizeof(*filter) * filterSize, fail);

        for (i = 0; i < dstW; i++) {
            filter[i * filterSize] = fone;
            (*filterPos)[i]        = i;
        }
    } else if (flags & SWS_POINT) { // lame looking point sampling mode
        int i;
        int64_t xDstInSrc;
        filterSize = 1;
        FF_ALLOCZ_OR_GOTO(NULL, filter,
                          dstW * sizeof(*filter) * filterSize, fail);

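        /* xDstInSrc is the center of destination pixel i in source
         * coordinates, in 16.16 fixed point; srcPos/dstPos are in 1/256
         * pixel units. */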
        xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
        for (i = 0; i < dstW; i++) {
            int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;

            (*filterPos)[i] = xx;
            filter[i]       = fone;
            xDstInSrc      += xInc;
        }
    } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
               (flags & SWS_FAST_BILINEAR)) { // bilinear upscale
        int i;
        int64_t xDstInSrc;
        filterSize = 2;
        FF_ALLOCZ_OR_GOTO(NULL, filter,
                          dstW * sizeof(*filter) * filterSize, fail);

        xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
        for (i = 0; i < dstW; i++) {
            int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
            int j;

            (*filterPos)[i] = xx;
            // bilinear upscale / linear interpolate / area averaging
            for (j = 0; j < filterSize; j++) {
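                /* Each tap gets a weight that falls off linearly with the
                 * 16.16 distance between source pixel xx and the destination
                 * center; negative weights are clamped to zero. */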
                int64_t coeff = fone - FFABS(((int64_t)xx << 16) - xDstInSrc) *
                                       (fone >> 16);
                if (coeff < 0)
                    coeff = 0;
                filter[i * filterSize + j] = coeff;
                xx++;
            }
            xDstInSrc += xInc;
        }
    } else {
        int64_t xDstInSrc;
        int sizeFactor;

        if (flags & SWS_BICUBIC)
            sizeFactor = 4;
        else if (flags & SWS_X)
            sizeFactor = 8;
        else if (flags & SWS_AREA)
            sizeFactor = 1;  // downscale only, for upscale it is bilinear
        else if (flags & SWS_GAUSS)
            sizeFactor = 8;  // infinite ;)
        else if (flags & SWS_LANCZOS)
            sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
        else if (flags & SWS_SINC)
            sizeFactor = 20; // infinite ;)
        else if (flags & SWS_SPLINE)
            sizeFactor = 20; // infinite ;)
        else if (flags & SWS_BILINEAR)
            sizeFactor = 2;
        else {
            sizeFactor = 0; // silence warning
            av_assert0(0);
        }

        if (xInc <= 1 << 16)
            filterSize = 1 + sizeFactor; // upscale
        else
            filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;

        filterSize = FFMIN(filterSize, srcW - 2);
        filterSize = FFMAX(filterSize, 1);

        FF_ALLOC_OR_GOTO(NULL, filter,
                         dstW * sizeof(*filter) * filterSize, fail);

        xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7);
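        /* This branch works with 17 fractional bits (half-pixel resolution)
         * so that filters with an even number of taps can be centered between
         * two source pixels. */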
        for (i = 0; i < dstW; i++) {
            int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17);
            int j;
            (*filterPos)[i] = xx;
            for (j = 0; j < filterSize; j++) {
                int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
                double floatd;
                int64_t coeff;

                if (xInc > 1 << 16)
                    d = d * dstW / srcW;
                floatd = d * (1.0 / (1 << 30));

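                /* d is this tap's distance from the destination center with
                 * 30 fractional bits, rescaled when downscaling so the kernel
                 * support grows with the scale factor; the selected kernel
                 * below is evaluated at d (fixed point) or floatd (float). */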
                if (flags & SWS_BICUBIC) {
                    int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1 << 24);
                    int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);

                    if (d >= 1LL << 31) {
                        coeff = 0.0;
                    } else {
                        int64_t dd  = (d  * d) >> 30;
                        int64_t ddd = (dd * d) >> 30;

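                        /* The two branches below are the |x| < 1 and
                         * 1 <= |x| < 2 pieces of the Mitchell-Netravali
                         * bicubic kernel, with B and C scaled by 2^24 and
                         * d, dd, ddd carrying 30 fractional bits. */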
                        if (d < 1LL << 30)
                            coeff = (12 * (1 << 24) -  9 * B - 6 * C) * ddd +
                                    (-18 * (1 << 24) + 12 * B + 6 * C) * dd +
                                    (6 * (1 << 24) - 2 * B) * (1 << 30);
                        else
                            coeff = (-B - 6 * C) * ddd +
                                    (6 * B + 30 * C) * dd +
                                    (-12 * B - 48 * C) * d +
                                    (8 * B + 24 * C) * (1 << 30);
                    }
                    coeff /= (1LL << 54) / fone;
                }
#if 0
                else if (flags & SWS_X) {
                    double p = param ? param[0] * 0.01 : 0.3;
                    coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0;
                    coeff *= pow(2.0, -p * d * d);
                }
#endif
                else if (flags & SWS_X) {
                    double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
                    double c;

                    if (floatd < 1.0)
                        c = cos(floatd * M_PI);
                    else
                        c = -1.0;
                    if (c < 0.0)
                        c = -pow(-c, A);
                    else
                        c = pow(c, A);
                    coeff = (c * 0.5 + 0.5) * fone;
                } else if (flags & SWS_AREA) {
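                    /* Box filter: the weight is the overlap between the
                     * destination pixel's footprint and this source pixel,
                     * derived from the half-pixel offset d2. */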
                    int64_t d2 = d - (1 << 29);
                    if (d2 * xInc < -(1LL << (29 + 16)))
                        coeff = 1.0 * (1LL << (30 + 16));
                    else if (d2 * xInc < (1LL << (29 + 16)))
                        coeff = -d2 * xInc + (1LL << (29 + 16));
                    else
                        coeff = 0.0;
                    coeff *= fone >> (30 + 16);
                } else if (flags & SWS_GAUSS) {
                    double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = (pow(2.0, -p * floatd * floatd)) * fone;
                } else if (flags & SWS_SINC) {
                    coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
                } else if (flags & SWS_LANCZOS) {
                    double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
                             (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
                    if (floatd > p)
                        coeff = 0;
                } else if (flags & SWS_BILINEAR) {
                    coeff = (1 << 30) - d;
                    if (coeff < 0)
                        coeff = 0;
                    coeff *= fone >> 30;
                } else if (flags & SWS_SPLINE) {
                    double p = -2.196152422706632;
                    coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
                } else {
                    coeff = 0.0; // GCC warning killer
                    av_assert0(0);
                }

                filter[i * filterSize + j] = coeff;
                xx++;
            }
            xDstInSrc += 2 * xInc;
        }
    }

    /* apply src & dst Filter to filter -> filter2
     * av_free(filter);
     */
    av_assert0(filterSize > 0);
    filter2Size = filterSize;
    if (srcFilter)
        filter2Size += srcFilter->length - 1;
    if (dstFilter)
        filter2Size += dstFilter->length - 1;
    av_assert0(filter2Size > 0);
    FF_ALLOCZ_OR_GOTO(NULL, filter2, dstW * sizeof(*filter2) * filter2Size, fail);

    for (i = 0; i < dstW; i++) {
        int j, k;

        if (srcFilter) {
            for (k = 0; k < srcFilter->length; k++) {
                for (j = 0; j < filterSize; j++)
                    filter2[i * filter2Size + k + j] +=
                        srcFilter->coeff[k] * filter[i * filterSize + j];
            }
        } else {
            for (j = 0; j < filterSize; j++)
                filter2[i * filter2Size + j] = filter[i * filterSize + j];
        }
        // FIXME dstFilter

        (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
    }
    av_freep(&filter);

    /* try to reduce the filter-size (step1 find size and shift left) */
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
    minFilterSize = 0;
    for (i = dstW - 1; i >= 0; i--) {
        int min = filter2Size;
        int j;
        int64_t cutOff = 0.0;

        /* get rid of near zero elements on the left by shifting left */
        for (j = 0; j < filter2Size; j++) {
            int k;
            cutOff += FFABS(filter2[i * filter2Size]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
                break;

            /* preserve monotonicity because the core can't handle the
             * filter otherwise */
            if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
                break;

            // move filter coefficients left
            for (k = 1; k < filter2Size; k++)
                filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
            filter2[i * filter2Size + k - 1] = 0;
            (*filterPos)[i]++;
        }

        cutOff = 0;
        /* count near zeros on the right */
        for (j = filter2Size - 1; j > 0; j--) {
            cutOff += FFABS(filter2[i * filter2Size + j]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
                break;
            min--;
        }

        if (min > minFilterSize)
            minFilterSize = min;
    }

    if (PPC_ALTIVEC(cpu_flags)) {
        // we can handle the special case 4, so we don't want to go the full 8
        if (minFilterSize < 5)
            filterAlign = 4;

        /* We really don't want to waste our time doing useless computation, so
         * fall back on the scalar C code for very small filters.
         * Vectorizing is worth it only if you have a decent-sized vector. */
        if (minFilterSize < 3)
            filterAlign = 1;
    }

    if (INLINE_MMX(cpu_flags)) {
        // special case for unscaled vertical filtering
        if (minFilterSize == 1 && filterAlign == 2)
            filterAlign = 1;
    }

    av_assert0(minFilterSize > 0);
    filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
    av_assert0(filterSize > 0);
    filter = av_malloc(filterSize * dstW * sizeof(*filter));
    if (filterSize >= MAX_FILTER_SIZE * 16 /
                      ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter) {
        av_log(NULL, AV_LOG_ERROR, "sws: filterSize %d is too large, try less extreme scaling or increase MAX_FILTER_SIZE and recompile\n", filterSize);
        goto fail;
    }
    *outFilterSize = filterSize;

564
567 "SwScaler: reducing / aligning filtersize %d -> %d\n",
568 filter2Size, filterSize);
569 /* try to reduce the filter-size (step2 reduce it) */
570 for (i = 0; i < dstW; i++) {
571 int j;
572
573 for (j = 0; j < filterSize; j++) {
574 if (j >= filter2Size)
575 filter[i * filterSize + j] = 0;
576 else
577 filter[i * filterSize + j] = filter2[i * filter2Size + j];
579 filter[i * filterSize + j] = 0;
580 }
581 }
582
    // FIXME try to align filterPos if possible

    // fix borders
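    /* Taps that would fall outside [0, srcW) are folded onto the nearest
     * valid source pixel, so the scaling loops never read out of bounds. */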
    for (i = 0; i < dstW; i++) {
        int j;
        if ((*filterPos)[i] < 0) {
            // move filter coefficients left to compensate for filterPos
            for (j = 1; j < filterSize; j++) {
                int left = FFMAX(j + (*filterPos)[i], 0);
                filter[i * filterSize + left] += filter[i * filterSize + j];
                filter[i * filterSize + j]     = 0;
            }
            (*filterPos)[i] = 0;
        }

        if ((*filterPos)[i] + filterSize > srcW) {
            int shift = (*filterPos)[i] + filterSize - srcW;
            // move filter coefficients right to compensate for filterPos
            for (j = filterSize - 2; j >= 0; j--) {
                int right = FFMIN(j + shift, filterSize - 1);
                filter[i * filterSize + right] += filter[i * filterSize + j];
                filter[i * filterSize + j]      = 0;
            }
            (*filterPos)[i] = srcW - filterSize;
        }
    }

    // Note the +1 is for the MMX scaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
    FF_ALLOCZ_OR_GOTO(NULL, *outFilter,
                      *outFilterSize * (dstW + 3) * sizeof(int16_t), fail);

    /* normalize & store in outFilter */
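    /* The error-diffusion rounding below keeps each row of integer
     * coefficients summing exactly to 'one', so quantization can never
     * brighten or darken the picture. */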
    for (i = 0; i < dstW; i++) {
        int j;
        int64_t error = 0;
        int64_t sum   = 0;

        for (j = 0; j < filterSize; j++) {
            sum += filter[i * filterSize + j];
        }
        sum = (sum + one / 2) / one;
        if (!sum) {
            av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n");
            sum = 1;
        }
        for (j = 0; j < *outFilterSize; j++) {
            int64_t v = filter[i * filterSize + j] + error;
            int intV  = ROUNDED_DIV(v, sum);
            (*outFilter)[i * (*outFilterSize) + j] = intV;
            error                                  = v - intV * sum;
        }
    }

    (*filterPos)[dstW + 0] =
    (*filterPos)[dstW + 1] =
    (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; /* the MMX/SSE scaler will
                                                      * read over the end */
    for (i = 0; i < *outFilterSize; i++) {
        int k = (dstW - 1) * (*outFilterSize) + i;
        (*outFilter)[k + 1 * (*outFilterSize)] =
        (*outFilter)[k + 2 * (*outFilterSize)] =
        (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
    }

    ret = 0;

fail:
    if (ret < 0)
        av_log(NULL, AV_LOG_ERROR, "sws: initFilter failed\n");
    av_free(filter);
    av_free(filter2);
    return ret;
}

#if HAVE_MMXEXT_INLINE
static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
                                       int16_t *filter, int32_t *filterPos,
                                       int numSplits)
{
    uint8_t *fragmentA;
    x86_reg imm8OfPShufW1A;
    x86_reg imm8OfPShufW2A;
    x86_reg fragmentLengthA;
    uint8_t *fragmentB;
    x86_reg imm8OfPShufW1B;
    x86_reg imm8OfPShufW2B;
    x86_reg fragmentLengthB;
    int fragmentPos;

    int xpos, i;

    // create an optimized horizontal scaling routine
    /* This scaler is made of runtime-generated MMXEXT code using specially tuned
     * pshufw instructions. For every four output pixels, if four input pixels
     * are enough for the fast bilinear scaling, then a chunk of fragmentB is
     * used. If five input pixels are needed, then a chunk of fragmentA is used.
     */

    // code fragment

    __asm__ volatile (
        "jmp 9f                             \n\t"
        // Begin
        "0:                                 \n\t"
        "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
        "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
        "movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm1             \n\t"
        "punpcklbw %%mm7, %%mm0             \n\t"
        "pshufw $0xFF, %%mm1, %%mm1         \n\t"
        "1:                                 \n\t"
        "pshufw $0xFF, %%mm0, %%mm0         \n\t"
        "2:                                 \n\t"
        "psubw %%mm1, %%mm0                 \n\t"
        "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
        "pmullw %%mm3, %%mm0                \n\t"
        "psllw $7, %%mm1                    \n\t"
        "paddw %%mm1, %%mm0                 \n\t"

        "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"

        "add $8, %%"REG_a"                  \n\t"
        // End
        "9:                                 \n\t"
        // "int $3                          \n\t"
        "lea " LOCAL_MANGLE(0b) ", %0       \n\t"
        "lea " LOCAL_MANGLE(1b) ", %1       \n\t"
        "lea " LOCAL_MANGLE(2b) ", %2       \n\t"
        "dec %1                             \n\t"
        "dec %2                             \n\t"
        "sub %0, %1                         \n\t"
        "sub %0, %2                         \n\t"
        "lea " LOCAL_MANGLE(9b) ", %3       \n\t"
        "sub %0, %3                         \n\t"


        : "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
          "=r" (fragmentLengthA)
    );

    __asm__ volatile (
        "jmp 9f                             \n\t"
        // Begin
        "0:                                 \n\t"
        "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
        "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
        "punpcklbw %%mm7, %%mm0             \n\t"
        "pshufw $0xFF, %%mm0, %%mm1         \n\t"
        "1:                                 \n\t"
        "pshufw $0xFF, %%mm0, %%mm0         \n\t"
        "2:                                 \n\t"
        "psubw %%mm1, %%mm0                 \n\t"
        "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
        "pmullw %%mm3, %%mm0                \n\t"
        "psllw $7, %%mm1                    \n\t"
        "paddw %%mm1, %%mm0                 \n\t"

        "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"

        "add $8, %%"REG_a"                  \n\t"
        // End
        "9:                                 \n\t"
        // "int $3                          \n\t"
        "lea " LOCAL_MANGLE(0b) ", %0       \n\t"
        "lea " LOCAL_MANGLE(1b) ", %1       \n\t"
        "lea " LOCAL_MANGLE(2b) ", %2       \n\t"
        "dec %1                             \n\t"
        "dec %2                             \n\t"
        "sub %0, %1                         \n\t"
        "sub %0, %2                         \n\t"
        "lea " LOCAL_MANGLE(9b) ", %3       \n\t"
        "sub %0, %3                         \n\t"


        : "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
          "=r" (fragmentLengthB)
    );

    xpos        = 0; // lumXInc/2 - 0x8000; // difference between pixel centers
    fragmentPos = 0;

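    /* For each group of four output pixels, one code fragment is copied and
     * the immediate bytes of its two pshufw instructions (at offsets
     * imm8OfPShufW1/imm8OfPShufW2 within the fragment) are patched so the
     * fragment shuffles exactly the source pixels this group needs. */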
    for (i = 0; i < dstW / numSplits; i++) {
        int xx = xpos >> 16;

        if ((i & 3) == 0) {
            int a                  = 0;
            int b                  = ((xpos + xInc) >> 16) - xx;
            int c                  = ((xpos + xInc * 2) >> 16) - xx;
            int d                  = ((xpos + xInc * 3) >> 16) - xx;
            int inc                = (d + 1 < 4);
            uint8_t *fragment      = (d + 1 < 4) ? fragmentB : fragmentA;
            x86_reg imm8OfPShufW1  = (d + 1 < 4) ? imm8OfPShufW1B : imm8OfPShufW1A;
            x86_reg imm8OfPShufW2  = (d + 1 < 4) ? imm8OfPShufW2B : imm8OfPShufW2A;
            x86_reg fragmentLength = (d + 1 < 4) ? fragmentLengthB : fragmentLengthA;
            int maxShift           = 3 - (d + inc);
            int shift              = 0;

            if (filterCode) {
                filter[i]        = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9;
                filter[i + 1]    = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9;
                filter[i + 2]    = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
                filter[i + 3]    = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
                filterPos[i / 2] = xx;

                memcpy(filterCode + fragmentPos, fragment, fragmentLength);

                filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) |
                                                          ((b + inc) << 2) |
                                                          ((c + inc) << 4) |
                                                          ((d + inc) << 6);
                filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) |
                                                          (c << 4) |
                                                          (d << 6);

                if (i + 4 - inc >= dstW)
                    shift = maxShift; // avoid overread
                else if ((filterPos[i / 2] & 3) <= maxShift)
                    shift = filterPos[i / 2] & 3; // align

                if (shift && i >= shift) {
                    filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift;
                    filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift;
                    filterPos[i / 2]                        -= shift;
                }
            }

            fragmentPos += fragmentLength;

            if (filterCode)
                filterCode[fragmentPos] = RET;
        }
        xpos += xInc;
    }
    if (filterCode)
        filterPos[((i / 2) + 1) & (~1)] = xpos >> 16; // needed to jump to the next part

    return fragmentPos + 1;
}
#endif /* HAVE_MMXEXT_INLINE */

static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange)
{
    int64_t W, V, Z, Cy, Cu, Cv;
    int64_t vr =  table[0];
    int64_t ub =  table[1];
    int64_t ug = -table[2];
    int64_t vg = -table[3];
    int64_t ONE = 65536;
    int64_t cy = ONE;
    uint8_t *p = (uint8_t *)c->input_rgb2yuv_table;
    int i;
    static const int8_t map[] = {
        -1, -1, -1, -1, -1, -1, -1, -1, //24
        -1, -1, -1, -1, -1, -1, -1, -1, //25
        -1, -1, -1, -1, -1, -1, -1, -1, //26
        -1, -1, -1, -1, -1, -1, -1, -1, //27
        -1, -1, -1, -1, -1, -1, -1, -1, //28
        -1, -1, -1, -1, -1, -1, -1, -1, //29
        -1, -1, -1, -1, -1, -1, -1, -1, //30
        -1, -1, -1, -1, -1, -1, -1, -1, //31
    };

    dstRange = 0; //FIXME range = 1 is handled elsewhere

    if (!dstRange) {
        cy = cy * 255 / 219;
    } else {
        vr = vr * 224 / 255;
        ub = ub * 224 / 255;
        ug = ug * 224 / 255;
        vg = vg * 224 / 255;
    }
        c->input_rgb2yuv_table[BY_IDX] =  ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[BU_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[GY_IDX] =  ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[RY_IDX] =  ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[RV_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
    }
    for (i = 0; i < FF_ARRAY_ELEMS(map); i++)
        AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0);
}

static void fill_xyztables(struct SwsContext *c)
{
    int i;
    double xyzgamma = XYZ_GAMMA;
    double rgbgamma = 1.0 / RGB_GAMMA;
    double xyzgammainv = 1.0 / XYZ_GAMMA;
    double rgbgammainv = RGB_GAMMA;
    static const int16_t xyz2rgb_matrix[3][4] = {
        {13270, -6295, -2041},
        {-3969,  7682,   170},
        {  228,  -835,  4329} };
    static const int16_t rgb2xyz_matrix[3][4] = {
        {1689, 1464,  739},
        { 871, 2929,  296},
        {  79,  488, 3891} };
    static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096];

    memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix));
    memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix));
    c->xyzgamma    = xyzgamma_tab;
    c->rgbgamma    = rgbgamma_tab;
    c->xyzgammainv = xyzgammainv_tab;
    c->rgbgammainv = rgbgammainv_tab;

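    /* The 12-bit gamma tables are static and shared between contexts; a
     * nonzero last entry means they have already been initialized. */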
    if (rgbgamma_tab[4095])
        return;

    /* set gamma vectors */
    for (i = 0; i < 4096; i++) {
        xyzgamma_tab[i]    = lrint(pow(i / 4095.0, xyzgamma) * 4095.0);
        rgbgamma_tab[i]    = lrint(pow(i / 4095.0, rgbgamma) * 4095.0);
        xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0);
        rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0);
    }
}

int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
                             int srcRange, const int table[4], int dstRange,
                             int brightness, int contrast, int saturation)
{
    memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
    memmove(c->dstColorspaceTable, table, sizeof(int) * 4);

    handle_formats(c);

    if (!isYUV(c->dstFormat) && !isGray(c->dstFormat))
        dstRange = 0;
    if (!isYUV(c->srcFormat) && !isGray(c->srcFormat))
        srcRange = 0;

    c->brightness = brightness;
    c->contrast   = contrast;
    c->saturation = saturation;
    c->srcRange   = srcRange;
    c->dstRange   = dstRange;

    if (isYUV(c->dstFormat) || isGray(c->dstFormat))
        return -1;

    if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) {
        ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness,
                                 contrast, saturation);
        // FIXME factorize

        if (ARCH_PPC)
            ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness,
                                       contrast, saturation);
    }

    fill_rgb2yuv_table(c, table, dstRange);

    return 0;
}

int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
                             int *srcRange, int **table, int *dstRange,
                             int *brightness, int *contrast, int *saturation)
{
    if (!c)
        return -1;

    *inv_table  = c->srcColorspaceTable;
    *table      = c->dstColorspaceTable;
    *srcRange   = c->srcRange;
    *dstRange   = c->dstRange;
    *brightness = c->brightness;
    *contrast   = c->contrast;
    *saturation = c->saturation;

    return 0;
}

static int handle_jpeg(enum AVPixelFormat *format)
{
    switch (*format) {
    case AV_PIX_FMT_YUVJ420P:
        *format = AV_PIX_FMT_YUV420P;
        return 1;
    case AV_PIX_FMT_YUVJ411P:
        *format = AV_PIX_FMT_YUV411P;
        return 1;
    case AV_PIX_FMT_YUVJ422P:
        *format = AV_PIX_FMT_YUV422P;
        return 1;
    case AV_PIX_FMT_YUVJ444P:
        *format = AV_PIX_FMT_YUV444P;
        return 1;
    case AV_PIX_FMT_YUVJ440P:
        *format = AV_PIX_FMT_YUV440P;
        return 1;
    case AV_PIX_FMT_GRAY8:
        return 1;
    default:
        return 0;
    }
}

static int handle_0alpha(enum AVPixelFormat *format)
{
    switch (*format) {
    case AV_PIX_FMT_0BGR: *format = AV_PIX_FMT_ABGR; return 1;
    case AV_PIX_FMT_BGR0: *format = AV_PIX_FMT_BGRA; return 1;
    case AV_PIX_FMT_0RGB: *format = AV_PIX_FMT_ARGB; return 1;
    case AV_PIX_FMT_RGB0: *format = AV_PIX_FMT_RGBA; return 1;
    default:              return 0;
    }
}

static int handle_xyz(enum AVPixelFormat *format)
{
    switch (*format) {
    case AV_PIX_FMT_XYZ12BE: *format = AV_PIX_FMT_RGB48BE; return 1;
    case AV_PIX_FMT_XYZ12LE: *format = AV_PIX_FMT_RGB48LE; return 1;
    default:                 return 0;
    }
}

static void handle_formats(SwsContext *c)
{
    c->src0Alpha |= handle_0alpha(&c->srcFormat);
    c->dst0Alpha |= handle_0alpha(&c->dstFormat);
    c->srcXYZ    |= handle_xyz(&c->srcFormat);
    c->dstXYZ    |= handle_xyz(&c->dstFormat);
    if (c->srcXYZ || c->dstXYZ)
        fill_xyztables(c);
}

SwsContext *sws_alloc_context(void)
{
    SwsContext *c = av_mallocz(sizeof(SwsContext));

    if (c) {
        c->av_class = &sws_context_class;
        av_opt_set_defaults(c);
    }

    return c;
}

av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                             SwsFilter *dstFilter)
{
    int i, j;
    int usesVFilter, usesHFilter;
    int unscaled;
    SwsFilter dummyFilter = { NULL, NULL, NULL, NULL };
    int srcW              = c->srcW;
    int srcH              = c->srcH;
    int dstW              = c->dstW;
    int dstH              = c->dstH;
    int dst_stride        = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
    int flags, cpu_flags;
    enum AVPixelFormat srcFormat = c->srcFormat;
    enum AVPixelFormat dstFormat = c->dstFormat;
    const AVPixFmtDescriptor *desc_src;
    const AVPixFmtDescriptor *desc_dst;

    cpu_flags = av_get_cpu_flags();
    flags     = c->flags;
    emms_c();

    unscaled = (srcW == dstW && srcH == dstH);

    c->srcRange |= handle_jpeg(&c->srcFormat);
    c->dstRange |= handle_jpeg(&c->dstFormat);

    if (srcFormat != c->srcFormat || dstFormat != c->dstFormat)
        av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n");

    handle_formats(c);
    desc_src = av_pix_fmt_desc_get(srcFormat = c->srcFormat);
    desc_dst = av_pix_fmt_desc_get(dstFormat = c->dstFormat);

    if (!sws_isSupportedInput(srcFormat)) {
        av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n",
               av_get_pix_fmt_name(srcFormat));
        return AVERROR(EINVAL);
    }
    if (!sws_isSupportedOutput(dstFormat)) {
        av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n",
               av_get_pix_fmt_name(dstFormat));
        return AVERROR(EINVAL);
    }

    i = flags & (SWS_POINT | SWS_AREA | SWS_BILINEAR | SWS_FAST_BILINEAR |
                 SWS_BICUBIC | SWS_X | SWS_GAUSS | SWS_LANCZOS |
                 SWS_SINC | SWS_SPLINE | SWS_BICUBLIN);

    /* provide a default scaler if not set by caller */
    if (!i) {
        if (dstW < srcW && dstH < srcH)
            flags |= SWS_BICUBIC;
        else if (dstW > srcW && dstH > srcH)
            flags |= SWS_BICUBIC;
        else
            flags |= SWS_BICUBIC;
        c->flags = flags;
    } else if (i & (i - 1)) {
        av_log(c, AV_LOG_ERROR,
               "Exactly one scaler algorithm must be chosen, got %X\n", i);
        return AVERROR(EINVAL);
    }
    /* sanity check */
    if (srcW < 1 || srcH < 1 || dstW < 1 || dstH < 1) {
        /* FIXME check if these are enough and try to lower them after
         * fixing the relevant parts of the code */
        av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n",
               srcW, srcH, dstW, dstH);
        return AVERROR(EINVAL);
    }

    if (!dstFilter)
        dstFilter = &dummyFilter;
    if (!srcFilter)
        srcFilter = &dummyFilter;

    c->lumXInc      = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
    c->lumYInc      = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
    c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
    c->srcFormatBpp = av_get_bits_per_pixel(desc_src);
    c->vRounder     = 4 * 0x0001000100010001ULL;

    usesVFilter = (srcFilter->lumV && srcFilter->lumV->length > 1) ||
                  (srcFilter->chrV && srcFilter->chrV->length > 1) ||
                  (dstFilter->lumV && dstFilter->lumV->length > 1) ||
                  (dstFilter->chrV && dstFilter->chrV->length > 1);
    usesHFilter = (srcFilter->lumH && srcFilter->lumH->length > 1) ||
                  (srcFilter->chrH && srcFilter->chrH->length > 1) ||
                  (dstFilter->lumH && dstFilter->lumH->length > 1) ||
                  (dstFilter->chrH && dstFilter->chrH->length > 1);


    if (dstW & 1) {
        av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n");
        flags   |= SWS_FULL_CHR_H_INT;
        c->flags = flags;
    }

    if (isAnyRGB(dstFormat)) {
        if (c->chrSrcHSubSample == 0 && c->chrSrcVSubSample == 0 &&
            !(c->flags & SWS_FULL_CHR_H_INT)) {
            av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to input having non subsampled chroma\n");
            flags   |= SWS_FULL_CHR_H_INT;
            c->flags = flags;
        }
    }

    if (c->dither == SWS_DITHER_AUTO) {
        if (flags & SWS_ERROR_DIFFUSION)
            c->dither = SWS_DITHER_ED;
    }

    if (dstFormat == AV_PIX_FMT_BGR4_BYTE ||
        dstFormat == AV_PIX_FMT_RGB4_BYTE ||
        dstFormat == AV_PIX_FMT_BGR8      ||
        dstFormat == AV_PIX_FMT_RGB8) {
        if (!(flags & SWS_FULL_CHR_H_INT)) {
            if (c->dither == SWS_DITHER_ED) {
                av_log(c, AV_LOG_DEBUG,
                       "Desired dithering only supported in full chroma interpolation for destination format '%s'\n",
                       av_get_pix_fmt_name(dstFormat));
                flags   |= SWS_FULL_CHR_H_INT;
                c->flags = flags;
            }
        }
        if (flags & SWS_FULL_CHR_H_INT) {
            if (c->dither == SWS_DITHER_BAYER) {
                av_log(c, AV_LOG_DEBUG,
                       "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n",
                       av_get_pix_fmt_name(dstFormat));
                c->dither = SWS_DITHER_ED;
            }
        }
    }
    if (isPlanarRGB(dstFormat)) {
        if (!(flags & SWS_FULL_CHR_H_INT)) {
            av_log(c, AV_LOG_ERROR,
                   "%s output is not supported with half chroma resolution, switching to full\n",
                   av_get_pix_fmt_name(dstFormat));
            flags   |= SWS_FULL_CHR_H_INT;
            c->flags = flags;
        }
    }

    /* reuse chroma for 2 pixels RGB/BGR unless user wants full
     * chroma interpolation */
    if (flags & SWS_FULL_CHR_H_INT  &&
        isAnyRGB(dstFormat)         &&
        dstFormat != AV_PIX_FMT_RGBA  &&
        dstFormat != AV_PIX_FMT_ARGB  &&
        dstFormat != AV_PIX_FMT_BGRA  &&
        dstFormat != AV_PIX_FMT_ABGR  &&
        dstFormat != AV_PIX_FMT_RGB24 &&
        dstFormat != AV_PIX_FMT_BGR24
    ) {
        av_log(c, AV_LOG_WARNING,
               "full chroma interpolation for destination format '%s' not yet implemented\n",
               av_get_pix_fmt_name(dstFormat));
        flags   &= ~SWS_FULL_CHR_H_INT;
        c->flags = flags;
    }
    if (isAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT))
        c->chrDstHSubSample = 1;

    // drop some chroma lines if the user wants it
    c->vChrDrop          = (flags & SWS_SRC_V_CHR_DROP_MASK) >> SWS_SRC_V_CHR_DROP_SHIFT;
    c->chrSrcVSubSample += c->vChrDrop;

    /* drop every other pixel for chroma calculation unless user
     * wants full chroma */
    if (isAnyRGB(srcFormat) && !(flags & SWS_FULL_CHR_H_INP) &&
        srcFormat != AV_PIX_FMT_RGB8      && srcFormat != AV_PIX_FMT_BGR8      &&
        srcFormat != AV_PIX_FMT_RGB4      && srcFormat != AV_PIX_FMT_BGR4      &&
        srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE)
        c->chrSrcHSubSample = 1;

    // Note the FF_CEIL_RSHIFT is so that we always round toward +inf.
    c->chrSrcW = FF_CEIL_RSHIFT(srcW, c->chrSrcHSubSample);
    c->chrSrcH = FF_CEIL_RSHIFT(srcH, c->chrSrcVSubSample);
    c->chrDstW = FF_CEIL_RSHIFT(dstW, c->chrDstHSubSample);
    c->chrDstH = FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample);

    /* unscaled special cases */
    if (unscaled && !usesHFilter && !usesVFilter &&
        c->srcRange == c->dstRange && !(c->flags & SWS_ACCURATE_RND)) {
        ff_get_unscaled_swscale(c);

        if (c->swscale) {
            if (flags & SWS_PRINT_INFO)
                av_log(c, AV_LOG_INFO,
                       "using unscaled %s -> %s special converter\n",
                       av_get_pix_fmt_name(srcFormat),
                       av_get_pix_fmt_name(dstFormat));
            return 0;
        }
    }

    if (c->dstBpc == 16)
        dst_stride <<= 1;

    if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) {
        c->canMMXEXTBeUsed = dstW >= srcW && (dstW & 31) == 0 &&
                             (srcW & 15) == 0;
        if (!c->canMMXEXTBeUsed && dstW >= srcW && (srcW & 15) == 0 &&
            (flags & SWS_FAST_BILINEAR)) {
            if (flags & SWS_PRINT_INFO)
                av_log(c, AV_LOG_INFO,
                       "output width is not a multiple of 32 -> no MMXEXT scaler\n");
        }
        if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat))
            c->canMMXEXTBeUsed = 0;
    } else
        c->canMMXEXTBeUsed = 0;

    c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
    c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;

    /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src
     * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do
     * correct scaling.
     * n-2 is the last chrominance sample available.
     * This is not perfect, but no one should notice the difference, the more
     * correct variant would be like the vertical one, but that would require
     * some special code for the first and last pixel */
    if (flags & SWS_FAST_BILINEAR) {
        if (c->canMMXEXTBeUsed) {
            c->lumXInc += 20;
            c->chrXInc += 20;
        }
        // we don't use the x86 asm scaler if MMX is available
        else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) {
            c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20;
            c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
        }
    }

#define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS)

    /* precalculate horizontal scaler filter coefficients */
    {
#if HAVE_MMXEXT_INLINE
        // can't downscale !!!
        if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) {
            c->lumMmxextFilterCodeSize = init_hscaler_mmxext(dstW, c->lumXInc, NULL,
                                                             NULL, NULL, 8);
            c->chrMmxextFilterCodeSize = init_hscaler_mmxext(c->chrDstW, c->chrXInc,
                                                             NULL, NULL, NULL, 4);

#if USE_MMAP
            c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize,
                                          PROT_READ | PROT_WRITE,
                                          MAP_PRIVATE | MAP_ANONYMOUS,
                                          -1, 0);
            c->chrMmxextFilterCode = mmap(NULL, c->chrMmxextFilterCodeSize,
                                          PROT_READ | PROT_WRITE,
                                          MAP_PRIVATE | MAP_ANONYMOUS,
                                          -1, 0);
#elif HAVE_VIRTUALALLOC
            c->lumMmxextFilterCode = VirtualAlloc(NULL,
                                                  c->lumMmxextFilterCodeSize,
                                                  MEM_COMMIT,
                                                  PAGE_EXECUTE_READWRITE);
            c->chrMmxextFilterCode = VirtualAlloc(NULL,
                                                  c->chrMmxextFilterCodeSize,
                                                  MEM_COMMIT,
                                                  PAGE_EXECUTE_READWRITE);
#else
            c->lumMmxextFilterCode = av_malloc(c->lumMmxextFilterCodeSize);
            c->chrMmxextFilterCode = av_malloc(c->chrMmxextFilterCodeSize);
#endif

#ifdef MAP_ANONYMOUS
            if (c->lumMmxextFilterCode == MAP_FAILED || c->chrMmxextFilterCode == MAP_FAILED)
#else
            if (!c->lumMmxextFilterCode || !c->chrMmxextFilterCode)
#endif
            {
                av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n");
                return AVERROR(ENOMEM);
            }

            FF_ALLOCZ_OR_GOTO(c, c->hLumFilter,    (dstW           / 8 + 8) * sizeof(int16_t), fail);
            FF_ALLOCZ_OR_GOTO(c, c->hChrFilter,    (c->chrDstW     / 4 + 8) * sizeof(int16_t), fail);
            FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW       / 2 / 8 + 8) * sizeof(int32_t), fail);
            FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);

            init_hscaler_mmxext(dstW,       c->lumXInc, c->lumMmxextFilterCode,
                                c->hLumFilter, (uint32_t *)c->hLumFilterPos, 8);
            init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
                                c->hChrFilter, (uint32_t *)c->hChrFilterPos, 4);

#if USE_MMAP
            if (mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1 ||
                mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1) {
                av_log(c, AV_LOG_ERROR, "mprotect failed, cannot use fast bilinear scaler\n");
                goto fail;
            }
#endif
        } else
#endif /* HAVE_MMXEXT_INLINE */
        {
            const int filterAlign = X86_MMX(cpu_flags)     ? 4 :
                                    PPC_ALTIVEC(cpu_flags) ? 8 : 1;

            if (initFilter(&c->hLumFilter, &c->hLumFilterPos,
                           &c->hLumFilterSize, c->lumXInc,
                           srcW, dstW, filterAlign, 1 << 14,
                           (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
                           cpu_flags, srcFilter->lumH, dstFilter->lumH,
                           c->param,
                           get_local_pos(c, 0, 0, 0),
                           get_local_pos(c, 0, 0, 0)) < 0)
                goto fail;
            if (initFilter(&c->hChrFilter, &c->hChrFilterPos,
                           &c->hChrFilterSize, c->chrXInc,
                           c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
                           (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
                           cpu_flags, srcFilter->chrH, dstFilter->chrH,
                           c->param,
                           get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0),
                           get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0)) < 0)
                goto fail;
        }
    } // initialize horizontal stuff

    /* precalculate vertical scaler filter coefficients */
    {
        const int filterAlign = X86_MMX(cpu_flags)     ? 2 :
                                PPC_ALTIVEC(cpu_flags) ? 8 : 1;

        if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
                       c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
                       (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
                       cpu_flags, srcFilter->lumV, dstFilter->lumV,
                       c->param,
                       get_local_pos(c, 0, 0, 1),
                       get_local_pos(c, 0, 0, 1)) < 0)
            goto fail;
        if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
                       c->chrYInc, c->chrSrcH, c->chrDstH,
                       filterAlign, (1 << 12),
                       (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
                       cpu_flags, srcFilter->chrV, dstFilter->chrV,
                       c->param,
                       get_local_pos(c, c->chrSrcVSubSample, c->src_v_chr_pos, 1),
                       get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1)) < 0)

            goto fail;

#if HAVE_ALTIVEC
        FF_ALLOC_OR_GOTO(c, c->vYCoeffsBank, sizeof(vector signed short) * c->vLumFilterSize * c->dstH,    fail);
        FF_ALLOC_OR_GOTO(c, c->vCCoeffsBank, sizeof(vector signed short) * c->vChrFilterSize * c->chrDstH, fail);

        for (i = 0; i < c->vLumFilterSize * c->dstH; i++) {
            int j;
            short *p = (short *)&c->vYCoeffsBank[i];
            for (j = 0; j < 8; j++)
                p[j] = c->vLumFilter[i];
        }

        for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) {
            int j;
            short *p = (short *)&c->vCCoeffsBank[i];
            for (j = 0; j < 8; j++)
                p[j] = c->vChrFilter[i];
        }
#endif
    }

    // calculate buffer sizes so that they won't run out while handling these damn slices
    c->vLumBufSize = c->vLumFilterSize;
    c->vChrBufSize = c->vChrFilterSize;
    for (i = 0; i < dstH; i++) {
        int chrI      = (int64_t)i * c->chrDstH / dstH;
        int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1,
                              ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)
                               << c->chrSrcVSubSample));

        nextSlice >>= c->chrSrcVSubSample;
        nextSlice <<= c->chrSrcVSubSample;
        if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice)
            c->vLumBufSize = nextSlice - c->vLumFilterPos[i];
        if (c->vChrFilterPos[chrI] + c->vChrBufSize <
            (nextSlice >> c->chrSrcVSubSample))
            c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) -
                             c->vChrFilterPos[chrI];
    }

    for (i = 0; i < 4; i++)
        FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (dstW + 2) * sizeof(int), fail);

    /* Allocate pixbufs (we use dynamic allocation because otherwise we would
     * need to allocate several megabytes to handle all possible cases) */
    FF_ALLOC_OR_GOTO(c, c->lumPixBuf,  c->vLumBufSize * 3 * sizeof(int16_t *), fail);
    FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail);
    FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail);
    if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
        FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail);
    /* Note we need at least one pixel more at the end because of the MMX code
     * (just in case someone wants to replace the 4000/8000). */
    /* align at 16 bytes for AltiVec */
    for (i = 0; i < c->vLumBufSize; i++) {
        FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i + c->vLumBufSize],
                          dst_stride + 16, fail);
        c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize];
    }
    // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
    c->uv_off   = (dst_stride >> 1) + 64 / (c->dstBpc & ~7);
    c->uv_offx2 = dst_stride + 16;
    for (i = 0; i < c->vChrBufSize; i++) {
        FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize],
                         dst_stride * 2 + 32, fail);
        c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize];
        c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize]
                         = c->chrUPixBuf[i] + (dst_stride >> 1) + 8;
    }
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
        for (i = 0; i < c->vLumBufSize; i++) {
            FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i + c->vLumBufSize],
                              dst_stride + 16, fail);
            c->alpPixBuf[i] = c->alpPixBuf[i + c->vLumBufSize];
        }

    // try to avoid drawing green stuff between the right end and the stride end
    for (i = 0; i < c->vChrBufSize; i++)
        if (desc_dst->comp[0].depth_minus1 == 15) {
            for (j = 0; j < dst_stride / 2 + 1; j++)
                ((int32_t *)(c->chrUPixBuf[i]))[j] = 1 << 18;
        } else
            for (j = 0; j < dst_stride + 1; j++)
                ((int16_t *)(c->chrUPixBuf[i]))[j] = 1 << 14;

    if (flags & SWS_PRINT_INFO) {
        const char *scaler, *cpucaps;
        if (flags & SWS_FAST_BILINEAR)
            scaler = "FAST_BILINEAR scaler";
        else if (flags & SWS_BILINEAR)
            scaler = "BILINEAR scaler";
        else if (flags & SWS_BICUBIC)
            scaler = "BICUBIC scaler";
        else if (flags & SWS_X)
            scaler = "Experimental scaler";
        else if (flags & SWS_POINT)
            scaler = "Nearest Neighbor / POINT scaler";
        else if (flags & SWS_AREA)
            scaler = "Area Averaging scaler";
        else if (flags & SWS_BICUBLIN)
            scaler = "luma BICUBIC / chroma BILINEAR scaler";
        else if (flags & SWS_GAUSS)
            scaler = "Gaussian scaler";
        else if (flags & SWS_SINC)
            scaler = "Sinc scaler";
        else if (flags & SWS_LANCZOS)
            scaler = "Lanczos scaler";
        else if (flags & SWS_SPLINE)
            scaler = "Bicubic spline scaler";
        else
            scaler = "ehh flags invalid?!";

        av_log(c, AV_LOG_INFO, "%s, from %s to %s%s ",
               scaler,
               av_get_pix_fmt_name(srcFormat),
#ifdef DITHER1XBPP
               dstFormat == AV_PIX_FMT_BGR555   || dstFormat == AV_PIX_FMT_BGR565   ||
               dstFormat == AV_PIX_FMT_RGB444BE || dstFormat == AV_PIX_FMT_RGB444LE ||
               dstFormat == AV_PIX_FMT_BGR444BE || dstFormat == AV_PIX_FMT_BGR444LE ?
               "dithered " : "",
#else
               "",
#endif
               av_get_pix_fmt_name(dstFormat));

        if (INLINE_MMXEXT(cpu_flags))
            cpucaps = "MMXEXT";
        else if (INLINE_AMD3DNOW(cpu_flags))
            cpucaps = "3DNOW";
        else if (INLINE_MMX(cpu_flags))
            cpucaps = "MMX";
        else if (PPC_ALTIVEC(cpu_flags))
            cpucaps = "AltiVec";
        else
            cpucaps = "C";

        av_log(c, AV_LOG_INFO, "using %s\n", cpucaps);

        av_log(c, AV_LOG_DEBUG,
               "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
        av_log(c, AV_LOG_DEBUG,
               "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH,
               c->chrXInc, c->chrYInc);
    }

    c->swscale = ff_getSwsFunc(c);
    return 0;
fail: // FIXME replace things by appropriate error codes
    return -1;
}

#if FF_API_SWS_GETCONTEXT
SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
                           int dstW, int dstH, enum AVPixelFormat dstFormat,
                           int flags, SwsFilter *srcFilter,
                           SwsFilter *dstFilter, const double *param)
{
    SwsContext *c;

    if (!(c = sws_alloc_context()))
        return NULL;

    c->flags     = flags;
    c->srcW      = srcW;
    c->srcH      = srcH;
    c->dstW      = dstW;
    c->dstH      = dstH;
    c->srcFormat = srcFormat;
    c->dstFormat = dstFormat;

    if (param) {
        c->param[0] = param[0];
        c->param[1] = param[1];
    }

    if (sws_init_context(c, srcFilter, dstFilter) < 0) {
        sws_freeContext(c);
        return NULL;
    }

    return c;
}
#endif

SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
                                float lumaSharpen, float chromaSharpen,
                                float chromaHShift, float chromaVShift,
                                int verbose)
{
    SwsFilter *filter = av_malloc(sizeof(SwsFilter));
    if (!filter)
        return NULL;

    if (lumaGBlur != 0.0) {
        filter->lumH = sws_getGaussianVec(lumaGBlur, 3.0);
        filter->lumV = sws_getGaussianVec(lumaGBlur, 3.0);
    } else {
        filter->lumH = sws_getIdentityVec();
        filter->lumV = sws_getIdentityVec();
    }

    if (chromaGBlur != 0.0) {
        filter->chrH = sws_getGaussianVec(chromaGBlur, 3.0);
        filter->chrV = sws_getGaussianVec(chromaGBlur, 3.0);
    } else {
        filter->chrH = sws_getIdentityVec();
        filter->chrV = sws_getIdentityVec();
    }

    if (chromaSharpen != 0.0) {
        SwsVector *id = sws_getIdentityVec();
        sws_scaleVec(filter->chrH, -chromaSharpen);
        sws_scaleVec(filter->chrV, -chromaSharpen);
        sws_addVec(filter->chrH, id);
        sws_addVec(filter->chrV, id);
        sws_freeVec(id);
    }

    if (lumaSharpen != 0.0) {
        SwsVector *id = sws_getIdentityVec();
        sws_scaleVec(filter->lumH, -lumaSharpen);
        sws_scaleVec(filter->lumV, -lumaSharpen);
        sws_addVec(filter->lumH, id);
        sws_addVec(filter->lumV, id);
        sws_freeVec(id);
    }

    if (chromaHShift != 0.0)
        sws_shiftVec(filter->chrH, (int)(chromaHShift + 0.5));

    if (chromaVShift != 0.0)
        sws_shiftVec(filter->chrV, (int)(chromaVShift + 0.5));

    sws_normalizeVec(filter->chrH, 1.0);
    sws_normalizeVec(filter->chrV, 1.0);
    sws_normalizeVec(filter->lumH, 1.0);
    sws_normalizeVec(filter->lumV, 1.0);

    if (verbose)
        sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
    if (verbose)
        sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG);

    return filter;
}

SwsVector *sws_allocVec(int length)
{
    SwsVector *vec;

    if (length <= 0 || length > INT_MAX / sizeof(double))
        return NULL;

    vec = av_malloc(sizeof(SwsVector));
    if (!vec)
        return NULL;
    vec->length = length;
    vec->coeff  = av_malloc(sizeof(double) * length);
    if (!vec->coeff)
        av_freep(&vec);
    return vec;
}

SwsVector *sws_getGaussianVec(double variance, double quality)
{
    const int length = (int)(variance * quality + 0.5) | 1;
    int i;
    double middle = (length - 1) * 0.5;
    SwsVector *vec;

    if (variance < 0 || quality < 0)
        return NULL;

    vec = sws_allocVec(length);

    if (!vec)
        return NULL;

    for (i = 0; i < length; i++) {
        double dist = i - middle;
        vec->coeff[i] = exp(-dist * dist / (2 * variance * variance)) /
                        sqrt(2 * variance * M_PI);
    }

    sws_normalizeVec(vec, 1.0);

    return vec;
}

SwsVector *sws_getConstVec(double c, int length)
{
    int i;
    SwsVector *vec = sws_allocVec(length);

    if (!vec)
        return NULL;

    for (i = 0; i < length; i++)
        vec->coeff[i] = c;

    return vec;
}

SwsVector *sws_getIdentityVec(void)
{
    return sws_getConstVec(1.0, 1);
}

static double sws_dcVec(SwsVector *a)
{
    int i;
    double sum = 0;

    for (i = 0; i < a->length; i++)
        sum += a->coeff[i];

    return sum;
}

void sws_scaleVec(SwsVector *a, double scalar)
{
    int i;

    for (i = 0; i < a->length; i++)
        a->coeff[i] *= scalar;
}

void sws_normalizeVec(SwsVector *a, double height)
{
    sws_scaleVec(a, height / sws_dcVec(a));
}

static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b)
{
    int length = a->length + b->length - 1;
    int i, j;
    SwsVector *vec = sws_getConstVec(0.0, length);

    if (!vec)
        return NULL;

    for (i = 0; i < a->length; i++) {
        for (j = 0; j < b->length; j++) {
            vec->coeff[i + j] += a->coeff[i] * b->coeff[j];
        }
    }

    return vec;
}

static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b)
{
    int length = FFMAX(a->length, b->length);
    int i;
    SwsVector *vec = sws_getConstVec(0.0, length);

    if (!vec)
        return NULL;

    for (i = 0; i < a->length; i++)
        vec->coeff[i + (length - 1) / 2 - (a->length - 1) / 2] += a->coeff[i];
    for (i = 0; i < b->length; i++)
        vec->coeff[i + (length - 1) / 2 - (b->length - 1) / 2] += b->coeff[i];

    return vec;
}

static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b)
{
    int length = FFMAX(a->length, b->length);
    int i;
    SwsVector *vec = sws_getConstVec(0.0, length);

    if (!vec)
        return NULL;

    for (i = 0; i < a->length; i++)
        vec->coeff[i + (length - 1) / 2 - (a->length - 1) / 2] += a->coeff[i];
    for (i = 0; i < b->length; i++)
        vec->coeff[i + (length - 1) / 2 - (b->length - 1) / 2] -= b->coeff[i];

    return vec;
}

/* shift left / or right if "shift" is negative */
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift)
{
    int length = a->length + FFABS(shift) * 2;
    int i;
    SwsVector *vec = sws_getConstVec(0.0, length);

    if (!vec)
        return NULL;

    for (i = 0; i < a->length; i++) {
        vec->coeff[i + (length    - 1) / 2 -
                       (a->length - 1) / 2 - shift] = a->coeff[i];
    }

    return vec;
}

void sws_shiftVec(SwsVector *a, int shift)
{
    SwsVector *shifted = sws_getShiftedVec(a, shift);
    av_free(a->coeff);
    a->coeff  = shifted->coeff;
    a->length = shifted->length;
    av_free(shifted);
}

void sws_addVec(SwsVector *a, SwsVector *b)
{
    SwsVector *sum = sws_sumVec(a, b);
    av_free(a->coeff);
    a->coeff  = sum->coeff;
    a->length = sum->length;
    av_free(sum);
}

void sws_subVec(SwsVector *a, SwsVector *b)
{
    SwsVector *diff = sws_diffVec(a, b);
    av_free(a->coeff);
    a->coeff  = diff->coeff;
    a->length = diff->length;
    av_free(diff);
}

void sws_convVec(SwsVector *a, SwsVector *b)
{
    SwsVector *conv = sws_getConvVec(a, b);
    av_free(a->coeff);
    a->coeff  = conv->coeff;
    a->length = conv->length;
    av_free(conv);
}

SwsVector *sws_cloneVec(SwsVector *a)
{
    SwsVector *vec = sws_allocVec(a->length);

    if (!vec)
        return NULL;

    memcpy(vec->coeff, a->coeff, a->length * sizeof(*a->coeff));

    return vec;
}

void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level)
{
    int i;
    double max = 0;
    double min = 0;
    double range;

    for (i = 0; i < a->length; i++)
        if (a->coeff[i] > max)
            max = a->coeff[i];

    for (i = 0; i < a->length; i++)
        if (a->coeff[i] < min)
            min = a->coeff[i];

    range = max - min;

    for (i = 0; i < a->length; i++) {
        int x = (int)((a->coeff[i] - min) * 60.0 / range + 0.5);
        av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
        for (; x > 0; x--)
            av_log(log_ctx, log_level, " ");
        av_log(log_ctx, log_level, "|\n");
    }
}

void sws_freeVec(SwsVector *a)
{
    if (!a)
        return;
    av_freep(&a->coeff);
    a->length = 0;
    av_free(a);
}

void sws_freeFilter(SwsFilter *filter)
{
    if (!filter)
        return;

    sws_freeVec(filter->lumH);
    sws_freeVec(filter->lumV);
    sws_freeVec(filter->chrH);
    sws_freeVec(filter->chrV);
    av_free(filter);
}

void sws_freeContext(SwsContext *c)
{
    int i;
    if (!c)
        return;

    if (c->lumPixBuf) {
        for (i = 0; i < c->vLumBufSize; i++)
            av_freep(&c->lumPixBuf[i]);
        av_freep(&c->lumPixBuf);
    }
    if (c->chrUPixBuf) {
        for (i = 0; i < c->vChrBufSize; i++)
            av_freep(&c->chrUPixBuf[i]);
        av_freep(&c->chrUPixBuf);
        av_freep(&c->chrVPixBuf);
    }
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
        for (i = 0; i < c->vLumBufSize; i++)
            av_freep(&c->alpPixBuf[i]);
        av_freep(&c->alpPixBuf);
    }

    for (i = 0; i < 4; i++)
        av_freep(&c->dither_error[i]);

    av_freep(&c->vLumFilter);
    av_freep(&c->vChrFilter);
    av_freep(&c->hLumFilter);
    av_freep(&c->hChrFilter);
#if HAVE_ALTIVEC
    av_freep(&c->vYCoeffsBank);
    av_freep(&c->vCCoeffsBank);
#endif
    av_freep(&c->vLumFilterPos);
    av_freep(&c->vChrFilterPos);
    av_freep(&c->hLumFilterPos);
    av_freep(&c->hChrFilterPos);

#if HAVE_MMX_INLINE
#if USE_MMAP
    if (c->lumMmxextFilterCode)
        munmap(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize);
    if (c->chrMmxextFilterCode)
        munmap(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize);
#elif HAVE_VIRTUALALLOC
    if (c->lumMmxextFilterCode)
        VirtualFree(c->lumMmxextFilterCode, 0, MEM_RELEASE);
    if (c->chrMmxextFilterCode)
        VirtualFree(c->chrMmxextFilterCode, 0, MEM_RELEASE);
#else
    av_free(c->lumMmxextFilterCode);
    av_free(c->chrMmxextFilterCode);
#endif
    c->lumMmxextFilterCode = NULL;
    c->chrMmxextFilterCode = NULL;
#endif /* HAVE_MMX_INLINE */

    av_freep(&c->formatConvBuffer);

    av_free(c);
}

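/* A cached context is reused as long as all parameters match; when any of
 * them differ, the old context is freed and a new one is created. */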
struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW,
                                        int srcH, enum AVPixelFormat srcFormat,
                                        int dstW, int dstH,
                                        enum AVPixelFormat dstFormat, int flags,
                                        SwsFilter *srcFilter,
                                        SwsFilter *dstFilter,
                                        const double *param)
{
    static const double default_param[2] = { SWS_PARAM_DEFAULT,
                                             SWS_PARAM_DEFAULT };

    if (!param)
        param = default_param;

    if (context &&
        (context->srcW      != srcW      ||
         context->srcH      != srcH      ||
         context->srcFormat != srcFormat ||
         context->dstW      != dstW      ||
         context->dstH      != dstH      ||
         context->dstFormat != dstFormat ||
         context->flags     != flags     ||
         context->param[0]  != param[0]  ||
         context->param[1]  != param[1])) {
        sws_freeContext(context);
        context = NULL;
    }

    if (!context) {
        if (!(context = sws_alloc_context()))
            return NULL;
        context->srcW      = srcW;
        context->srcH      = srcH;
        context->srcFormat = srcFormat;
        context->dstW      = dstW;
        context->dstH      = dstH;
        context->dstFormat = dstFormat;
        context->flags     = flags;
        context->param[0]  = param[0];
        context->param[1]  = param[1];
        if (sws_init_context(context, srcFilter, dstFilter) < 0) {
            sws_freeContext(context);
            return NULL;
        }
    }
    return context;
}