1 /*
2 * Copyright (c) 2010 Stefano Sabatini
3 * Copyright (c) 2010 Baptiste Coudurier
4 * Copyright (c) 2007 Bobby Bingham
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 /**
24 * @file
25 * overlay one video on top of another
26 */
27
43
47
49 "main_w", "W", ///< width of the main video
50 "main_h", "H", ///< height of the main video
51 "overlay_w", "w", ///< width of the overlay video
52 "overlay_h", "h", ///< height of the overlay video
53 "hsub",
54 "vsub",
55 "x",
56 "y",
57 "n", ///< number of frame
58 "t", ///< timestamp expressed in seconds
60 };
61
64
69
73
78 };
79
81 {
83
87 }
88
90 {
92 return INT_MAX;
93 return (int)d & ~((1 << chroma_sub) - 1);
94 }
95
97 {
99
102 /* This is necessary if x is expressed in terms of y */
106 }
107
109 {
112
113 if (*pexpr)
114 old = *pexpr;
119 "Error when evaluating the expression '%s' for %s\n",
121 *pexpr = old;
123 }
124
126 return 0;
127 }
128
130 char *res,
int res_len,
int flags)
131 {
134
135 if (!strcmp(cmd, "x"))
137 else if (!strcmp(cmd, "y"))
139 else
141
144
150 }
152 }
153
159 };
160
164 {
166
167 /* overlay formats contain alpha, to avoid a conversion that would lose alpha information */
172 };
173 static const enum AVPixelFormat overlay_pix_fmts_yuv420[] = {
175 };
176
177 static const enum AVPixelFormat main_pix_fmts_yuv420p10[] = {
180 };
181 static const enum AVPixelFormat overlay_pix_fmts_yuv420p10[] = {
183 };
184
187 };
188 static const enum AVPixelFormat overlay_pix_fmts_yuv422[] = {
190 };
191
192 static const enum AVPixelFormat main_pix_fmts_yuv422p10[] = {
194 };
195 static const enum AVPixelFormat overlay_pix_fmts_yuv422p10[] = {
197 };
198
201 };
202 static const enum AVPixelFormat overlay_pix_fmts_yuv444[] = {
204 };
205
206 static const enum AVPixelFormat main_pix_fmts_yuv444p10[] = {
208 };
209 static const enum AVPixelFormat overlay_pix_fmts_yuv444p10[] = {
211 };
212
215 };
218 };
219
225 };
230 };
231
235
241 }
242
245 main_formats = main_pix_fmts_yuv420;
246 overlay_formats = overlay_pix_fmts_yuv420;
247 break;
249 main_formats = main_pix_fmts_yuv420p10;
250 overlay_formats = overlay_pix_fmts_yuv420p10;
251 break;
253 main_formats = main_pix_fmts_yuv422;
254 overlay_formats = overlay_pix_fmts_yuv422;
255 break;
257 main_formats = main_pix_fmts_yuv422p10;
258 overlay_formats = overlay_pix_fmts_yuv422p10;
259 break;
261 main_formats = main_pix_fmts_yuv444;
262 overlay_formats = overlay_pix_fmts_yuv444;
263 break;
265 main_formats = main_pix_fmts_yuv444p10;
266 overlay_formats = overlay_pix_fmts_yuv444p10;
267 break;
269 main_formats = main_pix_fmts_rgb;
270 overlay_formats = overlay_pix_fmts_rgb;
271 break;
273 main_formats = main_pix_fmts_gbrp;
274 overlay_formats = overlay_pix_fmts_gbrp;
275 break;
278 default:
280 }
281
286
289 }
290
292 {
297
299
300 /* Finish the configuration by evaluating the expressions
301 now when both inputs are configured. */
312
316
317 s->overlay_is_packed_rgb =
320
326 }
327
329 "main w:%d h:%d fmt:%s overlay w:%d h:%d fmt:%s\n",
334 return 0;
335 }
336
338 {
342
345
349
351 }
352
353 // divide by 255 and round to nearest
354 // apply a fast variant: (X+127)/255 = ((X+127)*257+257)>>16 = ((X+128)*257)>>16
// The argument is expanded exactly once, so side effects in it are safe.
// NOTE(review): the identity above is presumably meant for non-negative X such
// as products of two 8-bit values; one call site in this file passes
// (*d - mid) * (max - alpha), which can be negative, and right-shifting a
// negative int is implementation-defined -- confirm this is intended.
355 #define FAST_DIV255(x) ((((x) + 128) * 257) >> 16)
356
357 // calculate the unpremultiplied alpha, applying the general equation:
358 // alpha = alpha_overlay / ( (alpha_main + alpha_overlay) - (alpha_main * alpha_overlay) )
// with x = overlay alpha and y = main alpha, both on a 0..255 scale, so the
// expression evaluated is: 255 * 255 * x / (255 * (x + y) - x * y)
359 // (((x) << 16) - ((x) << 9) + (x)) is a faster version of: 255 * 255 * x
360 // ((((x) + (y)) << 8) - ((x) + (y)) - (y) * (x)) is a faster version of: 255 * (x + y) - x * y
361 // this is only needed when blending onto straight alpha main images
// Both arguments are expanded several times, so they must be free of side effects.
// The divisor is 0 when x and y are both 0; the uses visible in this file
// only invoke the macro after checking that the overlay alpha is non-zero.
362 #define UNPREMULTIPLY_ALPHA(x, y) ((((x) << 16) - ((x) << 9) + (x)) / ((((x) + (y)) << 8) - ((x) + (y)) - (y) * (x)))
363
/*
 * Return `ptr` advanced by `byte_addend` BYTES, typed as `TYPE *`.
 * `ptr` is parenthesized before the cast so that an expression argument
 * such as `p + 1` is evaluated with its own element size first and only
 * then offset in bytes.
 */
#define PTR_ADD(TYPE, ptr, byte_addend) ((TYPE*)((uint8_t*)(ptr) + (byte_addend)))
/*
 * Const variant of a byte-wise pointer advance: return `ptr` moved forward
 * by `byte_addend` BYTES, typed as `const TYPE *`.
 * `ptr` is parenthesized before the cast so that an expression argument
 * such as `p + 1` keeps its own pointer arithmetic.
 */
#define CPTR_ADD(TYPE, ptr, byte_addend) ((const TYPE*)((const uint8_t*)(ptr) + (byte_addend)))
366
367 /**
368 * Blend image in src to destination buffer dst at position (x, y).
369 */
370
373 int main_has_alpha, int x, int y,
374 int overlay_straight, int main_straight,
375 int jobnr, int nb_jobs)
376 {
378 int i, imax, j, jmax;
379 const int src_w =
src->width;
380 const int src_h =
src->height;
381 const int dst_w =
dst->width;
382 const int dst_h =
dst->height;
383 uint8_t
alpha;
///< the amount of overlay to blend on to main
384 const int dr =
s->main_rgba_map[
R];
385 const int dg =
s->main_rgba_map[
G];
386 const int db =
s->main_rgba_map[
B];
387 const int da =
s->main_rgba_map[
A];
388 const int dstep =
s->main_pix_step[0];
389 const int sr =
s->overlay_rgba_map[
R];
390 const int sg =
s->overlay_rgba_map[
G];
391 const int sb =
s->overlay_rgba_map[
B];
392 const int sa =
s->overlay_rgba_map[
A];
393 const int sstep =
s->overlay_pix_step[0];
395 uint8_t *
S, *sp, *d, *dp;
396
398 imax =
FFMIN3(-y + dst_h,
FFMIN(src_h, dst_h), y + src_h);
399
402
405
409 d = dp + (x+j) * dstep;
410
411 for (jmax =
FFMIN(-x + dst_w, src_w); j < jmax; j++) {
413
414 // if the main channel has an alpha channel, alpha has to be calculated
415 // to create an un-premultiplied (straight) alpha value
416 if (main_straight &&
alpha != 0 &&
alpha != 255) {
417 uint8_t alpha_d = d[da];
419 }
420
422 case 0:
423 break;
424 case 255:
428 break;
429 default:
430 // main_value = main_value * (1 - alpha) + overlay_value * alpha
431 // since alpha is in the range 0-255, the result must be divided by 255
438 }
439 if (main_has_alpha) {
441 case 0:
442 break;
443 case 255:
445 break;
446 default:
447 // apply alpha compositing: main_alpha += (1-main_alpha) * overlay_alpha
449 }
450 }
451 d += dstep;
453 }
454 dp +=
dst->linesize[0];
455 sp +=
src->linesize[0];
456 }
457 }
458
/**
 * Generate blend_plane_<depth>_<nbits>bits(), which blends plane `i` of the
 * overlay frame `src` onto plane `dst_plane` of the main frame `dst`.
 *
 * Macro parameters:
 *   depth, nbits  only used to build the function name, plus (nbits) the
 *                 sample range: max = (1 << nbits) - 1, mid = 1 << (nbits - 1)
 *   T             sample type used to read and write the planes
 *
 * Generated function parameters:
 *   src_w/src_h, dst_w/dst_h  frame sizes, shifted by hsub/vsub (rounding up)
 *   i                  source plane index
 *   hsub, vsub         subsampling shifts of this plane; also applied to x, y
 *   x, y               overlay position in the main frame
 *   main_straight      when set, the main alpha (dst->data[3]) is read and the
 *                      overlay alpha is run through UNPREMULTIPLY_ALPHA
 *   dst_plane, dst_offset, dst_step
 *                      destination plane, byte offset into it, and byte
 *                      distance between consecutive destination samples
 *   overlay_straight   selects the straight-alpha blend formula; otherwise the
 *                      branch that adds *s on top of the attenuated *d is used
 *   yuv                with i != 0, selects clipping around mid in the
 *                      non-overlay_straight branch
 *   jobnr, nb_jobs     this job's index and the job count; rows are split
 *                      evenly between jobs
 *
 * The overlay alpha is always read from src->data[3]. For subsampled planes it
 * is averaged over the neighbouring alpha samples.
 *
 * NOTE(review): alpha_d is declared uint8_t and UNPREMULTIPLY_ALPHA uses
 * 8-bit constants even when T is wider -- confirm the behaviour intended
 * for nbits > 8 together with main_straight.
 */
459 #define DEFINE_BLEND_PLANE(depth, T, nbits) \
460 static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext *ctx, \
461 AVFrame *dst, const AVFrame *src, \
462 int src_w, int src_h, \
463 int dst_w, int dst_h, \
464 int i, int hsub, int vsub, \
465 int x, int y, \
466 int main_straight, \
467 int dst_plane, \
468 int dst_offset, \
469 int dst_step, \
470 int overlay_straight, \
471 int yuv, \
472 int jobnr, \
473 int nb_jobs) \
474 { \
475 OverlayContext *octx = ctx->priv; \
476 int src_wp = AV_CEIL_RSHIFT(src_w, hsub); \
477 int src_hp = AV_CEIL_RSHIFT(src_h, vsub); \
478 int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub); \
479 int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub); \
480 int yp = y>>vsub; \
481 int xp = x>>hsub; \
482 const T max = (1 << nbits) - 1; \
483 const T mid = (1 << (nbits - 1)); \
484 \
/* jmin/kmin: first overlay row/column that lands inside dst. */ \
/* jmax is used below as a row count added to jmin; kmax is an end column. */ \
485 const int jmin = FFMAX(-yp, 0), jmax = FFMIN3(-yp + dst_hp, FFMIN(src_hp, dst_hp), yp + src_hp); \
486 const int kmin = FFMAX(-xp, 0), kmax = FFMIN(-xp + dst_wp, src_wp); \
/* rows [slice_start, slice_end) are the share of job jobnr */ \
487 const int slice_start = jmin + (jmax * jobnr) / nb_jobs; \
488 const int slice_end = jmin + (jmax * (jobnr + 1)) / nb_jobs; \
489 \
/* row pointers: overlay samples, destination samples, overlay alpha, main alpha */ \
/* (alpha planes are indexed at full resolution, hence the << vsub) */ \
490 const uint8_t *sp = src->data[i] + (slice_start) * src->linesize[i]; \
491 uint8_t *dp = dst->data[dst_plane] \
492 + (yp + slice_start) * dst->linesize[dst_plane] \
493 + dst_offset; \
494 const uint8_t *ap = src->data[3] + (slice_start << vsub) * src->linesize[3]; \
495 const uint8_t *dap = main_straight ? dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3] : NULL; \
496 \
497 for (int j = slice_start; j < slice_end; ++j) { \
498 int k = kmin; \
499 const T *s = (const T *)sp + k; \
500 const T *a = (const T *)ap + (k << hsub); \
501 const T *da = main_straight ? (T *)dap + ((xp + k) << hsub) : NULL; \
502 T *d = (T *)(dp + (xp + k) * dst_step); \
503 \
/* optional row routine for 8-bit samples; its return value c is the */ \
/* number of samples to skip before the scalar loop below continues */ \
504 if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) { \
505 int c = octx->blend_row[i]((uint8_t*)d, (uint8_t*)da, (uint8_t*)s, \
506 (uint8_t*)a, kmax - k, src->linesize[3]); \
507 \
508 s += c; \
509 d = PTR_ADD(T, d, dst_step * c); \
510 if (main_straight) \
511 da += (1 << hsub) * c; \
512 a += (1 << hsub) * c; \
513 k += c; \
514 } \
515 for (; k < kmax; k++) { \
516 int alpha_v, alpha_h, alpha; \
517 \
518 /* average alpha for color components, improve quality */ \
519 if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) { \
520 const T *next_line = CPTR_ADD(T, a, src->linesize[3]); \
521 alpha = (a[0] + next_line[0] + \
522 a[1] + next_line[1]) >> 2; \
523 } else if (hsub || vsub) { \
524 alpha_h = hsub && k+1 < src_wp ? \
525 (a[0] + a[1]) >> 1 : a[0]; \
526 alpha_v = vsub && j+1 < src_hp ? \
527 (a[0] + *CPTR_ADD(T, a, src->linesize[3])) >> 1 : a[0]; \
528 alpha = (alpha_v + alpha_h) >> 1; \
529 } else \
530 alpha = a[0]; \
531 /* if the main channel has an alpha channel, alpha has to be calculated */ \
532 /* to create an un-premultiplied (straight) alpha value */ \
533 if (main_straight && alpha != 0 && alpha != max) { \
534 /* average alpha for color components, improve quality */ \
535 uint8_t alpha_d; \
536 if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) { \
537 const T *next_line = CPTR_ADD(T, da, dst->linesize[3]); \
538 alpha_d = (da[0] + next_line[0] + \
539 da[1] + next_line[1]) >> 2; \
540 } else if (hsub || vsub) { \
541 alpha_h = hsub && k+1 < src_wp ? \
542 (da[0] + da[1]) >> 1 : da[0]; \
543 alpha_v = vsub && j+1 < src_hp ? \
544 (da[0] + *CPTR_ADD(T, da, dst->linesize[3])) >> 1 : da[0]; \
545 alpha_d = (alpha_v + alpha_h) >> 1; \
546 } else \
547 alpha_d = da[0]; \
548 alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d); \
549 } \
/* straight overlay: d = (d * (max - alpha) + s * alpha) / max */ \
550 if (overlay_straight) { \
551 if (nbits > 8) \
552 *d = (*d * (max - alpha) + *s * alpha) / max; \
553 else \
554 *d = FAST_DIV255(*d * (255 - alpha) + *s * alpha); \
555 } else { \
/* otherwise: *s is added on top and the sum is clipped, around mid */ \
/* when (i && yuv), else after subtracting 16 scaled to nbits */ \
556 if (nbits > 8) { \
557 if (i && yuv) \
558 *d = av_clip((*d * (max - alpha) + *s * alpha) / max + *s - mid, -mid, mid) + mid; \
559 else \
560 *d = av_clip_uintp2((*d * (max - alpha) + *s * alpha) / max + *s - (16<<(nbits-8)),\
561 nbits);\
562 } else { \
563 if (i && yuv) \
564 *d = av_clip(FAST_DIV255((*d - mid) * (max - alpha)) + *s - mid, -mid, mid) + mid; \
565 else \
566 *d = av_clip_uint8(FAST_DIV255(*d * (255 - alpha)) + *s - 16); \
567 } \
568 } \
569 s++; \
570 d = PTR_ADD(T, d, dst_step); \
571 if (main_straight) \
572 da += 1 << hsub; \
573 a += 1 << hsub; \
574 } \
/* advance every row pointer to the next line of this plane */ \
575 dp += dst->linesize[dst_plane]; \
576 sp += src->linesize[i]; \
577 ap += (1 << vsub) * src->linesize[3]; \
578 if (main_straight) \
579 dap += (1 << vsub) * dst->linesize[3]; \
580 } \
581 }
584
/**
 * Generate alpha_composite_<depth>_<nbits>bits(), which merges the overlay
 * alpha plane (src->data[3]) into the main alpha plane (dst->data[3]) for the
 * rows assigned to job `jobnr` out of `nb_jobs`.
 *
 * T is the sample type of both alpha planes; a fully opaque sample equals
 * (1 << nbits) - 1. (x, y) is the overlay position inside the main frame.
 * When main_straight is set, partially transparent overlay alpha is first
 * passed through UNPREMULTIPLY_ALPHA together with the current main alpha
 * (read through a uint8_t, as in the colour blending code).
 */
#define DEFINE_ALPHA_COMPOSITE(depth, T, nbits) \
static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, const AVFrame *dst, \
                                                           int src_w, int src_h, \
                                                           int dst_w, int dst_h, \
                                                           int x, int y, int main_straight, \
                                                           int jobnr, int nb_jobs) \
{ \
    const T opaque = (1 << nbits) - 1; \
    \
    /* first visible overlay row and the number of visible rows after it */ \
    const int row_first = FFMAX(-y, 0); \
    const int row_count = FFMIN3(-y + dst_h, FFMIN(src_h, dst_h), y + src_h); \
    /* visible overlay columns: [col_first, col_end) */ \
    const int col_first = FFMAX(-x, 0); \
    const int col_end   = FFMIN(-x + dst_w, src_w); \
    /* rows handled by this job: [row_begin, row_stop) */ \
    const int row_begin = row_first + (row_count * jobnr) / nb_jobs; \
    const int row_stop  = row_first + ((row_count * (jobnr + 1)) / nb_jobs); \
    \
    const uint8_t *src_line = src->data[3] + (row_begin) * src->linesize[3]; \
    uint8_t *dst_line = dst->data[3] + (y + row_begin) * dst->linesize[3]; \
    \
    for (int row = row_begin; row < row_stop; \
         ++row, dst_line += dst->linesize[3], src_line += src->linesize[3]) { \
        const T *in = (const T *)src_line + col_first; \
        T *out = (T *)dst_line + x + col_first; \
        \
        for (int col = col_first; col < col_end; ++col, out += 1, in += 1) { \
            /* the amount of overlay to blend on to main */ \
            T alpha = *in; \
            \
            if (main_straight && alpha != 0 && alpha != opaque) { \
                uint8_t alpha_d = *out; \
                alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d); \
            } \
            if (alpha == opaque) { \
                *out = *in; \
            } else if (alpha > 0) { \
                /* alpha compositing: main_alpha += (1 - main_alpha) * overlay_alpha */ \
                if (nbits > 8) \
                    *out += (opaque - *out) * *in / opaque; \
                else \
                    *out += FAST_DIV255((opaque - *out) * *in); \
            } \
        } \
    } \
}
631
/**
 * Generate blend_slice_yuv_<depth>_<nbits>bits(): blend the three colour
 * components of `src` onto `dst` for one job, then composite the alpha
 * plane when the main input has one.
 *
 * Component 0 is blended with no subsampling (hsub = vsub = 0); components
 * 1 and 2 use the hsub/vsub passed in. Destination plane, offset and step
 * of each component are taken from the main pixel format descriptor.
 */
#define DEFINE_BLEND_SLICE_YUV(depth, nbits) \
static av_always_inline void blend_slice_yuv_##depth##_##nbits##bits(AVFilterContext *ctx, \
                                                                     AVFrame *dst, const AVFrame *src, \
                                                                     int hsub, int vsub, \
                                                                     int main_straight, \
                                                                     int x, int y, \
                                                                     int overlay_straight, \
                                                                     int jobnr, int nb_jobs) \
{ \
    OverlayContext *octx = ctx->priv; \
    const int ov_w = src->width, ov_h = src->height; \
    const int bg_w = dst->width, bg_h = dst->height; \
    \
    /* component 0, full resolution */ \
    blend_plane_##depth##_##nbits##bits(ctx, dst, src, ov_w, ov_h, bg_w, bg_h, \
                                        0, 0, 0, x, y, main_straight, \
                                        octx->main_desc->comp[0].plane, \
                                        octx->main_desc->comp[0].offset, \
                                        octx->main_desc->comp[0].step, \
                                        overlay_straight, 1, jobnr, nb_jobs); \
    /* components 1 and 2, possibly subsampled */ \
    blend_plane_##depth##_##nbits##bits(ctx, dst, src, ov_w, ov_h, bg_w, bg_h, \
                                        1, hsub, vsub, x, y, main_straight, \
                                        octx->main_desc->comp[1].plane, \
                                        octx->main_desc->comp[1].offset, \
                                        octx->main_desc->comp[1].step, \
                                        overlay_straight, 1, jobnr, nb_jobs); \
    blend_plane_##depth##_##nbits##bits(ctx, dst, src, ov_w, ov_h, bg_w, bg_h, \
                                        2, hsub, vsub, x, y, main_straight, \
                                        octx->main_desc->comp[2].plane, \
                                        octx->main_desc->comp[2].offset, \
                                        octx->main_desc->comp[2].step, \
                                        overlay_straight, 1, jobnr, nb_jobs); \
    \
    if (octx->main_has_alpha) { \
        alpha_composite_##depth##_##nbits##bits(src, dst, ov_w, ov_h, bg_w, bg_h, \
                                                x, y, main_straight, jobnr, nb_jobs); \
    } \
}
663
667 int main_straight,
668 int x, int y,
669 int overlay_straight,
670 int jobnr,
671 int nb_jobs)
672 {
674 const int src_w =
src->width;
675 const int src_h =
src->height;
676 const int dst_w =
dst->width;
677 const int dst_h =
dst->height;
678
679 blend_plane_8_8bits(
ctx,
dst,
src, src_w, src_h, dst_w, dst_h, 0, 0, 0, x, y, main_straight,
680 s->main_desc->comp[1].plane,
s->main_desc->comp[1].offset,
s->main_desc->comp[1].step, overlay_straight, 0,
681 jobnr, nb_jobs);
682 blend_plane_8_8bits(
ctx,
dst,
src, src_w, src_h, dst_w, dst_h, 1,
hsub, vsub, x, y, main_straight,
683 s->main_desc->comp[2].plane,
s->main_desc->comp[2].offset,
s->main_desc->comp[2].step, overlay_straight, 0,
684 jobnr, nb_jobs);
685 blend_plane_8_8bits(
ctx,
dst,
src, src_w, src_h, dst_w, dst_h, 2,
hsub, vsub, x, y, main_straight,
686 s->main_desc->comp[0].plane,
s->main_desc->comp[0].offset,
s->main_desc->comp[0].step, overlay_straight, 0,
687 jobnr, nb_jobs);
688
689 if (
s->main_has_alpha)
690 alpha_composite_8_8bits(
src,
dst, src_w, src_h, dst_w, dst_h, x, y, main_straight, jobnr, nb_jobs);
691 }
692
/**
 * Generate the job callback blend_slice_<format_>(ctx, arg, jobnr, nb_jobs).
 *
 * The callback unpacks the ThreadData passed as `arg` and forwards its
 * dst/src frames, the current overlay position (x, y from the filter
 * context) and the compile-time constants hsub_, vsub_, main_straight_ and
 * overlay_straight_ to blend_slice_<blend_slice_fn_suffix_>(). It always
 * returns 0.
 */
#define DEFINE_BLEND_SLICE_PLANAR_FMT_(format_, blend_slice_fn_suffix_, hsub_, vsub_, main_straight_, overlay_straight_) \
static int blend_slice_##format_(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
{ \
    ThreadData *frames = arg; \
    OverlayContext *octx = ctx->priv; \
    \
    blend_slice_##blend_slice_fn_suffix_(ctx, frames->dst, frames->src, \
                                         hsub_, vsub_, \
                                         main_straight_, \
                                         octx->x, octx->y, \
                                         overlay_straight_, \
                                         jobnr, nb_jobs); \
    return 0; \
}
704
/**
 * Instantiate the four job callbacks of one planar format, one per
 * combination of the two alpha flags. The suffix letters encode
 * (main_straight_, overlay_straight_): 's' means the flag is 1, 'p' means
 * it is 0.
 *
 *   suffix  main_straight_  overlay_straight_
 *   _pp           0                 0
 *   _ps           0                 1
 *   _sp           1                 0
 *   _ss           1                 1
 */
#define DEFINE_BLEND_SLICE_PLANAR_FMT(format_, blend_slice_fn_suffix_, hsub_, vsub_) \
    DEFINE_BLEND_SLICE_PLANAR_FMT_(format_ ## _pp, blend_slice_fn_suffix_, hsub_, vsub_, 0, 0) \
    DEFINE_BLEND_SLICE_PLANAR_FMT_(format_ ## _ps, blend_slice_fn_suffix_, hsub_, vsub_, 0, 1) \
    DEFINE_BLEND_SLICE_PLANAR_FMT_(format_ ## _sp, blend_slice_fn_suffix_, hsub_, vsub_, 1, 0) \
    DEFINE_BLEND_SLICE_PLANAR_FMT_(format_ ## _ss, blend_slice_fn_suffix_, hsub_, vsub_, 1, 1)
710
711 // FMT FN H V
719
/**
 * Generate the job callback blend_slice_<format_>(ctx, arg, jobnr, nb_jobs)
 * for a packed format.
 *
 * The callback unpacks the ThreadData passed as `arg` and calls
 * blend_slice_packed_<blend_slice_fn_suffix_>() with the dst/src frames,
 * the constant main_has_alpha_, the current overlay position (x, y from the
 * filter context) and the constants overlay_straight_ and main_straight_,
 * in that order. It always returns 0.
 */
#define DEFINE_BLEND_SLICE_PACKED_FMT(format_, blend_slice_fn_suffix_, main_has_alpha_, main_straight_, overlay_straight_) \
static int blend_slice_##format_(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
{ \
    ThreadData *frames = arg; \
    OverlayContext *octx = ctx->priv; \
    \
    blend_slice_packed_##blend_slice_fn_suffix_(ctx, frames->dst, frames->src, \
                                                main_has_alpha_, \
                                                octx->x, octx->y, \
                                                overlay_straight_, \
                                                main_straight_, \
                                                jobnr, nb_jobs); \
    return 0; \
}
733
734 // FMT FN A MS OS
741
743 {
746
748
751
752 s->main_desc = pix_desc;
753
754 s->main_is_packed_rgb =
757 return 0;
758 }
759
761 {
767
/* Store into s->blend_slice one of the four functions format_##_ss, */
/* format_##_sp, format_##_ps or format_##_pp: the first suffix letter is */
/* 's' when main_straight is non-zero, the second is 's' when */
/* overlay_straight is non-zero, and 'p' otherwise. Relies on `s`, */
/* `main_straight` and `overlay_straight` being in scope at the use site. */
768 #define ASSIGN_BLEND_SLICE(format_) \
769 do { \
770 s->blend_slice = main_straight ? (overlay_straight ? format_##_ss : format_##_sp) \
771 : (overlay_straight ? format_##_ps : format_##_pp); \
772 } while (0)
773
777 break;
780 break;
783 break;
786 break;
789 break;
792 break;
794 if (
s->main_has_alpha)
796 else
797 s->blend_slice = overlay_straight ? blend_slice_rgb : blend_slice_rgb_pm;
798 break;
801 break;
803 switch (
main->format) {
806 break;
809 break;
812 break;
815 break;
818 break;
821 break;
827 break;
830 break;
831 default:
833 break;
834 }
835 break;
836 }
837
838 #if ARCH_X86
840 #endif
841
842 return 0;
843 }
844
846 {
853
857 if (!second)
859
861
865
870
876 }
877
878 if (
s->x < mainpic->
width &&
s->x + second->
width >= 0 &&
881
883
888 }
890 }
891
893 {
895
897 return 0;
898 }
899
901 {
904 }
905
/* Byte offset of member `x` inside OverlayContext; used by the option table. */
#define OFFSET(x) offsetof(OverlayContext, x)
/*
 * Option flag sets. The replacement lists are fully parenthesized so the
 * macros expand as a single value inside any larger expression
 * (e.g. `FLAGS & mask`), not as a loose chain of `|` operands.
 */
#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM)
/* Same as FLAGS with AV_OPT_FLAG_RUNTIME_PARAM added. */
#define TFLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM)
909
913 { "eof_action", "Action to take when encountering EOF from secondary input ",
922 {
"shortest",
"force termination when the shortest input terminates",
OFFSET(
fs.opt_shortest),
AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1,
FLAGS },
940 };
941
943
945 {
949 },
950 {
951 .name = "overlay",
954 },
955 };
956
958 {
962 },
963 };
964
968 .p.priv_class = &overlay_class,
971 .preinit = overlay_framesync_preinit,
980 };