1 /*
2 * Original copyright (c) 2002 Remi Guyomarch <rguyom@pobox.com>
3 * Port copyright (c) 2010 Daniel G. Taylor <dan@programmer-art.org>
4 * Relicensed to the LGPL with permission from Remi Guyomarch.
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 /**
24 * @file
25 * blur / sharpen filter, ported to FFmpeg from MPlayer
26 * libmpcodecs/unsharp.c.
27 *
28 * This code is based on:
29 *
30 * An Efficient algorithm for Gaussian blur using finite-state machines
31 * Frederick M. Waltz and John W. V. Miller
32 *
33 * SPIE Conf. on Machine Vision Systems for Inspection and Metrology VII
34 * Originally published Boston, Nov 98
35 *
36 * http://www.engin.umd.umich.edu/~jwvm/ece581/21_GBlur.pdf
37 */
38
47
/* Smallest and largest matrix size handled by this filter.
 * MAX_MATRIX_SIZE - 1 is also the per-job stride into the row state buffer
 * (see sr_offset in DEF_UNSHARP_SLICE_FUNC).
 * NOTE(review): presumably these also bound the user-settable matrix size
 * options; the option table is not visible here -- confirm. */
48 #define MIN_MATRIX_SIZE 3
49 #define MAX_MATRIX_SIZE 63
50
59 uint32_t *
sr;
///< finite state machine storage within a row
60 uint32_t **
sc;
///< finite state machine storage across rows
62
79
89
/**
 * Generate the slice worker `name##_##nbits` for samples stored as
 * uint<nbits>_t.
 *
 * The generated function has the signature
 *     static int fn(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 * where arg is a ThreadData describing one plane (src/dst pointers, strides,
 * width, height and the UnsharpFilterParam to apply). It always returns 0.
 *
 * Work split: job jobnr produces output rows [slice_start, slice_end), with
 * slice_start = height * jobnr / nb_jobs and
 * slice_end   = height * (jobnr + 1) / nb_jobs.
 * Each job uses its own part of the scratch buffers: 2 * steps_y rows of
 * fp->sc starting at sc_offset, and the entries of fp->sr starting at
 * sr_offset.
 *
 * Behaviour:
 *  - If fp->amount is 0 the slice rows are copied from src to dst with
 *    av_image_copy_plane() (strides still in bytes, row length
 *    width * s->bps) and the function returns.
 *  - Otherwise the job's sc rows are zeroed over width + 2 * steps_x entries
 *    and both strides are divided by s->bps so they can be used with the
 *    typed sample pointers.
 *  - Rows y = slice_start - steps_y .. slice_end + steps_y - 1 are fed
 *    through the filter. Horizontally the row is extended by steps_x on both
 *    sides by repeating src2[0] and src2[width - 1]. src2 is only refreshed
 *    while y < height, so rows below the picture reuse the last valid row;
 *    src/dst only advance once y >= 0, so rows above the picture reuse the
 *    first row.
 *  - Every sample goes through steps_x pairs of accumulate stages kept in sr
 *    (state within the row) followed by steps_y pairs kept in sc (state per
 *    column across rows); tmp1 carries the running result.
 *  - Once x >= steps_x and y >= slice_start + steps_y, the result belongs to
 *    the pixel steps_x columns to the left and steps_y rows up. That pixel
 *    is written as
 *        src + (((src - ((tmp1 + halfscale) >> scalebits)) * amount)
 *               >> (8 + nbits))
 *    clipped with av_clip_uint<nbits>().
 *
 * NOTE(review): sr is cleared over 2 * steps_x - 1 entries per row while the
 * stage loop reads and writes 2 * steps_x entries -- confirm that leaving the
 * last entry uncleared is intentional.
 */
90 #define DEF_UNSHARP_SLICE_FUNC(name, nbits) \
91 static int name##_##nbits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
92 { \
93 ThreadData *td = arg; \
94 UnsharpFilterParam *fp = td->fp; \
95 UnsharpContext *s = ctx->priv; \
96 uint32_t **sc = fp->sc; \
97 uint32_t *sr = fp->sr; \
98 const uint##nbits##_t *src2 = NULL; \
99 const int amount = fp->amount; \
100 const int steps_x = fp->steps_x; \
101 const int steps_y = fp->steps_y; \
102 const int scalebits = fp->scalebits; \
103 const int32_t halfscale = fp->halfscale; \
104 \
105 uint##nbits##_t *dst = (uint##nbits##_t*)td->dst; \
106 const uint##nbits##_t *src = (const uint##nbits##_t *)td->src; \
107 int dst_stride = td->dst_stride; \
108 int src_stride = td->src_stride; \
109 const int width = td->width; \
110 const int height = td->height; \
111 const int sc_offset = jobnr * 2 * steps_y; \
112 const int sr_offset = jobnr * (MAX_MATRIX_SIZE - 1); \
113 const int slice_start = (height * jobnr) / nb_jobs; \
114 const int slice_end = (height * (jobnr+1)) / nb_jobs; \
115 \
116 int32_t res; \
117 int x, y, z; \
118 uint32_t tmp1, tmp2; \
119 \
120 if (!amount) { \
121 av_image_copy_plane(td->dst + slice_start * dst_stride, dst_stride, \
122 td->src + slice_start * src_stride, src_stride, \
123 width * s->bps, slice_end - slice_start); \
124 return 0; \
125 } \
126 \
127 for (y = 0; y < 2 * steps_y; y++) \
128 memset(sc[sc_offset + y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x)); \
129 \
130 dst_stride = dst_stride / s->bps; \
131 src_stride = src_stride / s->bps; \
132 /* if this slice starts more than steps_y rows into the plane, begin reading */ \
133 /* at row (slice_start - steps_y) so the result is smooth at the slice boundary */ \
134 if (slice_start > steps_y) { \
135 src += (slice_start - steps_y) * src_stride; \
136 dst += (slice_start - steps_y) * dst_stride; \
137 } \
138 \
139 for (y = -steps_y + slice_start; y < steps_y + slice_end; y++) { \
140 if (y < height) \
141 src2 = src; \
142 \
143 memset(sr + sr_offset, 0, sizeof(sr[0]) * (2 * steps_x - 1)); \
144 for (x = -steps_x; x < width + steps_x; x++) { \
145 tmp1 = x <= 0 ? src2[0] : x >= width ? src2[width-1] : src2[x]; \
146 for (z = 0; z < steps_x * 2; z += 2) { \
147 tmp2 = sr[sr_offset + z + 0] + tmp1; sr[sr_offset + z + 0] = tmp1; \
148 tmp1 = sr[sr_offset + z + 1] + tmp2; sr[sr_offset + z + 1] = tmp2; \
149 } \
150 for (z = 0; z < steps_y * 2; z += 2) { \
151 tmp2 = sc[sc_offset + z + 0][x + steps_x] + tmp1; \
152 sc[sc_offset + z + 0][x + steps_x] = tmp1; \
153 tmp1 = sc[sc_offset + z + 1][x + steps_x] + tmp2; \
154 sc[sc_offset + z + 1][x + steps_x] = tmp2; \
155 } \
156 if (x >= steps_x && y >= (steps_y + slice_start)) { \
157 const uint##nbits##_t *srx = src - steps_y * src_stride + x - steps_x; \
158 uint##nbits##_t *dsx = dst - steps_y * dst_stride + x - steps_x; \
159 \
160 res = (int32_t)*srx + ((((int32_t) * srx - \
161 (int32_t)((tmp1 + halfscale) >> scalebits)) * amount) >> (8+nbits)); \
162 *dsx = av_clip_uint##nbits(res); \
163 } \
164 } \
165 if (y >= 0) { \
166 dst += dst_stride; \
167 src += src_stride; \
168 } \
169 } \
170 return 0; \
171 }
174
176 {
179 int i, plane_w[4], plane_h[4];
182
183 plane_w[0] = plane_w[3] =
inlink->w;
185 plane_h[0] = plane_h[3] =
inlink->h;
188 fp[1] = fp[2] = &
s->chroma;
190 for (
i = 0;
i <
s->nb_planes;
i++) {
193 td.
src = in->data[
i];
199 FFMIN(plane_h[
i],
s->nb_threads));
200 }
201 return 0;
202 }
203
204 #define MAX_SCALEBITS 25
205
208 {
211 fp->
amount = amount * 65536.0;
212
217
222 }
223
224 return 0;
225 }
226
228 {
231
232 #define SET_FILTER_PARAM(name_, short_) \
233 ret = set_filter_param(ctx, #name_, #short_, &s->name_, \
234 s->short_##msize_x, s->short_##msize_y, s->short_##amount); \
235 if (ret < 0) \
236 return ret; \
237
241
242 return 0;
243 }
244
257 };
258
260 {
261 int z;
263 const char *effect = fp->
amount == 0 ?
"none" : fp->
amount < 0 ?
"blur" :
"sharpen";
264
267 "Invalid even size for %s matrix size %dx%d\n",
270 }
271
274
277 if (!fp->
sr || !fp->
sc)
279
280 for (z = 0; z < 2 * fp->
steps_y *
s->nb_threads; z++)
282 sizeof(*(fp->
sc[z])))))
284
285 return 0;
286 }
287
289 {
293
294 s->nb_planes =
desc->nb_components;
295 s->hsub =
desc->log2_chroma_w;
296 s->vsub =
desc->log2_chroma_h;
297 s->bitdepth =
desc->comp[0].depth;
298 s->bps =
s->bitdepth > 8 ? 2 : 1;
299 s->unsharp_slice =
s->bitdepth > 8 ? unsharp_slice_16 : unsharp_slice_8;
300
301 // ensure (height / nb_threads) > 4 * steps_y,
302 // so that we don't have too much overlap between two threads
304 inlink->h / (4 *
s->luma.steps_y));
305
312
313 return 0;
314 }
315
317 {
318 int z;
319
321 for (z = 0; z < 2 * fp->
steps_y * nb_threads; z++)
324 }
326 }
327
329 {
331
334 }
335
337 {
341
346 }
348
350
352
356 }
358 }
359
/* Byte offset of member x within UnsharpContext. */
360 #define OFFSET(x) offsetof(UnsharpContext, x)
/* Bitwise OR of the filtering-parameter and video-parameter option flags.
 * The expansion is not parenthesized, so avoid using it next to operators
 * that bind tighter than '|'. */
361 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
384 };
385
387
389 {
394 },
395 };
396
400 .p.priv_class = &unsharp_class,
408 };