1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
24
30
/* NOTE(review): presumably the largest blur mask diameter the filter
 * supports -- confirm against the option ranges and the weight array sizes. */
31 #define MAX_DIAMETER 23
32
35
39
46
48
50 struct {
53
57
64
65
67 {
/*
 * OpenCL setup: creates the command queue on the filter's OpenCL
 * context/device, decides between the global and local kernel variants,
 * and creates the chosen kernel from ctx->ocf.program.
 * Returns 0 on the success path; the cleanup code at the bottom
 * returns err.
 */
69 cl_int cle;
70 int err;
71
73 if (err < 0)
75
76 ctx->command_queue = clCreateCommandQueue(
ctx->ocf.hwctx->context,
77 ctx->ocf.hwctx->device_id,
78 0, &cle);
80 "command queue %d.\n", cle);
81
82 // Use the global kernel if any mask size would be too big for the local store.
83 ctx->global = (
ctx->luma_size_x > 17.0f ||
84 ctx->luma_size_y > 17.0f ||
85 ctx->chroma_size_x > 17.0f ||
86 ctx->chroma_size_y > 17.0f);
87
/* The kernel name follows the choice made above. */
88 ctx->kernel = clCreateKernel(
ctx->ocf.program,
89 ctx->global ?
"unsharp_global"
90 : "unsharp_local", &cle);
92
94 return 0;
95
/* Cleanup: release the command queue (if one was created) and the
 * kernel, then hand back the error code. */
97 if (
ctx->command_queue)
98 clReleaseCommandQueue(
ctx->command_queue);
100 clReleaseKernel(
ctx->kernel);
101 return err;
102 }
103
105 {
/*
 * Builds the blur weights for every plane and uploads them to OpenCL
 * buffers, then records the tap counts and amount for each plane.
 * Returns err, which is set to 0 after the plane loop completes.
 */
110 cl_int cle;
112 size_t matrix_bytes;
113 float diam_x, diam_y, amount;
114 int err, p, x, y, size_x, size_y;
115
117
119 for (p = 0; p <
desc->nb_components; p++)
121
122 for (p = 0; p <
ctx->nb_planes; p++) {
/* One branch takes the luma settings, the other the chroma settings. */
124 diam_x =
ctx->luma_size_x;
125 diam_y =
ctx->luma_size_y;
126 amount =
ctx->luma_amount;
127 } else {
128 diam_x =
ctx->chroma_size_x;
129 diam_y =
ctx->chroma_size_y;
130 amount =
ctx->chroma_amount;
131 }
/* Tap counts: round each diameter up and set the low bit, so the
 * count is always odd. */
132 size_x = (
int)
ceil(diam_x) | 1;
133 size_y = (
int)
ceil(diam_y) | 1;
/* Byte size of a full size_x * size_y matrix of floats. */
134 matrix_bytes = size_x * size_y *
sizeof(
float);
135
140 }
141
/* Horizontal weights: exp(-16 * d^2), where d is the offset from the
 * middle tap divided by the diameter. They are then divided by their
 * sum so that they add up to 1. */
142 sum = 0.0;
143 for (x = 0; x < size_x; x++) {
144 double dx = (
double)(x - size_x / 2) / diam_x;
145 sum +=
ctx->plane[p].blur_x[x] =
exp(-16.0 * (dx * dx));
146 }
147 for (x = 0; x < size_x; x++)
148 ctx->plane[p].blur_x[x] /= sum;
149
/* Vertical weights: same formula and normalisation along y. */
150 sum = 0.0;
151 for (y = 0; y < size_y; y++) {
152 double dy = (
double)(y - size_y / 2) / diam_y;
153 sum +=
ctx->plane[p].blur_y[y] =
exp(-16.0 * (dy * dy));
154 }
155 for (y = 0; y < size_y; y++)
156 ctx->plane[p].blur_y[y] /= sum;
157
/* Each 2-D weight is the product of one horizontal and one vertical
 * weight. */
158 for (y = 0; y < size_y; y++) {
159 for (x = 0; x < size_x; x++) {
160 val =
ctx->plane[p].blur_x[x] *
ctx->plane[p].blur_y[y];
162 }
163 }
164
/* Upload the 2-D matrix as one read-only device buffer whose contents
 * are copied from host memory at creation. */
166 buffer = clCreateBuffer(
ctx->ocf.hwctx->context,
167 CL_MEM_READ_ONLY |
168 CL_MEM_COPY_HOST_PTR |
169 CL_MEM_HOST_NO_ACCESS,
170 matrix_bytes,
matrix, &cle);
172 "%d.\n", cle);
174 } else {
/* Otherwise upload the horizontal and vertical weights as two separate
 * read-only buffers.
 * NOTE(review): sizeof is applied to the blur_x / blur_y members; this
 * yields the full weight storage only if they are arrays rather than
 * pointers -- confirm against the struct definition. */
175 buffer = clCreateBuffer(
ctx->ocf.hwctx->context,
176 CL_MEM_READ_ONLY |
177 CL_MEM_COPY_HOST_PTR |
178 CL_MEM_HOST_NO_ACCESS,
179 sizeof(
ctx->plane[p].blur_x),
180 ctx->plane[p].blur_x, &cle);
182 "%d.\n", cle);
184
185 buffer = clCreateBuffer(
ctx->ocf.hwctx->context,
186 CL_MEM_READ_ONLY |
187 CL_MEM_COPY_HOST_PTR |
188 CL_MEM_HOST_NO_ACCESS,
189 sizeof(
ctx->plane[p].blur_y),
190 ctx->plane[p].blur_y, &cle);
192 "%d.\n", cle);
194 }
195
197
/* Remember the tap counts and the amount multiplier for this plane. */
198 ctx->plane[p].size_x = size_x;
199 ctx->plane[p].size_y = size_y;
200 ctx->plane[p].amount = amount;
201 }
202
203 err = 0;
206 return err;
207 }
208
210 {
/*
 * Per-frame processing: enqueues the selected kernel as a 2-D range for
 * the planes of the output frame and waits for the command queue to
 * finish. The value returned at the end is err.
 */
215 cl_int cle;
216 size_t global_work[2];
217 size_t local_work[2];
219 int err, p;
220
224
/* The input frame is checked for a hardware frames context. */
225 if (!
input->hw_frames_ctx)
227
/* One-time setup is performed here while ctx->initialised is unset. */
228 if (!
ctx->initialised) {
230 if (err < 0)
232
234 if (err < 0)
236 }
237
242 }
243
/* Destination memory object for plane p of the output frame. */
246 dst = (cl_mem)
output->data[p];
247
/* A plane without a destination memory object ends the plane loop. */
248 if (!dst)
249 break;
250
256
259 } else {
262 }
263
265 ctx->global ? 0 : 16);
266 if (err < 0)
268
/* 16x16 work-groups; this size is handed to the enqueue call only for
 * the local kernel variant (NULL is passed for the global one). */
269 local_work[0] = 16;
270 local_work[1] = 16;
271
274 p, global_work[0], global_work[1]);
275
276 cle = clEnqueueNDRangeKernel(
ctx->command_queue,
ctx->kernel, 2,
NULL,
277 global_work,
ctx->global ?
NULL : local_work,
280 }
281
/* Block until every command queued above has completed. */
282 cle = clFinish(
ctx->command_queue);
284
286 if (err < 0)
288
290
294
296
/* Drain the queue before handing back err. */
298 clFinish(
ctx->command_queue);
301 return err;
302 }
303
305 {
/*
 * Teardown: releases the per-plane weight buffers, the kernel and the
 * command queue.
 */
307 cl_int cle;
309
/* Release whichever of the matrix / coef_x / coef_y buffers each plane
 * holds; unset ones are skipped. */
310 for (
i = 0;
i <
ctx->nb_planes;
i++) {
311 if (
ctx->plane[
i].matrix)
312 clReleaseMemObject(
ctx->plane[
i].matrix);
313 if (
ctx->plane[
i].coef_x)
314 clReleaseMemObject(
ctx->plane[
i].coef_x);
315 if (
ctx->plane[
i].coef_y)
316 clReleaseMemObject(
ctx->plane[
i].coef_y);
317 }
318
/* Release the kernel; a failure is reported together with the OpenCL
 * error code. */
320 cle = clReleaseKernel(
ctx->kernel);
321 if (cle != CL_SUCCESS)
323 "kernel: %d.\n", cle);
324 }
325
/* Release the command queue if one exists, reporting any failure the
 * same way. */
326 if (
ctx->command_queue) {
327 cle = clReleaseCommandQueue(
ctx->command_queue);
328 if (cle != CL_SUCCESS)
330 "command queue: %d.\n", cle);
331 }
332
334 }
335
/* Helpers for the option table: OFFSET gives the byte offset of a field
 * inside UnsharpOpenCLContext, FLAGS marks an option as a video
 * filtering parameter. */
336 #define OFFSET(x) offsetof(UnsharpOpenCLContext, x)
337 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Luma options. Every option appears twice: under a long name and under
 * a short alias with the same description. */
339 { "luma_msize_x", "Set luma mask horizontal diameter (pixels)",
342 { "lx", "Set luma mask horizontal diameter (pixels)",
345 { "luma_msize_y", "Set luma mask vertical diameter (pixels)",
348 { "ly", "Set luma mask vertical diameter (pixels)",
/* Luma amount defaults to 1.0 and is limited to [-10, 10]. */
351 { "luma_amount", "Set luma amount (multiplier)",
353 { .dbl = 1.0 }, -10, 10,
FLAGS },
354 { "la", "Set luma amount (multiplier)",
356 { .dbl = 1.0 }, -10, 10,
FLAGS },
357
/* Chroma options mirror the luma ones; their diameters are measured in
 * pixels after subsampling. */
358 { "chroma_msize_x", "Set chroma mask horizontal diameter (pixels after subsampling)",
361 { "cx", "Set chroma mask horizontal diameter (pixels after subsampling)",
364 { "chroma_msize_y", "Set chroma mask vertical diameter (pixels after subsampling)",
367 { "cy", "Set chroma mask vertical diameter (pixels after subsampling)",
/* Chroma amount defaults to 0.0 and is limited to [-10, 10]. */
370 { "chroma_amount", "Set chroma amount (multiplier)",
372 { .dbl = 0.0 }, -10, 10,
FLAGS },
373 { "ca", "Set chroma amount (multiplier)",
375 { .dbl = 0.0 }, -10, 10,
FLAGS },
376
378 };
379
381
383 {
388 },
389 };
390
392 {
396 },
397 };
398
/* The filter is named "unsharp_opencl" and points its priv_class at
 * unsharp_opencl_class. */
400 .
name =
"unsharp_opencl",
403 .priv_class = &unsharp_opencl_class,
411 };