1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
28
31
33
/*
 * Wraps a CUDA driver call in FF_CUDA_CHECK_DL. The expansion refers to `ctx`
 * and `s` by name, so both identifiers must be in scope wherever CHECK_CU is
 * used.
 */
34 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
35
/*
 * Number of int entries in one frame histogram: 3 * 256.
 * Presumably 256 bins for each of three colour components; confirm against
 * the device kernels.
 */
36 #define HIST_SIZE (3*256)
/*
 * Integer division of a by b, rounded up (for non-negative a and positive b).
 * Note that b appears twice in the expansion, so avoid arguments with side
 * effects.
 */
37 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
40
48 };
49
53 };
54
57 int n;
///< current frame
58 int n_frames;
///< number of frames for analysis
61
64
66
72
74
76
/* Byte offset of member x inside ThumbnailCudaContext. */
77 #define OFFSET(x) offsetof(ThumbnailCudaContext, x)
/*
 * Flag set combining the video-parameter and filtering-parameter option flags.
 * NOTE(review): the expansion is an unparenthesised `|` expression, so it is
 * only safe where operator precedence cannot change its meaning.
 */
78 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
79
83 };
84
86
88 {
90
94 "Allocation failure, try to lower the number of frames\n");
96 }
98 return 0;
99 }
100
101 /**
102 * @brief Compute the sum of squared deviations to estimate "closeness".
103 * @param hist color distribution histogram
104 * @param median average color distribution histogram
105 * @return sum of squared errors
106 */
108 {
/* err holds the per-bin deviation; sum_sq_err is the running total that is returned. */
110 double err, sum_sq_err = 0;
111
/* Accumulate the square of the current deviation. */
114 sum_sq_err += err*err;
115 }
/* Total of all squared deviations; smaller means closer to the reference. */
116 return sum_sq_err;
117 }
118
120 {
/*
 * Chooses, among the s->n buffered frames, the one whose histogram has the
 * smallest squared error against the per-bin average histogram, resets the
 * per-frame histograms, and returns the chosen frame's buffer.
 */
123 int i, j, best_frame_idx = 0;
124 int nb_frames =
s->n;
/*
 * min_sq_err starts at -1 only as a placeholder: the `i == 0` test in the
 * search loop below always replaces it with the first frame's error.
 */
125 double avg_hist[
HIST_SIZE] = {0}, sq_err, min_sq_err = -1;
126
127 // average histogram of the N frames
129 for (
i = 0;
i < nb_frames;
i++)
/* Sum bin j over all frames (as double), then divide by the frame count. */
130 avg_hist[j] += (
double)
s->frames[
i].histogram[j];
131 avg_hist[j] /= nb_frames;
132 }
133
134 // find the frame closest to the average using the sum of squared errors
135 for (
i = 0;
i < nb_frames;
i++) {
/* The first frame is always accepted; later frames only if strictly better. */
137 if (
i == 0 || sq_err < min_sq_err)
138 best_frame_idx =
i, min_sq_err = sq_err;
139 }
140
141 // free and reset everything (except the best frame buffer)
142 for (
i = 0;
i < nb_frames;
i++) {
/* Zero the whole histogram array of this slot so it can be reused. */
143 memset(
s->frames[
i].histogram, 0,
sizeof(
s->frames[
i].histogram));
144 if (
i != best_frame_idx)
146 }
148
149 // raise the chosen one
150 picref =
s->frames[best_frame_idx].buf;
152 "from a set of %d images\n", best_frame_idx,
/*
 * Clear the slot so s->frames no longer refers to the returned buffer.
 * Presumably this hands ownership of the frame to the caller; confirm.
 */
154 s->frames[best_frame_idx].buf =
NULL;
155
156 return picref;
157 }
158
160 int *
histogram, uint8_t *src_dptr,
int src_width,
int src_height,
int src_pitch,
int pixel_size)
161 {
/*
 * Builds a CUDA texture object over one pitched 2D device plane (src_dptr,
 * src_width x src_height, src_pitch bytes per row) and destroys it again on
 * the exit path.
 */
164 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
/* Initialised to 0 so the exit path can tell whether a texture was created. */
165 CUtexObject tex = 0;
/*
 * Array of pointers to the texture object, the histogram pointer and the
 * source dimensions. Presumably the parameter list handed to the kernel
 * launch; confirm against the launch call.
 */
166 void *args[] = { &tex, &
histogram, &src_width, &src_height };
167
/*
 * Texture sampling setup: linear filter mode together with the
 * read-as-integer flag.
 * NOTE(review): check the CUDA driver documentation for how linear filtering
 * interacts with CU_TRSF_READ_AS_INTEGER.
 */
168 CUDA_TEXTURE_DESC tex_desc = {
169 .filterMode = CU_TR_FILTER_MODE_LINEAR,
170 .flags = CU_TRSF_READ_AS_INTEGER,
171 };
172
/*
 * Resource description of the source plane: pitched 2D memory whose element
 * format is 8-bit unsigned when pixel_size is 1 and 16-bit unsigned otherwise.
 */
173 CUDA_RESOURCE_DESC res_desc = {
174 .resType = CU_RESOURCE_TYPE_PITCH2D,
175 .res.pitch2D.format = pixel_size == 1 ?
176 CU_AD_FORMAT_UNSIGNED_INT8 :
177 CU_AD_FORMAT_UNSIGNED_INT16,
178 .res.pitch2D.numChannels =
channels,
179 .res.pitch2D.width = src_width,
180 .res.pitch2D.height = src_height,
181 .res.pitch2D.pitchInBytes = src_pitch,
182 .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
183 };
184
/* Create the texture object; the last argument (resource view descriptor) is NULL. */
185 ret =
CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc,
NULL));
187 goto exit;
188
/* Common cleanup: destroy the texture object only if one was created. */
192 exit:
193 if (tex)
194 CHECK_CU(cu->cuTexObjectDestroy(tex));
195
197 }
198
200 {
203
210 break;
218 break;
226 break;
233 break;
241 break;
242 default:
244 }
245
246 return 0;
247 }
248
250 {
/*
 * Per-frame processing: copies a histogram from device memory (s->data) into
 * the host-side histogram of slot s->n, and returns 0 without selecting a
 * frame while s->n is still below s->n_frames.
 */
253 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
/* Host histogram of the slot indexed by the current value of s->n. */
255 int *hist =
s->frames[
s->n].histogram;
/* Zero-initialised so every field not set explicitly below stays 0. */
258 CUDA_MEMCPY2D cpy = { 0 };
260
261 // keep a reference of each frame
263
/* Make the filter's CUDA context current for the driver calls that follow. */
264 ret =
CHECK_CU(cu->cuCtxPushCurrent(
s->hwctx->cuda_ctx));
267
269
271
/* Describe a device-to-host copy of a single row from s->data into hist. */
272 cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
273 cpy.dstMemoryType = CU_MEMORYTYPE_HOST;
274 cpy.srcDevice =
s->data;
275 cpy.dstHost = hist;
279 cpy.Height = 1;
280
/*
 * NOTE(review): the copy is issued asynchronously on s->cu_stream, and hist[]
 * is read on the host just below. No stream synchronisation is visible in
 * between; confirm the copy is guaranteed complete before hist[] is used.
 */
281 ret =
CHECK_CU(cu->cuMemcpy2DAsync(&cpy,
s->cu_stream));
284
287 {
/*
 * Scale the selected bins by 4. Presumably compensates for planes that carry
 * a quarter as many samples (subsampled chroma); confirm against the
 * guarding condition.
 */
290 hist[
i] = 4 * hist[
i];
291 }
292
296
297 // no selection until the buffer of N frames is filled up
299 if (
s->n <
s->n_frames)
300 return 0;
301
303 }
304
306 {
308
310 CudaFunctions *cu =
s->hwctx->internal->cuda_dl;
311
315 }
316
318 CHECK_CU(cu->cuModuleUnload(
s->cu_module));
320 }
321 }
322
324 for (
int i = 0;
i <
s->n_frames &&
s->frames[
i].buf;
i++)
327 }
328 }
329
331 {
335
341 }
344 return 0;
345 }
346
348 {
350
353 return 1;
354 return 0;
355 }
356
358 {
/*
 * Link configuration: stores the CUDA device context and stream in the filter
 * context, looks up the four kernel entry points in s->cu_module, and takes
 * the hardware frames context from the first input link.
 */
363 CUcontext
dummy, cuda_ctx = device_hwctx->cuda_ctx;
364 CudaFunctions *cu = device_hwctx->internal->cuda_dl;
366
/*
 * PTX image and its length, declared extern here and defined elsewhere.
 * Presumably the compiled device code that is loaded into s->cu_module;
 * confirm against the module-load call.
 */
367 extern const unsigned char ff_vf_thumbnail_cuda_ptx_data[];
368 extern const unsigned int ff_vf_thumbnail_cuda_ptx_len;
369
/* Reuse the stream that belongs to the device context. */
370 s->hwctx = device_hwctx;
371 s->cu_stream =
s->hwctx->stream;
372
376
380
/*
 * Resolve the kernel entry points by name. The uchar/ushort and the "2"
 * suffixes presumably correspond to 8-/16-bit samples and one-/two-channel
 * planes; confirm against the device source.
 */
381 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar,
s->cu_module,
"Thumbnail_uchar"));
384
385 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_uchar2,
s->cu_module,
"Thumbnail_uchar2"));
388
389 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_ushort,
s->cu_module,
"Thumbnail_ushort"));
392
393 ret =
CHECK_CU(cu->cuModuleGetFunction(&
s->cu_func_ushort2,
s->cu_module,
"Thumbnail_ushort2"));
396
400
402
/* The filter works on the hardware frames context of its first input. */
403 s->hw_frames_ctx =
ctx->inputs[0]->hw_frames_ctx;
404
/* Taken when the first output link has no hardware frames context. */
406 if (!
ctx->outputs[0]->hw_frames_ctx)
408
410
414 }
415
416 return 0;
417 }
418
420 {
425 },
426 };
427
429 {
433 },
434 };
435
437 .
name =
"thumbnail_cuda",
438 .description =
NULL_IF_CONFIG_SMALL(
"Select the most representative frame in a given sequence of consecutive frames."),
445 .priv_class = &thumbnail_cuda_class,
447 };