1 /*
2 * Copyright (C) 2012 Peng Gao <peng@multicorewareinc.com>
3 * Copyright (C) 2012 Li Cao <li@multicorewareinc.com>
4 * Copyright (C) 2012 Wei Gao <weigao@multicorewareinc.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
28
29 #if HAVE_PTHREADS
30
31 #include <pthread.h>
33
/* Take / release atomic_opencl_lock, a pthread mutex. These two definitions
 * apply only in the HAVE_PTHREADS branch of the surrounding #if. */
34 #define LOCK_OPENCL pthread_mutex_lock(&atomic_opencl_lock)
35 #define UNLOCK_OPENCL pthread_mutex_unlock(&atomic_opencl_lock)
36
37 #elif !HAVE_THREADS
40 #endif
41
42
/* Capacity limits.
 * NOTE(review): presumably the caps reported by the "maximum number of kernels
 * %d already reached" and "maximum number of registered kernel code %d already
 * reached" errors further down; the comparisons themselves are not visible
 * in this excerpt, so confirm before relying on that. */
43 #define MAX_KERNEL_NUM 500
44 #define MAX_KERNEL_CODE_NUM 200
45
50
57 /**
58 * If set to 1, the OpenCL environment was created by the user and passed in
59 * as an AVOpenCLExternalEnv at initialization; if 0, it was created by the OpenCL wrapper.
60 */
77
/* Byte offset of member x inside OpenclContext; the option-table entries
 * below pass it as their third field. */
78 #define OFFSET(x) offsetof(OpenclContext, x)
79
/* Option entries. Each initializer lists, in order: option name, help text,
 * member offset in OpenclContext, value type, default value, minimum, maximum.
 * NOTE(review): that field order is the usual AVOption layout; the array
 * declaration itself is not visible in this excerpt. */
/* platform_idx: integer, default -1, range -1..INT_MAX. */
81 {
"platform_idx",
"set platform index value",
OFFSET(platform_idx),
AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX},
/* device_idx: integer, default -1, range -1..INT_MAX. */
82 {
"device_idx",
"set device index value",
OFFSET(device_idx),
AV_OPT_TYPE_INT, {.i64=-1}, -1, INT_MAX},
/* build_options: string, default "-I.". */
83 {
"build_options",
"build options of opencl",
OFFSET(build_options),
AV_OPT_TYPE_STRING, {.str=
"-I."}, CHAR_MIN, CHAR_MAX},
/* Terminating entry. */
84 { NULL }
85 };
86
92 .log_level_offset_offset = offsetof(
OpenclContext, log_offset),
93 .parent_log_context_offset = offsetof(
OpenclContext, log_ctx),
94 };
95
97
/* OpenCL device-type constants, in the order GPU, CPU, DEFAULT.
 * NOTE(review): the code that iterates this table is not visible in this
 * excerpt; presumably devices are queried once per entry. Confirm against
 * the device-enumeration code. */
98 static const cl_device_type
device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_DEFAULT};
99
104
/* Each entry pairs an OpenCL status code (err_code) with the human-readable
 * text (err_str) that the lookup below returns for it.
 * NOTE(review): "IMAGE FORMAT NOT_SUPPORTED" and "INVALID ARG_SIZE" contain an
 * underscore where every other message uses spaces. They look like typos, but
 * they are runtime strings, so change them only deliberately. */
106 {CL_DEVICE_NOT_FOUND, "DEVICE NOT FOUND"},
107 {CL_DEVICE_NOT_AVAILABLE, "DEVICE NOT AVAILABLE"},
108 {CL_COMPILER_NOT_AVAILABLE, "COMPILER NOT AVAILABLE"},
109 {CL_MEM_OBJECT_ALLOCATION_FAILURE, "MEM OBJECT ALLOCATION FAILURE"},
110 {CL_OUT_OF_RESOURCES, "OUT OF RESOURCES"},
111 {CL_OUT_OF_HOST_MEMORY, "OUT OF HOST MEMORY"},
112 {CL_PROFILING_INFO_NOT_AVAILABLE, "PROFILING INFO NOT AVAILABLE"},
113 {CL_MEM_COPY_OVERLAP, "MEM COPY OVERLAP"},
114 {CL_IMAGE_FORMAT_MISMATCH, "IMAGE FORMAT MISMATCH"},
115 {CL_IMAGE_FORMAT_NOT_SUPPORTED, "IMAGE FORMAT NOT_SUPPORTED"},
116 {CL_BUILD_PROGRAM_FAILURE, "BUILD PROGRAM FAILURE"},
117 {CL_MAP_FAILURE, "MAP FAILURE"},
118 {CL_MISALIGNED_SUB_BUFFER_OFFSET, "MISALIGNED SUB BUFFER OFFSET"},
119 {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"},
120 {CL_COMPILE_PROGRAM_FAILURE, "COMPILE PROGRAM FAILURE"},
121 {CL_LINKER_NOT_AVAILABLE, "LINKER NOT AVAILABLE"},
122 {CL_LINK_PROGRAM_FAILURE, "LINK PROGRAM FAILURE"},
123 {CL_DEVICE_PARTITION_FAILED, "DEVICE PARTITION FAILED"},
124 {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "KERNEL ARG INFO NOT AVAILABLE"},
125 {CL_INVALID_VALUE, "INVALID VALUE"},
126 {CL_INVALID_DEVICE_TYPE, "INVALID DEVICE TYPE"},
127 {CL_INVALID_PLATFORM, "INVALID PLATFORM"},
128 {CL_INVALID_DEVICE, "INVALID DEVICE"},
129 {CL_INVALID_CONTEXT, "INVALID CONTEXT"},
130 {CL_INVALID_QUEUE_PROPERTIES, "INVALID QUEUE PROPERTIES"},
131 {CL_INVALID_COMMAND_QUEUE, "INVALID COMMAND QUEUE"},
132 {CL_INVALID_HOST_PTR, "INVALID HOST PTR"},
133 {CL_INVALID_MEM_OBJECT, "INVALID MEM OBJECT"},
134 {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "INVALID IMAGE FORMAT DESCRIPTOR"},
135 {CL_INVALID_IMAGE_SIZE, "INVALID IMAGE SIZE"},
136 {CL_INVALID_SAMPLER, "INVALID SAMPLER"},
137 {CL_INVALID_BINARY, "INVALID BINARY"},
138 {CL_INVALID_BUILD_OPTIONS, "INVALID BUILD OPTIONS"},
139 {CL_INVALID_PROGRAM, "INVALID PROGRAM"},
140 {CL_INVALID_PROGRAM_EXECUTABLE, "INVALID PROGRAM EXECUTABLE"},
141 {CL_INVALID_KERNEL_NAME, "INVALID KERNEL NAME"},
142 {CL_INVALID_KERNEL_DEFINITION, "INVALID KERNEL DEFINITION"},
143 {CL_INVALID_KERNEL, "INVALID KERNEL"},
144 {CL_INVALID_ARG_INDEX, "INVALID ARG INDEX"},
145 {CL_INVALID_ARG_VALUE, "INVALID ARG VALUE"},
146 {CL_INVALID_ARG_SIZE, "INVALID ARG_SIZE"},
147 {CL_INVALID_KERNEL_ARGS, "INVALID KERNEL ARGS"},
148 {CL_INVALID_WORK_DIMENSION, "INVALID WORK DIMENSION"},
149 {CL_INVALID_WORK_GROUP_SIZE, "INVALID WORK GROUP SIZE"},
150 {CL_INVALID_WORK_ITEM_SIZE, "INVALID WORK ITEM SIZE"},
151 {CL_INVALID_GLOBAL_OFFSET, "INVALID GLOBAL OFFSET"},
152 {CL_INVALID_EVENT_WAIT_LIST, "INVALID EVENT WAIT LIST"},
153 {CL_INVALID_EVENT, "INVALID EVENT"},
154 {CL_INVALID_OPERATION, "INVALID OPERATION"},
155 {CL_INVALID_GL_OBJECT, "INVALID GL OBJECT"},
156 {CL_INVALID_BUFFER_SIZE, "INVALID BUFFER SIZE"},
157 {CL_INVALID_MIP_LEVEL, "INVALID MIP LEVEL"},
158 {CL_INVALID_GLOBAL_WORK_SIZE, "INVALID GLOBAL WORK SIZE"},
159 {CL_INVALID_PROPERTY, "INVALID PROPERTY"},
160 {CL_INVALID_IMAGE_DESCRIPTOR, "INVALID IMAGE DESCRIPTOR"},
161 {CL_INVALID_COMPILER_OPTIONS, "INVALID COMPILER OPTIONS"},
162 {CL_INVALID_LINKER_OPTIONS, "INVALID LINKER OPTIONS"},
163 {CL_INVALID_DEVICE_PARTITION_COUNT, "INVALID DEVICE PARTITION COUNT"},
164 };
165
167 {
/* Maps an OpenCL status code to text: returns the err_str of the
 * opencl_err_msg entry whose err_code equals status, or the literal
 * "unknown error" when no entry matches.
 * NOTE(review): the loop header that drives i is not visible in this excerpt. */
168 int i;
170 if (opencl_err_msg[i].err_code == status)
171 return opencl_err_msg[i].
err_str;
172 }
/* No table entry matched. */
173 return "unknown error";
174 }
175
177 {
178 int i, j;
179 if (!device_list)
180 return;
183 continue;
186 }
189 }
192 }
193
195 {
196 cl_int status;
197 int i, j, k, device_num, total_devices_num,
ret = 0;
198 int *devices_num;
199 cl_platform_id *platform_ids = NULL;
200 cl_device_id *device_ids = NULL;
202 status = clGetPlatformIDs(0, NULL, &device_list->
platform_num);
203 if (status != CL_SUCCESS) {
207 }
209 if (!platform_ids)
211 status = clGetPlatformIDs(device_list->
platform_num, platform_ids, NULL);
212 if (status != CL_SUCCESS) {
217 }
222 }
224 if (!devices_num) {
227 }
233 }
235 status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
238 total_devices_num = 0;
242 total_devices_num += devices_num[j];
243 }
248 }
250 if (devices_num[j]) {
251 device_ids =
av_mallocz(devices_num[j] *
sizeof(cl_device_id));
252 if (!device_ids) {
255 }
257 devices_num[j], device_ids, NULL);
258 if (status != CL_SUCCESS) {
262 continue;
263 }
264 for (k = 0; k < devices_num[j]; k++) {
270 }
274 status = clGetDeviceInfo(device_node->
device_id, CL_DEVICE_NAME,
276 NULL);
277 if (status != CL_SUCCESS) {
280 continue;
281 }
283 }
285 }
286 }
287 }
292 if (ret < 0)
295 }
296
298 {
301 if (!(*device_list)) {
304 }
306 if (ret < 0) {
311 }
313 }
314
316 {
319 }
320
322 {
328 }
332 }
333
335 {
338 ret =
av_opt_get(&opencl_ctx, key, 0, out_val);
341 }
342
344 {
345 /*FIXME: free openclutils context*/
349 }
350
352 {
354 if (!ext) {
356 "Could not malloc external opencl environment data space\n");
357 }
358 return ext;
359 }
360
362 {
364 }
365
367 {
372 "Could not register kernel code, maximum number of registered kernel code %d already reached\n",
376 }
381 }
382 }
389 }
390
392 {
393 cl_int status;
397 av_log(&opencl_ctx,
AV_LOG_ERROR,
"Created kernel name %s is too long\n", kernel_name);
400 }
404 "Could not create kernel with name '%s', maximum number of kernels %d already reached\n",
408 }
410 av_log(&opencl_ctx,
AV_LOG_ERROR,
"Program count of OpenCL is 0, can not create kernel\n");
413 }
415 env->
kernel = clCreateKernel(opencl_ctx.
programs[i], kernel_name, &status);
416 if (status == CL_SUCCESS)
417 break;
418 }
419 if (status != CL_SUCCESS) {
423 }
427 }
431 }
432
434 {
435 cl_int status;
439 status = clReleaseKernel(env->
kernel);
440 if (status != CL_SUCCESS) {
443 }
450 }
451
453 {
/* Sets up the OpenCL environment held in opencl_ctx. Visible steps: return 0
 * straight away when an external environment (ext_opencl_env) is supplied;
 * otherwise pick a platform and device, build a context-properties list for
 * opencl_ctx->platform_id, then create a context and a command queue, each
 * checked against CL_SUCCESS.
 * NOTE(review): many statements of this function are missing from this
 * excerpt; the summary covers only what the remaining lines show. */
454 cl_int status;
455 cl_context_properties cps[3];
458
459 if (ext_opencl_env) {
461 return 0;
468 } else {
472 if (ret < 0) {
474 }
475 }
480 }
482 av_log(opencl_ctx,
AV_LOG_ERROR,
"No devices in user specific platform with index %d\n",
485 }
487 } else {
488 /* get a usable platform by default*/
493 break;
494 }
495 }
496 }
500 }
501 /* get a usable device*/
/* NOTE(review): the message below talks about a device index but is given
 * platform_idx as its %d argument; device_idx looks like the intended
 * value. Confirm and fix in the full source. */
505 "Could not get OpenCL device idx %d in the user set platform\n", opencl_ctx->
platform_idx);
507 }
508 } else {
510 }
511
515
516 /*
517 * Use available platform.
518 */
/* Zero-terminated property list: { CL_CONTEXT_PLATFORM, platform id, 0 }. */
522 cps[0] = CL_CONTEXT_PLATFORM;
523 cps[1] = (cl_context_properties)opencl_ctx->
platform_id;
524 cps[2] = 0;
525
527 NULL, NULL, &status);
528 if (status != CL_SUCCESS) {
530 "Could not get OpenCL context from device type: %s\n",
av_opencl_errstr(status));
532 }
534 0, &status);
535 if (status != CL_SUCCESS) {
539 }
540 }
541 }
543 }
544
546 {
547 cl_int status;
548 int i, kernel_code_count = 0;
551
557 kernel_code_count++;
558 }
559 }
560 if (!kernel_code_count)
561 return 0;
562 /* create a CL program using the kernel source */
564 kernel_code_count,
565 kernel_code,
566 kernel_code_len,
567 &status);
568 if(status != CL_SUCCESS) {
570 "Could not create OpenCL program with source code: %s\n",
av_opencl_errstr(status));
572 }
576 }
579 if (status != CL_SUCCESS) {
583 }
585 return 0;
586 }
587
589 {
596 }
598 if (ret < 0)
600 }
602 if (ret < 0)
606 "No kernel code is registered, compile kernel file failed\n");
609 }
611
615 }
616
618 {
619 cl_int status;
620 int i;
629 status = clReleaseProgram(opencl_ctx.
programs[i]);
630 if (status != CL_SUCCESS) {
633 }
635 }
636 }
639 if (status != CL_SUCCESS) {
642 }
644 }
646 status = clReleaseContext(opencl_ctx.
context);
647 if (status != CL_SUCCESS) {
650 }
652 }
656 av_opt_free(&opencl_ctx);
//FIXME: free openclutils context
658 }
659
661 {
/* Creates an OpenCL buffer of cl_buf_size bytes in opencl_ctx.context with
 * the given flags and host_ptr, storing the resulting handle through cl_buf.
 * Returns 0 once clCreateBuffer has reported CL_SUCCESS.
 * NOTE(review): the body of the failure branch is not visible in this excerpt. */
662 cl_int status;
663 *cl_buf = clCreateBuffer(opencl_ctx.
context, flags, cl_buf_size, host_ptr, &status);
664 if (status != CL_SUCCESS) {
667 }
668 return 0;
669 }
670
672 {
/* Releases the OpenCL memory object that cl_buf points to and then zeroes
 * the caller's handle. A NULL cl_buf is a no-op.
 * NOTE(review): the body of the failure branch is not visible in this excerpt. */
673 cl_int status = 0;
674 if (!cl_buf)
675 return;
676 status = clReleaseMemObject(*cl_buf);
677 if (status != CL_SUCCESS) {
680 }
/* Clear the handle so the caller is not left holding a released object. */
681 memset(cl_buf, 0, sizeof(*cl_buf));
682 }
683
685 {
/* Copies buf_size bytes from host memory (src_buf) into the OpenCL buffer
 * dst_cl_buf: map the buffer for writing, memcpy into the mapping, unmap.
 * Returns 0 when both the map and the unmap report CL_SUCCESS.
 * NOTE(review): the bodies of the two failure branches are not visible in
 * this excerpt. */
686 cl_int status;
/* CL_TRUE requests a blocking map, so the returned pointer is usable as soon
 * as the call returns; the mapped range starts at offset 0. */
687 void *mapped = clEnqueueMapBuffer(opencl_ctx.
command_queue, dst_cl_buf,
688 CL_TRUE, CL_MAP_WRITE, 0,
sizeof(
uint8_t) * buf_size,
689 0, NULL, NULL, &status);
690
691 if (status != CL_SUCCESS) {
695 }
696 memcpy(mapped, src_buf, buf_size);
697
698 status = clEnqueueUnmapMemObject(opencl_ctx.
command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
699 if (status != CL_SUCCESS) {
703 }
704 return 0;
705 }
706
708 {
/* Copies buf_size bytes from the OpenCL buffer src_cl_buf into host memory
 * (dst_buf): map the buffer for reading, memcpy out of the mapping, unmap.
 * Returns 0 when both the map and the unmap report CL_SUCCESS.
 * NOTE(review): the bodies of the two failure branches are not visible in
 * this excerpt. */
709 cl_int status;
/* Blocking map (CL_TRUE) of the first buf_size bytes of the buffer. */
710 void *mapped = clEnqueueMapBuffer(opencl_ctx.
command_queue, src_cl_buf,
711 CL_TRUE, CL_MAP_READ, 0, buf_size,
712 0, NULL, NULL, &status);
713
714 if (status != CL_SUCCESS) {
718 }
719 memcpy(dst_buf, mapped, buf_size);
720
721 status = clEnqueueUnmapMemObject(opencl_ctx.
command_queue, src_cl_buf, mapped, 0, NULL, NULL);
722 if (status != CL_SUCCESS) {
726 }
727 return 0;
728 }
729
731 uint8_t **src_data,
int *plane_size,
int plane_num)
732 {
/* Packs plane_num host planes back to back into the OpenCL buffer dst_cl_buf,
 * starting dst_cl_offset bytes into it. src_data[i] is the source of plane i
 * and plane_size[i] its byte count. Returns 0 when the map and the unmap
 * both report CL_SUCCESS.
 * NOTE(review): the first line of the signature and the bodies of the error
 * branches are not visible in this excerpt. */
733 int i, buffer_size = 0;
735 cl_int status;
736 void *mapped;
/* The unsigned cast makes a negative plane_num compare as a huge value, so
 * this single test catches both negative counts and counts above 8. */
737 if ((unsigned int)plane_num > 8) {
739 }
/* Total number of bytes across all planes. */
740 for (i = 0;i < plane_num;i++) {
741 buffer_size += plane_size[i];
742 }
/* NOTE(review): this check ignores dst_cl_offset, yet the map below spans
 * buffer_size + dst_cl_offset bytes, so a non-zero offset can pass this
 * check and still run past cl_buffer_size. */
743 if (buffer_size > cl_buffer_size) {
745 "Cannot write image to OpenCL buffer: buffer too small\n");
747 }
/* Blocking map from offset 0; the copy itself starts dst_cl_offset bytes in. */
748 mapped = clEnqueueMapBuffer(opencl_ctx.
command_queue, dst_cl_buf,
749 CL_TRUE, CL_MAP_WRITE, 0, buffer_size + dst_cl_offset,
750 0, NULL, NULL, &status);
751 if (status != CL_SUCCESS) {
755 }
756 temp = mapped;
757 temp += dst_cl_offset;
/* Planes are written contiguously, one directly after the other. */
758 for (i = 0; i < plane_num; i++) {
759 memcpy(temp, src_data[i], plane_size[i]);
760 temp += plane_size[i];
761 }
762 status = clEnqueueUnmapMemObject(opencl_ctx.
command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
763 if (status != CL_SUCCESS) {
767 }
768 return 0;
769 }
770
772 cl_mem src_cl_buf, size_t cl_buffer_size)
773 {
/* Unpacks plane_num planes stored back to back at the start of the OpenCL
 * buffer src_cl_buf into the host planes dst_data[i], copying plane_size[i]
 * bytes for plane i. Returns 0 when the map and the unmap both report
 * CL_SUCCESS.
 * NOTE(review): the first line of the signature, the condition that opens
 * the block around the copy loop, and the bodies of the error branches are
 * not visible in this excerpt. */
774 int i,buffer_size = 0,
ret = 0;
776 void *mapped;
777 cl_int status;
/* The unsigned cast makes a negative plane_num compare as a huge value, so
 * this single test catches both negative counts and counts above 8. */
778 if ((unsigned int)plane_num > 8) {
780 }
/* Total number of bytes across all planes. */
781 for (i = 0; i < plane_num; i++) {
782 buffer_size += plane_size[i];
783 }
/* NOTE(review): the message below says "write" although this path maps the
 * buffer with CL_MAP_READ and copies out of it. Probably a copy-paste slip
 * in the log text. */
784 if (buffer_size > cl_buffer_size) {
786 "Cannot write image to CPU buffer: OpenCL buffer too small\n");
788 }
/* Blocking map (CL_TRUE) of the first buffer_size bytes of the buffer. */
789 mapped = clEnqueueMapBuffer(opencl_ctx.
command_queue, src_cl_buf,
790 CL_TRUE, CL_MAP_READ, 0, buffer_size,
791 0, NULL, NULL, &status);
792
793 if (status != CL_SUCCESS) {
797 }
798 temp = mapped;
/* Planes are read contiguously, one directly after the other. */
800 for (i = 0; i < plane_num; i++) {
801 memcpy(dst_data[i], temp, plane_size[i]);
802 temp += plane_size[i];
803 }
804 }
805 status = clEnqueueUnmapMemObject(opencl_ctx.
command_queue, src_cl_buf, mapped, 0, NULL, NULL);
806 if (status != CL_SUCCESS) {
810 }
811 return 0;
812 }