1//===- OffloadWrapper.cpp ---------------------------------------*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
40/// Magic number that begins the section containing the CUDA fatbinary.
41constexpr unsigned CudaFatMagic = 0x466243b1;
/// HIP counterpart of the magic number above. The bytes 0x48 0x49 0x50 0x46
/// are the ASCII characters "HIPF".
/// NOTE(review): presumably written into the HIP fatbinary wrapper in the same
/// position as CudaFatMagic; its use is not visible here, confirm at the use
/// site.
42constexpr unsigned HIPFatMagic = 0x48495046;
45 return M.getDataLayout().getIntPtrType(M.getContext());
48// struct __tgt_device_image {
51// __tgt_offload_entry *EntriesBegin;
52// __tgt_offload_entry *EntriesEnd;
69// struct __tgt_bin_desc {
70// int32_t NumDeviceImages;
71// __tgt_device_image *DeviceImages;
72// __tgt_offload_entry *HostEntriesBegin;
73// __tgt_offload_entry *HostEntriesEnd;
89/// Creates binary descriptor for the given device images. Binary descriptor
90/// is an object that is passed to the offloading runtime at program startup
91/// and it describes all device images available in the executable or shared
92/// library. It is defined as follows:
94/// __attribute__((visibility("hidden")))
95/// extern __tgt_offload_entry *__start_omp_offloading_entries;
96/// __attribute__((visibility("hidden")))
97/// extern __tgt_offload_entry *__stop_omp_offloading_entries;
99/// static const char Image0[] = { <Bufs.front() contents> };
101/// static const char ImageN[] = { <Bufs.back() contents> };
103/// static const __tgt_device_image Images[] = {
105/// Image0, /*ImageStart*/
106/// Image0 + sizeof(Image0), /*ImageEnd*/
107/// __start_omp_offloading_entries, /*EntriesBegin*/
108/// __stop_omp_offloading_entries /*EntriesEnd*/
112/// ImageN, /*ImageStart*/
113/// ImageN + sizeof(ImageN), /*ImageEnd*/
114/// __start_omp_offloading_entries, /*EntriesBegin*/
115/// __stop_omp_offloading_entries /*EntriesEnd*/
119/// static const __tgt_bin_desc BinDesc = {
120/// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
121/// Images, /*DeviceImages*/
122/// __start_omp_offloading_entries, /*HostEntriesBegin*/
123/// __stop_omp_offloading_entries /*HostEntriesEnd*/
126/// Global variable that represents BinDesc is returned.
131 auto [EntriesB, EntriesE] = EntryArray;
136 // Create initializer for the images array.
138 ImagesInits.
reserve(Bufs.size());
140 // We embed the full offloading entry so the binary utilities can parse it.
144 ".omp_offloading.device_image" + Suffix);
146 Image->setSection(Relocatable ?
".llvm.offloading.relocatable"
147 :
".llvm.offloading");
152 "Invalid binary format");
154 // The device image struct contains the pointer to the beginning and end of
155 // the image stored inside of the offload binary. There should only be one
156 // of these for each buffer so we parse it out manually.
161 Binary.bytes_begin() + Header->EntryOffset);
163 auto *Begin = ConstantInt::get(
getSizeTTy(M), Entry->ImageOffset);
165 ConstantInt::get(
getSizeTTy(M), Entry->ImageOffset + Entry->ImageSize);
175 ImageE, EntriesB, EntriesE));
178 // Then create images array.
185 ".omp_offloading.device_images" + Suffix);
191 // And finally create the binary descriptor object.
197 return new GlobalVariable(M, DescInit->getType(),
/*isConstant=*/true,
199 ".omp_offloading.descriptor" + Suffix);
208 ".omp_offloading.descriptor_unreg" + Suffix, &M);
209 Func->setSection(
".text.startup");
211 // Get __tgt_unregister_lib function declaration.
215 M.getOrInsertFunction(
"__tgt_unregister_lib", UnRegFuncTy);
217 // Construct function body
219 Builder.CreateCall(UnRegFuncC, BinDesc);
220 Builder.CreateRetVoid();
230 ".omp_offloading.descriptor_reg" + Suffix, &M);
231 Func->setSection(
".text.startup");
233 // Get __tgt_register_lib function declaration.
237 M.getOrInsertFunction(
"__tgt_register_lib", RegFuncTy);
241 FunctionCallee AtExit = M.getOrInsertFunction(
"atexit", AtExitTy);
243 Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
245 // Construct function body
248 Builder.CreateCall(RegFuncC, BinDesc);
250 // Register the destructors with 'atexit'. This is expected by the CUDA
251 // runtime and ensures that we clean up before dynamic objects are destroyed.
252 // This needs to be done after plugin initialization to ensure that it is
253 // called before the plugin runtime is destroyed.
254 Builder.CreateCall(AtExit, UnregFunc);
255 Builder.CreateRetVoid();
257 // Add this function to constructors.
261// struct fatbin_wrapper {
277/// Embed the image \p Image into the module \p M so it can be found by the
285 // Create the global string containing the fatbinary.
287 IsHIP ?
".hip_fatbin"
292 ".fatbin_image" + Suffix);
293 Fatbin->setSection(FatbinConstantSection);
295 // Create the fatbinary wrapper
296 StringRef FatbinWrapperSection = IsHIP ?
".hipFatBinSegment"
298 :
".nvFatBinSegment";
311 FatbinInitializer,
".fatbin_wrapper" + Suffix);
312 FatbinDesc->setSection(FatbinWrapperSection);
313 FatbinDesc->setAlignment(
Align(8));
318/// Create the register globals function. We will iterate all of the offloading
319/// entries stored at the begin / end symbols and register them according to
320/// their type. This creates the following function in IR:
322/// extern struct __tgt_offload_entry __start_cuda_offloading_entries;
323/// extern struct __tgt_offload_entry __stop_cuda_offloading_entries;
325/// extern void __cudaRegisterFunction(void **, void *, void *, void *, int,
326/// void *, void *, void *, void *, int *);
327/// extern void __cudaRegisterVar(void **, void *, void *, void *, int32_t,
328/// int64_t, int32_t, int32_t);
330/// void __cudaRegisterTest(void **fatbinHandle) {
331/// for (struct __tgt_offload_entry *entry = &__start_cuda_offloading_entries;
332/// entry != &__stop_cuda_offloading_entries; ++entry) {
333/// if (entry->Kind != OFK_CUDA)
337/// __cudaRegisterFunction(fatbinHandle, entry->addr, entry->name,
338/// entry->name, -1, 0, 0, 0, 0, 0);
340/// __cudaRegisterVar(fatbinHandle, entry->addr, entry->name, entry->name,
341/// 0, entry->size, 0, 0);
347 bool EmitSurfacesAndTextures) {
349 auto [EntriesB, EntriesE] = EntryArray;
351 // Get the __cudaRegisterFunction function declaration.
358 Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int32PtrTy},
361 IsHIP ?
"__hipRegisterFunction" :
"__cudaRegisterFunction", RegFuncTy);
363 // Get the __cudaRegisterVar function declaration.
370 IsHIP ?
"__hipRegisterVar" :
"__cudaRegisterVar", RegVarTy);
372 // Get the __cudaRegisterManagedVar function declaration.
375 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
379 IsHIP ?
"__hipRegisterManagedVar" :
"__cudaRegisterManagedVar",
382 // Get the __cudaRegisterSurface function declaration.
385 {Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
389 IsHIP ?
"__hipRegisterSurface" :
"__cudaRegisterSurface", RegSurfaceTy);
391 // Get the __cudaRegisterTexture function declaration.
398 IsHIP ?
"__hipRegisterTexture" :
"__cudaRegisterTexture", RegTextureTy);
404 IsHIP ?
".hip.globals_reg" :
".cuda.globals_reg", &M);
405 RegGlobalsFn->setSection(
".text.startup");
407 // Create the loop to register all the entries.
420 auto *EntryCmp = Builder.CreateICmpNE(EntriesB, EntriesE);
421 Builder.CreateCondBr(EntryCmp, EntryBB, ExitBB);
422 Builder.SetInsertPoint(EntryBB);
428 auto *Addr = Builder.CreateLoad(Int8PtrTy, AddrPtr,
"addr");
433 auto *AuxAddr = Builder.CreateLoad(Int8PtrTy, AuxAddrPtr,
"aux_addr");
443 auto *
Name = Builder.CreateLoad(Int8PtrTy, NamePtr,
"name");
458 auto *
Data = Builder.CreateTrunc(
461 auto *
Type = Builder.CreateAnd(
464 // Extract the flags stored in the bit-field and convert them to C booleans.
465 auto *ExternBit = Builder.CreateAnd(
468 auto *
Extern = Builder.CreateLShr(
473 auto *
Const = Builder.CreateLShr(
475 auto *NormalizedBit = Builder.CreateAnd(
478 auto *Normalized = Builder.CreateLShr(
480 auto *KindCond = Builder.CreateICmpEQ(
484 Builder.CreateCondBr(KindCond, IfKindBB, IfEndBB);
485 Builder.SetInsertPoint(IfKindBB);
486 auto *FnCond = Builder.CreateICmpEQ(
488 Builder.CreateCondBr(FnCond, IfThenBB, IfElseBB);
490 // Create kernel registration code.
491 Builder.SetInsertPoint(IfThenBB);
492 Builder.CreateCall(RegFunc, {RegGlobalsFn->arg_begin(), Addr,
Name,
Name,
499 Builder.CreateBr(IfEndBB);
500 Builder.SetInsertPoint(IfElseBB);
502 auto *
Switch = Builder.CreateSwitch(
Type, IfEndBB);
503 // Create global variable registration code.
504 Builder.SetInsertPoint(SwGlobalBB);
505 Builder.CreateCall(RegVar,
508 Builder.CreateBr(IfEndBB);
512 // Create managed variable registration code.
513 Builder.SetInsertPoint(SwManagedBB);
514 Builder.CreateCall(RegManagedVar, {RegGlobalsFn->arg_begin(), AuxAddr, Addr,
516 Builder.CreateBr(IfEndBB);
519 // Create surface variable registration code.
520 Builder.SetInsertPoint(SwSurfaceBB);
521 if (EmitSurfacesAndTextures)
522 Builder.CreateCall(RegSurface, {RegGlobalsFn->arg_begin(), Addr,
Name,
Name,
524 Builder.CreateBr(IfEndBB);
528 // Create texture variable registration code.
529 Builder.SetInsertPoint(SwTextureBB);
530 if (EmitSurfacesAndTextures)
531 Builder.CreateCall(RegTexture, {RegGlobalsFn->arg_begin(), Addr,
Name,
Name,
533 Builder.CreateBr(IfEndBB);
537 Builder.SetInsertPoint(IfEndBB);
538 auto *NewEntry = Builder.CreateInBoundsGEP(
540 auto *
Cmp = Builder.CreateICmpEQ(
551 &RegGlobalsFn->getEntryBlock());
552 Entry->addIncoming(NewEntry, IfEndBB);
553 Builder.CreateCondBr(Cmp, ExitBB, EntryBB);
554 Builder.SetInsertPoint(ExitBB);
555 Builder.CreateRetVoid();
560// Create the constructor and destructor to register the fatbinary with the CUDA
565 bool EmitSurfacesAndTextures) {
570 (IsHIP ?
".hip.fatbin_reg" :
".cuda.fatbin_reg") + Suffix, &M);
571 CtorFunc->setSection(
".text.startup");
576 (IsHIP ?
".hip.fatbin_unreg" :
".cuda.fatbin_unreg") + Suffix, &M);
577 DtorFunc->setSection(
".text.startup");
581 // Get the __cudaRegisterFatBinary function declaration.
584 IsHIP ?
"__hipRegisterFatBinary" :
"__cudaRegisterFatBinary", RegFatTy);
585 // Get the __cudaRegisterFatBinaryEnd function declaration.
589 M.getOrInsertFunction(
"__cudaRegisterFatBinaryEnd", RegFatEndTy);
590 // Get the __cudaUnregisterFatBinary function declaration.
594 IsHIP ?
"__hipUnregisterFatBinary" :
"__cudaUnregisterFatBinary",
599 FunctionCallee AtExit = M.getOrInsertFunction(
"atexit", AtExitTy);
604 (IsHIP ?
".hip.binary_handle" :
".cuda.binary_handle") + Suffix);
606 // Create the constructor to register this image with the runtime.
608 CallInst *Handle = CtorBuilder.CreateCall(
611 CtorBuilder.CreateAlignedStore(
612 Handle, BinaryHandleGlobal,
613 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
614 CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP, EntryArray,
616 EmitSurfacesAndTextures),
619 CtorBuilder.CreateCall(RegFatbinEnd, Handle);
620 CtorBuilder.CreateCall(AtExit, DtorFunc);
621 CtorBuilder.CreateRetVoid();
623 // Create the destructor to unregister the image with the runtime. We cannot
624 // use a standard global destructor after CUDA 9.2 so this must be called by
625 // `atexit()` instead.
627 LoadInst *BinaryHandle = DtorBuilder.CreateAlignedLoad(
628 PtrTy, BinaryHandleGlobal,
629 Align(M.getDataLayout().getPointerTypeSize(PtrTy)));
630 DtorBuilder.CreateCall(UnregFatbin, BinaryHandle);
631 DtorBuilder.CreateRetVoid();
633 // Add this function to constructors.
637/// SYCLWrapper helper class that creates all LLVM IRs wrapping given images.
641 SYCLJITOptions Options;
643 StructType *EntryTy =
nullptr;
644 StructType *SyclDeviceImageTy =
nullptr;
645 StructType *SyclBinDescTy =
nullptr;
647 SYCLWrapper(
Module &M,
const SYCLJITOptions &Options)
650 SyclDeviceImageTy = getSyclDeviceImageTy();
651 SyclBinDescTy = getSyclBinDescTy();
655 switch (M.getDataLayout().getPointerSize()) {
657 return Type::getInt32Ty(C);
659 return Type::getInt64Ty(C);
667 ConstantInt::get(SizeTTy, Second)};
670 /// Note: Properties are not supported yet; support for them will
671 /// be added later.
672 /// Creates a structure corresponding to:
673 /// SYCL specific image descriptor type.
675 /// struct __sycl.tgt_device_image {
676 /// // version of this structure - for backward compatibility;
677 /// // all modifications which change order/type/offsets of existing fields
678 /// // should increment the version.
679 /// uint16_t Version;
680 /// // the kind of offload model the image employs.
681 /// uint8_t OffloadKind;
682 /// // format of the image data - SPIRV, LLVMIR bitcode, etc
684 /// // null-terminated string representation of the device's target
686 /// const char *Arch;
687 /// // a null-terminated string; target- and compiler-specific options
688 /// // which are suggested to use to "compile" program at runtime
689 /// const char *CompileOptions;
690 /// // a null-terminated string; target- and compiler-specific options
691 /// // which are suggested to use to "link" program at runtime
692 /// const char *LinkOptions;
693 /// // Pointer to the device binary image start
694 /// void *ImageStart;
695 /// // Pointer to the device binary image end
697 /// // the entry table
698 /// __tgt_offload_entry *EntriesBegin;
699 /// __tgt_offload_entry *EntriesEnd;
700 /// const char *PropertiesBegin;
701 /// const char *PropertiesEnd;
704 StructType *getSyclDeviceImageTy() {
707 Type::getInt16Ty(C),
// Version
708 Type::getInt8Ty(C),
// OffloadKind
709 Type::getInt8Ty(C),
// Format
710 PointerType::getUnqual(C),
// Arch
711 PointerType::getUnqual(C),
// CompileOptions
712 PointerType::getUnqual(C),
// LinkOptions
713 PointerType::getUnqual(C),
// ImageStart
714 PointerType::getUnqual(C),
// ImageEnd
715 PointerType::getUnqual(C),
// EntriesBegin
716 PointerType::getUnqual(C),
// EntriesEnd
717 PointerType::getUnqual(C),
// PropertiesBegin
718 PointerType::getUnqual(C)
// PropertiesEnd
720 "__sycl.tgt_device_image");
723 /// Creates a structure for SYCL specific binary descriptor type. Corresponds
727 /// struct __sycl.tgt_bin_desc {
728 /// // version of this structure - for backward compatibility;
729 /// // all modifications which change order/type/offsets of existing fields
730 /// // should increment the version.
731 /// uint16_t Version;
732 /// uint16_t NumDeviceImages;
733 /// __sycl.tgt_device_image *DeviceImages;
734 /// // the offload entry table
735 /// __tgt_offload_entry *HostEntriesBegin;
736 /// __tgt_offload_entry *HostEntriesEnd;
739 StructType *getSyclBinDescTy() {
741 {Type::getInt16Ty(C), Type::getInt16Ty(C), PointerType::getUnqual(C),
742 PointerType::getUnqual(C), PointerType::getUnqual(C)},
743 "__sycl.tgt_bin_desc");
746 /// Adds a global readonly variable that is initialized by given
747 /// \p Initializer to the module.
748 GlobalVariable *addGlobalArrayVariable(
const Twine &Name,
749 ArrayRef<char> Initializer,
750 const Twine &Section =
"") {
752 auto *Var =
new GlobalVariable(M, Arr->getType(),
/*isConstant*/ true,
753 GlobalVariable::InternalLinkage, Arr, Name);
754 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
756 SmallVector<char, 32> NameBuf;
759 Var->setSection(SectionName);
763 /// Adds given \p Buf as a global variable into the module.
764 /// \returns Pair of pointers that point at the beginning and the end of the
766 std::pair<Constant *, Constant *>
767 addArrayToModule(ArrayRef<char> Buf,
const Twine &Name,
768 const Twine &Section =
"") {
769 auto *Var = addGlobalArrayVariable(Name, Buf, Section);
771 getSizetConstPair(0, 0));
773 Var->getValueType(), Var, getSizetConstPair(0, Buf.
size()));
774 return std::make_pair(ImageB, ImageE);
777 /// Adds given \p Data as constant byte array in the module.
778 /// \returns Constant pointer to the added data. The pointer type does not
779 /// carry size information.
780 Constant *addRawDataToModule(ArrayRef<char>
Data,
const Twine &Name) {
781 auto *Var = addGlobalArrayVariable(Name,
Data);
783 getSizetConstPair(0, 0));
787 /// Creates a global variable of const char* type and creates an
788 /// initializer that initializes it with \p Str.
790 /// \returns Link-time constant pointer (constant expr) to that
792 Constant *addStringToModule(StringRef Str,
const Twine &Name) {
794 auto *Var =
new GlobalVariable(M, Arr->getType(),
/*isConstant*/ true,
795 GlobalVariable::InternalLinkage, Arr, Name);
796 Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
802 /// Each image contains its own set of symbols, which may contain different
803 /// symbols than other images. This function constructs an array of
804 /// symbol entries for a particular image.
806 /// \returns Pointers to the beginning and end of the array.
807 std::pair<Constant *, Constant *>
808 initOffloadEntriesPerImage(StringRef Entries,
const Twine &OffloadKindTag) {
811 Entries,
/*BufferName*/ "",
/*RequiresNullTerminator*/ false);
812 for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI) {
816 /*Name*/ *LI,
/*Size*/ 0,
817 /*Flags*/ 0,
/*Data*/ 0);
823 auto *EntriesGV =
new GlobalVariable(M, Arr->getType(),
/*isConstant*/ true,
824 GlobalVariable::InternalLinkage, Arr,
825 OffloadKindTag +
"entries_arr");
828 EntriesGV->getValueType(), EntriesGV, getSizetConstPair(0, 0));
830 EntriesGV->getValueType(), EntriesGV,
831 getSizetConstPair(0, EntriesInits.
size()));
832 return std::make_pair(EntriesB, EntriesE);
835 Constant *wrapImage(
const OffloadBinary &OB,
const Twine &ImageID,
836 StringRef OffloadKindTag) {
837 // Note: Intel DPC++ compiler had 2 versions of this structure
838 // and clang++ has a third different structure. To avoid ABI incompatibility
839 // between generated device images the Version here starts from 3.
840 constexpr uint16_t DeviceImageStructVersion = 3;
842 ConstantInt::get(Type::getInt16Ty(C), DeviceImageStructVersion);
843 Constant *OffloadKindConstant = ConstantInt::get(
844 Type::getInt8Ty(C),
static_cast<uint8_t
>(
OB.getOffloadKind()));
845 Constant *ImageKindConstant = ConstantInt::get(
846 Type::getInt8Ty(C),
static_cast<uint8_t
>(
OB.getImageKind()));
847 StringRef Triple =
OB.getString(
"triple");
849 addStringToModule(Triple, Twine(OffloadKindTag) +
"target." + ImageID);
851 addStringToModule(Options.CompileOptions,
852 Twine(OffloadKindTag) +
"opts.compile." + ImageID);
853 Constant *LinkOptions = addStringToModule(
854 Options.LinkOptions, Twine(OffloadKindTag) +
"opts.link." + ImageID);
856 // Note: NULL for now.
857 std::pair<Constant *, Constant *> PropertiesConstants = {
861 StringRef RawImage =
OB.getImage();
862 std::pair<Constant *, Constant *>
Binary = addArrayToModule(
863 ArrayRef<char>(RawImage.
begin(), RawImage.
end()),
864 Twine(OffloadKindTag) + ImageID +
".data",
".llvm.offloading");
866 // For SYCL images offload entries are defined here per image.
867 std::pair<Constant *, Constant *> ImageEntriesPtrs =
868 initOffloadEntriesPerImage(
OB.getString(
"symbols"), OffloadKindTag);
870 SyclDeviceImageTy,
Version, OffloadKindConstant, ImageKindConstant,
871 TripleConstant, CompileOptions, LinkOptions,
Binary.first,
872 Binary.second, ImageEntriesPtrs.first, ImageEntriesPtrs.second,
873 PropertiesConstants.first, PropertiesConstants.second);
875 return WrappedBinary;
879 StringRef OffloadKindTag) {
881 ArrayType::get(SyclDeviceImageTy, WrappedImages.
size()), WrappedImages);
883 new GlobalVariable(M, ImagesData->getType(),
/*isConstant*/ true,
885 Twine(OffloadKindTag) +
"device_images");
886 ImagesGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
895 static constexpr uint16_t BinDescStructVersion = 1;
898 ConstantInt::get(Type::getInt16Ty(C), BinDescStructVersion),
899 ConstantInt::get(Type::getInt16Ty(C), WrappedImages.
size()), ImagesB,
902 return new GlobalVariable(M, DescInit->getType(),
/*isConstant*/ true,
904 Twine(OffloadKindTag) +
"descriptor");
907 /// Creates binary descriptor for the given device images. Binary descriptor
908 /// is an object that is passed to the offloading runtime at program startup
909 /// and it describes all device images available in the executable or shared
910 /// library. It is defined as follows:
913 /// __attribute__((visibility("hidden")))
914 /// __tgt_offload_entry *__sycl_offload_entries_arr0[];
916 /// __attribute__((visibility("hidden")))
917 /// __tgt_offload_entry *__sycl_offload_entries_arrN[];
919 /// __attribute__((visibility("hidden")))
920 /// extern const char *CompileOptions = "...";
922 /// __attribute__((visibility("hidden")))
923 /// extern const char *LinkOptions = "...";
926 /// static const char Image0[] = { ... };
928 /// static const char ImageN[] = { ... };
930 /// static const __sycl.tgt_device_image Images[] = {
932 /// Version, // Version
933 /// OffloadKind, // OffloadKind
934 /// Format, // Format of the image.
935 /// TripleString, // Arch
936 /// CompileOptions, // CompileOptions
937 /// LinkOptions, // LinkOptions
938 /// Image0, // ImageStart
939 /// Image0 + IMAGE0_SIZE, // ImageEnd
940 /// __sycl_offload_entries_arr0, // EntriesBegin
941 /// __sycl_offload_entries_arr0 + ENTRIES0_SIZE, // EntriesEnd
942 /// NULL, // PropertiesBegin
943 /// NULL, // PropertiesEnd
948 /// static const __sycl.tgt_bin_desc FatbinDesc = {
949 /// Version, //Version
950 /// sizeof(Images) / sizeof(Images[0]), //NumDeviceImages
951 /// Images, //DeviceImages
952 /// NULL, //HostEntriesBegin
953 /// NULL //HostEntriesEnd
957 /// \returns Global variable that represents FatbinDesc.
959 StringRef OffloadKindTag =
".sycl_offloading.";
962 for (
size_t I = 0,
E = OffloadFiles.
size();
I !=
E; ++
I)
964 wrapImage(*OffloadFiles[
I].getBinary(), Twine(
I), OffloadKindTag));
966 return combineWrappedImages(WrappedImages, OffloadKindTag);
969 void createRegisterFatbinFunction(GlobalVariable *FatbinDesc) {
970 auto *FuncTy = FunctionType::get(Type::getVoidTy(C),
/*isVarArg*/ false);
972 Twine(
"sycl") +
".descriptor_reg", &M);
973 Func->setSection(
".text.startup");
975 // Get the __sycl_register_lib function declaration.
977 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
979 FunctionCallee RegFuncC =
980 M.getOrInsertFunction(
"__sycl_register_lib", RegFuncTy);
982 // Construct function body
984 Builder.CreateCall(RegFuncC, FatbinDesc);
985 Builder.CreateRetVoid();
987 // Add this function to constructors.
991 void createUnregisterFunction(GlobalVariable *FatbinDesc) {
992 auto *FuncTy = FunctionType::get(Type::getVoidTy(C),
/*isVarArg*/ false);
994 "sycl.descriptor_unreg", &M);
995 Func->setSection(
".text.startup");
997 // Get the __sycl_unregister_lib function declaration.
999 FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C),
1000 /*isVarArg=*/false);
1001 FunctionCallee UnRegFuncC =
1002 M.getOrInsertFunction(
"__sycl_unregister_lib", UnRegFuncTy);
1004 // Construct function body
1006 Builder.CreateCall(UnRegFuncC, FatbinDesc);
1007 Builder.CreateRetVoid();
1009 // Add this function to global destructors.
1012};
// end of SYCLWrapper
1020 createBinDesc(M, Images, EntryArray, Suffix, Relocatable);
1023 "No binary descriptors created.");
1024 createRegisterFunction(M,
Desc, Suffix);
1031 bool EmitSurfacesAndTextures) {
1035 "No fatbin section created.");
1037 createRegisterFatbinFunction(M,
Desc,
/*IsHip=*/false, EntryArray, Suffix,
1038 EmitSurfacesAndTextures);
1044 bool EmitSurfacesAndTextures) {
1048 "No fatbin section created.");
1050 createRegisterFatbinFunction(M,
Desc,
/*IsHip=*/true, EntryArray, Suffix,
1051 EmitSurfacesAndTextures);
1067 "No binary descriptors created.");
1069 W.createRegisterFatbinFunction(
Desc);
1070 W.createUnregisterFunction(
Desc);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static IntegerType * getSizeTTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
@ InternalLinkage
Rename collisions when linking (static functions).
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
A Module instance is used to store all the information related to an LLVM module.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Class to represent struct types.
static LLVM_ABI StructType * getTypeByName(LLVMContext &C, StringRef Name)
Return the type with the specified name, or null if there is none by that name.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Triple - Helper class for working with autoconf configuration names.
bool isMacOSX() const
Is this a Mac OS X triple.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
static uint64_t getAlignment()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ OB
OB - OneByte - Set if this instruction has a one byte opcode.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI Error extractOffloadBinaries(MemoryBufferRef Buffer, SmallVectorImpl< OffloadFile > &Binaries)
Extracts embedded device offloading code from a memory Buffer to a list of Binaries.
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
LLVM_ABI StructType * getEntryTy(Module &M)
Returns the type of the offloading entry we use to store kernels and globals that will be registered ...
LLVM_ABI llvm::Error wrapSYCLBinaries(llvm::Module &M, llvm::ArrayRef< char > Buffer, SYCLJITOptions Options=SYCLJITOptions())
Wraps OffloadBinaries in the given Buffers into the module M as global symbols and registers the imag...
@ OffloadGlobalSurfaceEntry
Mark the entry as a surface variable.
@ OffloadGlobalTextureEntry
Mark the entry as a texture variable.
@ OffloadGlobalNormalized
Mark the entry as being a normalized surface.
@ OffloadGlobalEntry
Mark the entry as a global entry.
@ OffloadGlobalManagedEntry
Mark the entry as a managed global variable.
@ OffloadGlobalExtern
Mark the entry as being extern.
@ OffloadGlobalConstant
Mark the entry as being constant.
LLVM_ABI llvm::Error wrapOpenMPBinaries(llvm::Module &M, llvm::ArrayRef< llvm::ArrayRef< char > > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool Relocatable=false)
Wraps the input device images into the module M as global symbols and registers the images with the O...
std::pair< GlobalVariable *, GlobalVariable * > EntryArrayTy
LLVM_ABI llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input bundled image into the module M as global symbols and registers the images with the H...
LLVM_ABI llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef< char > Images, EntryArrayTy EntryArray, llvm::StringRef Suffix="", bool EmitSurfacesAndTextures=true)
Wraps the input fatbinary image into the module M as global symbols and registers the images with the...
NodeAddr< FuncNode * > Func
Context & getContext() const
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
LLVM_ABI void appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Same as appendToGlobalCtors(), but for global dtors.
@ Extern
Replace returns with jump to thunk, don't emit thunk.
This struct is a compact representation of a valid (non-zero power of two) alignment.
@ offload_binary
LLVM offload object file.