1//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
32// TODO -- delete this flag once we have more robust mechanisms to allocate the
33// optimal RC for Opc and Dest of MFMA. In particular, there are high RP cases
34// where it is better to produce the VGPR form (e.g. if there are VGPR users
35// of the MFMA result).
37 "amdgpu-mfma-vgpr-form",
38 cl::desc(
"Whether to force use VGPR for Opc and Dest of MFMA. If "
39 "unspecified, default to compiler heuristics"),
53 UserSGPRInfo(
F, *STI), WorkGroupIDX(
false), WorkGroupIDY(
false),
55 PrivateSegmentWaveByteOffset(
false), WorkItemIDX(
false),
57 GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0),
58 IsWholeWaveFunction(
F.getCallingConv() ==
61 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(
F);
62 WavesPerEU = ST.getWavesPerEU(
F);
63 MaxNumWorkGroups = ST.getMaxNumWorkGroups(
F);
64 assert(MaxNumWorkGroups.size() == 3);
66 // Temporarily check both the attribute and the subtarget feature, until the
67 // latter is completely removed.
69 if (DynamicVGPRBlockSize == 0 && ST.isDynamicVGPREnabled())
70 DynamicVGPRBlockSize = ST.getDynamicVGPRBlockSize();
72 Occupancy = ST.computeOccupancy(
F,
getLDSSize()).second;
75 VRegFlags.reserve(1024);
87 if (ST.hasGFX90AInsts()) {
88 // FIXME: Extract logic out of getMaxNumVectorRegs; we need to apply the
89 // allocation granule and clamping.
90 auto [MinNumAGPRAttr, MaxNumAGPRAttr] =
92 /*OnlyFirstRequired=*/true);
93 MinNumAGPRs = MinNumAGPRAttr;
97 // Chain functions don't receive an SP from their caller, but are free to
98 // set one up. For now, we can use s32 to match what amdgpu_gfx functions
99 // would use if called, but this can be revisited.
100 // FIXME: Only reserve this if we actually need it.
101 StackPtrOffsetReg = AMDGPU::SGPR32;
103 ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;
105 ArgInfo.PrivateSegmentBuffer =
108 ImplicitArgPtr =
false;
114 FrameOffsetReg = AMDGPU::SGPR33;
115 StackPtrOffsetReg = AMDGPU::SGPR32;
117 if (!ST.enableFlatScratch()) {
118 // Non-entry functions have no special inputs for now, other registers
119 // required for scratch access.
120 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
122 ArgInfo.PrivateSegmentBuffer =
126 if (!
F.hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
127 ImplicitArgPtr =
true;
129 ImplicitArgPtr =
false;
136 ST.hasArchitectedSGPRs())) {
137 if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workgroup-id-x") ||
138 !
F.hasFnAttribute(
"amdgpu-no-cluster-id-x"))
141 if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-y") ||
142 !
F.hasFnAttribute(
"amdgpu-no-cluster-id-y"))
145 if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-z") ||
146 !
F.hasFnAttribute(
"amdgpu-no-cluster-id-z"))
151 if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workitem-id-x"))
154 if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-y") &&
155 ST.getMaxWorkitemID(
F, 1) != 0)
158 if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-z") &&
159 ST.getMaxWorkitemID(
F, 2) != 0)
162 if (!IsKernel && !
F.hasFnAttribute(
"amdgpu-no-lds-kernel-id"))
167 // X, XY, and XYZ are the only supported combinations, so make sure Y is
172 if (!ST.flatScratchIsArchitected()) {
173 PrivateSegmentWaveByteOffset =
true;
175 // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
178 ArgInfo.PrivateSegmentWaveByteOffset =
183 Attribute A =
F.getFnAttribute(
"amdgpu-git-ptr-high");
188 A =
F.getFnAttribute(
"amdgpu-32bit-address-high-bits");
189 S =
A.getValueAsString();
193 MaxMemoryClusterDWords =
F.getFnAttributeAsParsedInteger(
196 // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
197 // VGPR available at all times. For now, reserve highest available VGPR. After
198 // RA, shift it to the lowest available unused VGPR if one exists.
199 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
201 AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(
F) - 1);
222 ArgInfo.PrivateSegmentBuffer =
224 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
226 return ArgInfo.PrivateSegmentBuffer.getRegister();
231 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
233 return ArgInfo.DispatchPtr.getRegister();
238 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
240 return ArgInfo.QueuePtr.getRegister();
244 ArgInfo.KernargSegmentPtr
246 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
248 return ArgInfo.KernargSegmentPtr.getRegister();
253 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
255 return ArgInfo.DispatchID.getRegister();
260 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
262 return ArgInfo.FlatScratchInit.getRegister();
268 return ArgInfo.PrivateSegmentSize.getRegister();
273 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
275 return ArgInfo.ImplicitBufferPtr.getRegister();
281 return ArgInfo.LDSKernelId.getRegister();
286 unsigned AllocSizeDWord,
int KernArgIdx,
int PaddingSGPRs) {
287 auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(KernArgIdx);
288 assert(Inserted &&
"Preload kernel argument allocated twice.");
289 NumUserSGPRs += PaddingSGPRs;
290 // If the available register tuples are aligned with the kernarg to be
291 // preloaded use that register, otherwise we need to use a set of SGPRs and
293 if (!ArgInfo.FirstKernArgPreloadReg)
294 ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();
296 TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
297 auto &Regs = It->second.Regs;
299 (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
300 Regs.push_back(PreloadReg);
301 NumUserSGPRs += AllocSizeDWord;
303 Regs.reserve(AllocSizeDWord);
304 for (
unsigned I = 0;
I < AllocSizeDWord; ++
I) {
305 Regs.push_back(getNextUserSGPR());
310 // Track the actual number of SGPRs that HW will preload to.
311 UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
317 // Skip if it is an entry function or the register is already added.
321 // Skip if this is a function with the amdgpu_cs_chain or
322 // amdgpu_cs_chain_preserve calling convention and this is a scratch register.
323 // We never need to allocate a spill for these because we don't even need to
324 // restore the inactive lanes for them (they're scratchier than the usual
325 // scratch registers). We only need to do this if we have calls to
326 // llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since
327 // chain functions do not return) and the function did not contain a call to
328 // llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes
329 // when entering the function).
335 WWMSpills.insert(std::make_pair(
339// Separate out the callee-saved and scratch registers.
345 for (
auto &Reg : WWMSpills) {
347 CalleeSavedRegs.push_back(Reg);
349 ScratchRegs.push_back(Reg);
355 for (
unsigned I = 0; CSRegs[
I]; ++
I) {
356 if (CSRegs[
I] == Reg)
368 for (
unsigned I = 0, E = WWMVGPRs.
size();
I < E; ++
I) {
371 TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF);
372 if (!NewReg || NewReg >= Reg)
375 MRI.replaceRegWith(Reg, NewReg);
377 // Update various tables with the new VGPR.
378 WWMVGPRs[
I] = NewReg;
379 WWMReservedRegs.remove(Reg);
380 WWMReservedRegs.insert(NewReg);
381 MRI.reserveReg(NewReg,
TRI);
383 // Replace the register in SpillPhysVGPRs. This is needed to look for free
384 // lanes while spilling special SGPRs like FP, BP, etc. during PEI.
385 auto *RegItr =
llvm::find(SpillPhysVGPRs, Reg);
386 if (RegItr != SpillPhysVGPRs.end()) {
387 unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr);
388 SpillPhysVGPRs[Idx] = NewReg;
391 // The generic `determineCalleeSaves` might have set the old register if it
392 // is in the CSR range.
393 SavedVGPRs.
reset(Reg);
396 MBB.removeLiveIn(Reg);
397 MBB.sortUniqueLiveIns();
404bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
409 LaneVGPR =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
410 SpillVGPRs.push_back(LaneVGPR);
412 LaneVGPR = SpillVGPRs.back();
415 SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
419bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
420 MachineFunction &MF,
int FI,
unsigned LaneIndex,
bool IsPrologEpilog) {
422 const SIRegisterInfo *
TRI =
ST.getRegisterInfo();
426 // Find the highest available register if called before RA to ensure the
427 // lowest registers are available for allocation. The LaneVGPR, in that
428 // case, will be shifted back to the lowest range after VGPR allocation.
429 LaneVGPR =
TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF,
431 if (LaneVGPR == AMDGPU::NoRegister) {
432 // We have no VGPRs left for spilling SGPRs. Reset because we will not
433 // partially spill the SGPR to VGPRs.
434 SGPRSpillsToPhysicalVGPRLanes.erase(FI);
442 for (MachineBasicBlock &
MBB : MF) {
446 SpillPhysVGPRs.push_back(LaneVGPR);
448 LaneVGPR = SpillPhysVGPRs.back();
451 SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
457 bool IsPrologEpilog) {
458 std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
459 SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
460 : SGPRSpillsToVirtualVGPRLanes[FI];
462 // This has already been allocated.
463 if (!SpillLanes.empty())
468 unsigned WaveSize = ST.getWavefrontSize();
470 unsigned Size = FrameInfo.getObjectSize(FI);
471 unsigned NumLanes =
Size / 4;
473 if (NumLanes > WaveSize)
476 assert(
Size >= 4 &&
"invalid sgpr spill size");
477 assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
478 "not spilling SGPRs to VGPRs");
480 unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
481 : NumVirtualVGPRSpillLanes;
483 for (
unsigned I = 0;
I < NumLanes; ++
I, ++NumSpillLanes) {
484 unsigned LaneIndex = (NumSpillLanes % WaveSize);
486 bool Allocated = SpillToPhysVGPRLane
487 ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
489 : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
499/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
500/// Either AGPR is spilled to VGPR or vice versa.
501/// Returns true if a \p FI can be eliminated completely.
511 auto &Spill = VGPRToAGPRSpills[FI];
513 // This has already been allocated.
514 if (!Spill.Lanes.empty())
515 return Spill.FullyAllocated;
518 unsigned NumLanes =
Size / 4;
519 Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
522 isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
525 auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
527 Spill.FullyAllocated =
true;
529 // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
539 // TODO: Should include register tuples, but doesn't matter with current
542 OtherUsedRegs.
set(Reg);
544 OtherUsedRegs.
set(Reg);
547 for (
int I = NumLanes - 1;
I >= 0; --
I) {
548 NextSpillReg = std::find_if(
550 return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
554 if (NextSpillReg == Regs.
end()) {
// Registers exhausted
555 Spill.FullyAllocated =
false;
559 OtherUsedRegs.
set(*NextSpillReg);
561 MRI.reserveReg(*NextSpillReg,
TRI);
562 Spill.Lanes[
I] = *NextSpillReg++;
565 return Spill.FullyAllocated;
570 // Remove dead frame indices from function frame, however keep FP & BP since
571 // spills for them haven't been inserted yet. And also make sure to remove the
572 // frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure,
573 // otherwise, it could result in an unexpected side effect and bug, in case of
574 // any re-mapping of freed frame indices by later pass(es) like "stack slot
578 SGPRSpillsToVirtualVGPRLanes.erase(R.first);
581 // Remove the dead frame indices of CSR SGPRs which are spilled to physical
582 // VGPR lanes during SILowerSGPRSpills pass.
583 if (!ResetSGPRSpillStackIDs) {
586 SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
589 bool HaveSGPRToMemory =
false;
591 if (ResetSGPRSpillStackIDs) {
592 // All other SGPRs must be allocated on the default stack, so reset the
599 HaveSGPRToMemory =
true;
605 for (
auto &R : VGPRToAGPRSpills) {
610 return HaveSGPRToMemory;
620 TRI.getSpillAlign(AMDGPU::SGPR_32RegClass),
false);
/// Returns the SGPR located NumUserSGPRs registers past SGPR0, i.e. the
/// register that follows every user SGPR counted so far.
///
/// Asserts that no system SGPRs have been counted yet (NumSystemSGPRs == 0):
/// user SGPRs must all be added before any system SGPR.
624MCPhysReg SIMachineFunctionInfo::getNextUserSGPR()
const {
625 assert(NumSystemSGPRs == 0 &&
"System SGPRs must be added after user SGPRs");
626 return AMDGPU::SGPR0 + NumUserSGPRs;
/// Returns the SGPR located (NumUserSGPRs + NumSystemSGPRs) registers past
/// SGPR0, i.e. the register that follows every user and system SGPR counted
/// so far.
629MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR()
const {
630 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
633void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(
Register Reg) {
637void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(
Register NewReg,
639 VRegFlags.grow(NewReg);
640 VRegFlags[NewReg] = VRegFlags[SrcReg];
646 if (!ST.isAmdPalOS())
648 Register GitPtrLo = AMDGPU::SGPR0;
// Low GIT address passed in
649 if (ST.hasMergedShaders()) {
653 // Low GIT address is passed in s8 rather than s0 for an LS+HS or
654 // ES+GS merged shader on gfx9+.
655 GitPtrLo = AMDGPU::SGPR8;
674static std::optional<yaml::SIArgumentInfo>
679 auto convertArg = [&](std::optional<yaml::SIArgument> &
A,
684 // Create a register or stack argument.
686 if (Arg.isRegister()) {
691 // Check and update the optional mask.
693 SA.
Mask = Arg.getMask();
699 // TODO: Need to serialize kernarg preloads.
714 ArgInfo.PrivateSegmentWaveByteOffset);
805 // Create a diagnostic for the frame index.
812 SourceRange = YamlMFI.
ScavengeFI->SourceRange;
815 ScavengeFI = *FIOrErr;
817 ScavengeFI = std::nullopt;
823 auto [MinNumAGPR, MaxNumAGPR] =
825 /*OnlyFirstRequired=*/true);
826 return MinNumAGPR != 0u;
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static cl::opt< bool, true > MFMAVGPRFormOpt("amdgpu-mfma-vgpr-form", cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If " "unspecified, default to compiler heuristics"), cl::location(SIMachineFunctionInfo::MFMAVGPRForm), cl::init(false), cl::Hidden)
const GCNTargetMachine & getTM(const GCNSubtarget *STI)
static std::optional< yaml::SIArgumentInfo > convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI)
static yaml::StringValue regToString(Register Reg, const TargetRegisterInfo &TRI)
Interface definition for SIRegisterInfo.
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST)
uint32_t getLDSSize() const
Align DynLDSAlign
Align for dynamic shared memory if any.
uint32_t LDSSize
Number of bytes in the LDS that are being used.
bool isChainFunction() const
uint64_t ExplicitKernArgSize
bool hasInitWholeWave() const
bool isEntryFunction() const
static ClusterDimsAttr get(const Function &F)
Functions, function parameters, and return types can have attributes to indicate how they should be t...
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
void setBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsInMask - Add '1' bits from Mask to this vector.
Lightweight error class with error context and mandatory checking.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
const SITargetLowering * getTargetLowering() const override
LLVM_ABI void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
void setStackID(int ObjectIdx, uint8_t ID)
bool hasTailCall() const
Returns true if the function contains a tail call.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int getObjectIndexBegin() const
Return the minimum frame object index.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * cloneInfo(const Ty &Old)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
This interface provides simple read-only access to a block of memory, and provides simple methods for...
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
Register addPrivateSegmentSize(const SIRegisterInfo &TRI)
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register addDispatchPtr(const SIRegisterInfo &TRI)
Register getLongBranchReservedReg() const
Register addFlatScratchInit(const SIRegisterInfo &TRI)
unsigned getMaxWavesPerEU() const
ArrayRef< Register > getSGPRSpillPhysVGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Register addQueuePtr(const SIRegisterInfo &TRI)
SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default
Register getGITPtrLoReg(const MachineFunction &MF) const
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Register getSGPRForEXECCopy() const
bool mayUseAGPRs(const Function &F) const
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const
Register addLDSKernelId()
Register getVGPRForAGPRCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Register addDispatchID(const SIRegisterInfo &TRI)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
const ReservedRegSet & getWWMReservedRegs() const
std::optional< int > getOptionalScavengeFI() const
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
void limitOccupancy(const MachineFunction &MF)
SmallVectorImpl< MCRegister > * addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC, unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs)
void reserveWWMRegister(Register Reg)
static bool isChainScratchRegister(Register VGPR)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Represents a location in source code.
Represents a range in source code.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::const_iterator const_iterator
unsigned getMainFileID() const
const MemoryBuffer * getMemoryBuffer(unsigned i) const
StringRef - Represent a constant reference to a string, i.e.
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
constexpr bool empty() const
empty - Check if the string is empty.
const TargetMachine & getTargetMachine() const
ArrayRef< MCPhysReg > getRegisters() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A raw_ostream that writes to an std::string.
unsigned getInitialPSInputAddr(const Function &F)
unsigned getDynamicVGPRBlockSize(const Function &F)
LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
constexpr unsigned DefaultMemoryClusterDWordsLimit
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
A serializable representation of a reference to a stack object or fixed stack object.
This class should be specialized by any type that needs to be converted to/from a YAML mapping.
std::optional< SIArgument > PrivateSegmentWaveByteOffset
std::optional< SIArgument > WorkGroupIDY
std::optional< SIArgument > FlatScratchInit
std::optional< SIArgument > DispatchPtr
std::optional< SIArgument > DispatchID
std::optional< SIArgument > WorkItemIDY
std::optional< SIArgument > WorkGroupIDX
std::optional< SIArgument > ImplicitArgPtr
std::optional< SIArgument > QueuePtr
std::optional< SIArgument > WorkGroupInfo
std::optional< SIArgument > LDSKernelId
std::optional< SIArgument > ImplicitBufferPtr
std::optional< SIArgument > WorkItemIDX
std::optional< SIArgument > KernargSegmentPtr
std::optional< SIArgument > WorkItemIDZ
std::optional< SIArgument > PrivateSegmentSize
std::optional< SIArgument > PrivateSegmentBuffer
std::optional< SIArgument > WorkGroupIDZ
std::optional< unsigned > Mask
static SIArgument createArgument(bool IsReg)
unsigned MaxMemoryClusterDWords
StringValue SGPRForEXECCopy
SmallVector< StringValue > WWMReservedRegs
uint32_t HighBitsOf32BitAddress
SIMachineFunctionInfo()=default
StringValue FrameOffsetReg
StringValue LongBranchReservedReg
uint64_t ExplicitKernArgSize
uint16_t NumWaveDispatchSGPRs
void mappingImpl(yaml::IO &YamlIO) override
unsigned DynamicVGPRBlockSize
StringValue VGPRForAGPRCopy
std::optional< SIArgumentInfo > ArgInfo
SmallVector< StringValue, 2 > SpillPhysVGPRS
std::optional< FrameIndex > ScavengeFI
uint16_t NumWaveDispatchVGPRs
unsigned BytesInStackArgArea
unsigned ScratchReservedForDynamicVGPRs
StringValue ScratchRSrcReg
StringValue StackPtrOffsetReg
A wrapper around std::string which contains a source range that's being set during parsing.