//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableSpillVGPRToAGPR(
    "amdgpu-spill-vgpr-to-agpr",
    cl::desc("Enable spilling VGPRs to AGPRs"),
    cl::ReallyHidden, cl::init(true));
// Find a register matching \p RC from \p LiveUnits which is unused and
// available throughout the function. On failure, returns AMDGPU::NoRegister.
// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
// MCRegisters. This should reduce the number of iterations and avoid redundant
// checking.
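// A minimal sketch of what such a lookup can look like (illustrative only;
// the name findUnusedRegisterSketch and the exact set of checks are
// assumptions, not necessarily the implementation elided here): walk the
// class and return the first register that is allocatable, unreferenced in
// the function, and fully available.
static MCRegister findUnusedRegisterSketch(MachineRegisterInfo &MRI,
                                           const LiveRegUnits &LiveUnits,
                                           const TargetRegisterClass &RC) {
  for (MCRegister Reg : RC) {
    if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
        LiveUnits.available(Reg))
      return Reg;
  }
  return MCRegister(); // AMDGPU::NoRegister
}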
// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(
    MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
    const TargetRegisterClass &RC, bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveUnits.addReg(CSRegs[i]);

  // We are looking for a register that can be used throughout the entire
  // function, so any use is unacceptable.
  // ...
}
/// Query target location for spilling SGPRs
/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
    bool IncludeScratchCopy = true) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  // ...
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);

  // We need to save and restore the given SGPR.

  Register ScratchSGPR;
  // 1: Try to save the given register into an unused scratch SGPR. The
  // LiveUnits should have all the callee saved registers marked as used. For
  // certain cases we skip copy to scratch SGPR.
  if (IncludeScratchCopy)
    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);

  if (!ScratchSGPR) {
    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
                                         TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() &&
        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
                                         /*IsPrologEpilog=*/true)) {
      // 2: There's no free lane to spill, and no free register to save the
      // SGPR, so we're forced to take another VGPR to use for the spill.
      // ...
    } else {
      // Remove dead <FI> index.
      FrameInfo.RemoveStackObject(FI);
      // 3: If all else fails, spill the register to memory.
      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
      // ...
    }
  } else {
    // ...
    LiveUnits.addReg(ScratchSGPR);
  }
}
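// The three cases above amount to a save-kind decision that later code
// dispatches on. A schematic sketch of the cascade on plain enums (the names
// here are illustrative, not the exact enumerators used by the builder
// below):
enum class SGPRSaveKindSketch { CopyToScratchSGPR, SpillToVGPRLane, SpillToMem };

static SGPRSaveKindSketch exampleChooseSaveKind(bool HasScratchSGPR,
                                                bool HasFreeVGPRLane) {
  if (HasScratchSGPR)  // 1: cheapest, a plain SGPR-to-SGPR copy.
    return SGPRSaveKindSketch::CopyToScratchSGPR;
  if (HasFreeVGPRLane) // 2: write the SGPR into a lane of a VGPR.
    return SGPRSaveKindSketch::SpillToVGPRLane;
  return SGPRSaveKindSketch::SpillToMem; // 3: last resort, scratch memory.
}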
// We need to specially emit stack operations here because a different frame
// register is used than in the rest of the function, as getFrameRegister would
// use.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LiveRegUnits &LiveUnits, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI, Register FrameReg,
                             int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  // ...
  LiveUnits.addReg(SpillReg);
  bool IsKill = !MBB.isLiveIn(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
  // ...
}
static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LiveRegUnits &LiveUnits, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI,
                               Register FrameReg, int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  // ...
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
}
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  // ...
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
  // ...
  const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
  // ...
  MBB.addLiveIn(GitPtrLo);
}

static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
                          const SIMachineFunctionInfo *FuncInfo,
                          MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveUnits.empty()) {
    // ...
  }
}
// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
// BP, etc. These spills are delayed until the current function's frame is
// finalized. For a given register, the builder uses the
// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
class PrologEpilogSGPRSpillBuilder {
  // ...
  unsigned EltSize = 4;

  void saveToMemory(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
    Register TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    // ...
    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
          .addReg(SubReg);
      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                       FI, FrameReg, DwordOff);
      DwordOff += 4;
    }
  }

  void saveToVGPRLane(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));
    // ...
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
              Spill[I].VGPR)
          .addReg(SubReg)
          .addImm(Spill[I].Lane)
          .addReg(Spill[I].VGPR, RegState::Undef);
    }
  }

  void copyToScratchSGPR(Register DstReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(SuperReg);
  }

  void restoreFromMemory(const int FI) {
    // ...
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
    Register TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    // ...
    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
                         TmpVGPR, FI, FrameReg, DwordOff);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpVGPR, RegState::Kill);
      DwordOff += 4;
    }
  }

  void restoreFromVGPRLane(const int FI) {
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
          .addReg(Spill[I].VGPR)
          .addImm(Spill[I].Lane);
    }
  }

  void copyFromScratchSGPR(Register SrcReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
        .addReg(SrcReg);
  }

public:
  PrologEpilogSGPRSpillBuilder(Register Reg,
                               const PrologEpilogSGPRSaveRestoreInfo SI,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, const SIInstrInfo *TII,
                               const SIRegisterInfo &TRI,
                               LiveRegUnits &LiveUnits, Register FrameReg)
      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
        // ...
        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
        FrameReg(FrameReg) {
    // ...
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
  }

  void save() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return saveToMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return saveToVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyToScratchSGPR(SI.getReg());
    }
  }

  void restore() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return restoreFromMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return restoreFromVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyFromScratchSGPR(SI.getReg());
    }
  }
};
// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.

  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  // ...
  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LiveRegUnits LiveUnits;
    // ...

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      // ...
      if (MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    // ...
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    // ...
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    // ...
        .addImm(EncodedOffset) // offset
    // ...

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
                   .addReg(FlatScrInitHi)
                   .addImm(0xffff);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
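    // Illustrative sketch on plain integers (not part of this file): the base
    // address occupies bits [47:0] of the descriptor, so the S_AND_B32 keeps
    // only the low 16 bits of the high dword:
    //   uint64_t exampleMaskTo48Bits(uint32_t Lo, uint32_t Hi) {
    //     Hi &= 0xffff; // keep bits [47:32], clear the flag bits above
    //     return (uint64_t(Hi) << 32) | Lo;
    //   }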
  } else {
    // ...
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      // ...
      using namespace AMDGPU::Hwreg;
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitLo)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitHi)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
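      // Sketch of the 16-bit hwreg operand packing, assuming the usual SIMM16
      // layout (an assumption about HwregEncoding::encode, not a quote of it):
      //   uint16_t exampleEncodeHwreg(unsigned Id, unsigned Offset, unsigned Width) {
      //     // ID in bits [5:0], OFFSET in [10:6], WIDTH-1 in [15:11].
      //     return Id | (Offset << 6) | ((Width - 1) << 11);
      //   }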
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    // ...
    return;
  }

  // Copy the size in bytes.
  // ...

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitLo, RegState::Kill)
      .addImm(8);
}
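// Illustrative sketch (not part of the original file): FLAT_SCR_HI is
// programmed in 256-byte granules, which is exactly what the S_LSHR_B32 by 8
// above computes.
static uint32_t exampleTo256ByteUnits(uint32_t ByteOffset) {
  return ByteOffset >> 8;
}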
// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  // ...
}
// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.

  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      // ...
      return Reg;
    }
  }

  return ScratchRsrcReg;
}
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}
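// Sketch of why the factor exists (illustrative): without flat scratch, MUBUF
// scratch accesses are swizzled per lane, so the wave-level SGPR offsets
// (SP/FP) are expressed in units of bytes-per-lane times the wavefront size.
static uint64_t exampleWaveLevelOffset(uint64_t PerLaneBytes,
                                       unsigned WavefrontSize) {
  return PerLaneBytes * WavefrontSize; // e.g. 16 B/lane * 64 lanes = 1024
}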
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  // ...

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB)
        OtherBB.addLiveIn(ScratchRsrcReg);
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found is clobbering with
  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
  // wave offset to a free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        // ...
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  // ...

  // We need to check if we're on a compute queue - if we are, then the CWSR
  // trap handler may need to store some VGPRs on the stack. The first VGPR
  // block is saved separately, so we only need to allocate space for any
  // additional VGPR blocks used. For now, we will make sure there's enough
  // room for the theoretical maximum number of VGPRs that can be allocated.
  // FIXME: Figure out if the shader uses fewer VGPRs in practice.
  // ...

  // The MicroEngine ID is 0 for the graphics queue, and 1 or 2 for compute
  // (3 is unused, so we ignore it). Unfortunately, S_GETREG doesn't set
  // SCC, so we need to check for 0 manually.
  // ...

  // If at least one of the constants can be inlined, then we can use
  // s_cselect. Otherwise, use a mov and cmovk. (The inlinability check uses
  // ST.hasInv2PiInlineImm().)
  // ...

  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit)
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);

  if (ScratchRsrcReg)
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
}
// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
  // ...

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    // ...
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
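    // Sketch of the field update on plain integers (illustrative): clearing
    // bit 21 turns CONST_INDEX_STRIDE (bits 22:21) from 0b11 (stride 64) into
    // 0b10 (stride 32), which is what the S_BITSET0_B32 above does in place:
    //   uint32_t exampleSetWave32IndexStride(uint32_t Rsrc3) {
    //     return Rsrc3 & ~(1u << 21);
    //   }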
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
        // ...
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
        // ...
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
      // ...
    }
    // ...
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      // ...
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
                  .addReg(ScratchRsrcSub1)
                  .addImm(0)
                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}
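// Sketch of the two-instruction 64-bit add on plain integers (illustrative):
// S_ADD_U32 produces the low half and a carry in SCC; S_ADDC_U32 folds the
// carry into the high half. Because base + wave offset stays within the
// 48-bit address space, the carry can never propagate out of bit 47 into the
// descriptor's flag bits.
static uint64_t exampleAdd64ViaHalves(uint32_t Lo, uint32_t Hi, uint32_t Off) {
  uint32_t NewLo = Lo + Off;
  uint32_t Carry = NewLo < Lo ? 1 : 0; // SCC
  uint32_t NewHi = Hi + Carry;         // S_ADDC_U32 consumes SCC
  return (static_cast<uint64_t>(NewHi) << 32) | NewLo;
}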
// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  // ...
  if (FuncInfo->isWholeWaveFunction()) {
    // Whole wave functions already have a copy of the original EXEC mask that
    // we can use.
    assert(IsProlog && "Epilog should look at return, not setup");
    ScratchExecCopy =
        TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
    assert(ScratchExecCopy && "Couldn't find copy of EXEC");
  } else {
    ScratchExecCopy = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, *TRI.getWaveMaskRegClass());
    if (!ScratchExecCopy)
      report_fatal_error("failed to find free scratch register");
  }

  LiveUnits.addReg(ScratchExecCopy);

  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  // ...
  return ScratchExecCopy;
}
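// Sketch of the save-exec semantics on plain masks (illustrative): both forms
// first copy EXEC aside; with an all-ones source, OR_SAVEEXEC then activates
// every lane, while XOR_SAVEEXEC flips EXEC so that exactly the previously
// inactive lanes run.
static uint64_t exampleSaveExec(uint64_t &Exec, bool EnableInactiveLanes) {
  uint64_t Saved = Exec;              // copied into the scratch SGPR
  Exec = EnableInactiveLanes ? ~Saved // -1 ^ exec (XOR_SAVEEXEC)
                             : ~0ull; // -1 | exec (OR_SAVEEXEC)
  return Saved;
}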
void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  // ...

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  for (const Register Reg : make_first_range(WWMScratchRegs)) {
    if (!MRI.isReserved(Reg)) {
      MRI.addLiveIn(Reg);
      MBB.addLiveIn(Reg);
    }
  }
  StoreWWMRegisters(WWMScratchRegs);

  auto EnableAllLanes = [&]() {
    // ...
  };

  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      EnableAllLanes();
    } else {
      ScratchExecCopy =
          buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
                               /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (FuncInfo->isWholeWaveFunction()) {
    // If we have already saved some WWM CSR registers, then the EXEC is
    // already -1 and we don't need to do anything else. Otherwise, set EXEC
    // to -1 here.
    if (!ScratchExecCopy)
      buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
                           /*EnableInactiveLanes*/ true);
    else if (WWMCalleeSavedRegs.empty())
      EnableAllLanes();
  } else if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    // ...
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP spill:
    // Skip if FP is saved to a scratch SGPR, the save has already been
    // emitted. Otherwise, FP has been moved to a temporary register and
    // spill it instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (Register Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);
      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (Register Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}
void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  // ...
  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise,
    // restore the FP value to a temporary register. The frame pointer should
    // be overwritten only at the end when all other spills are restored from
    // the current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due
  // to this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  if (FuncInfo->isWholeWaveFunction()) {
    // For whole wave functions, the EXEC is already -1 at this point.
    // Therefore, we can restore the CSR WWM registers right away.
    RestoreWWMRegisters(WWMCalleeSavedRegs);

    // The original EXEC is the first operand of the return instruction.
    MachineInstr &Return = *MBBI;
    unsigned Opcode = Return.getOpcode();
    switch (Opcode) {
    case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
      Opcode = AMDGPU::SI_RETURN;
      break;
    case AMDGPU::SI_TCRETURN_GFX_WholeWave:
      Opcode = AMDGPU::SI_TCRETURN_GFX;
      break;
    default:
      llvm_unreachable("Unexpected return inst");
    }
    Register OrigExec = Return.getOperand(0).getReg();

    if (!WWMScratchRegs.empty()) {
      // ...
      RestoreWWMRegisters(WWMScratchRegs);
    }

    // Restore original EXEC.
    // ...

    // Drop the first operand and update the opcode.
    Return.removeOperand(0);
    Return.setDesc(TII->get(Opcode));
    return;
  }

  if (!WWMScratchRegs.empty()) {
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
  }
  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // ...
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    // ...
  }
}
void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  // ...

  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
  // are free to set one up if they need it.
  // ...
  assert(StackPtrReg != AMDGPU::SP_REG);

  // ...
  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
    // ...
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the CFI entry. It avoids
      // the extra FP copy needed in the other two cases when FP is spilled to
      // memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      // ...
    }
  }

  if (HasFP) {
    const unsigned Alignment = MFI.getMaxAlign().value();
    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    // ...
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
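    // Illustrative sketch of the rounding above on plain integers (with the
    // wave-level scale factor folded into Alignment for brevity):
    //   uint32_t exampleAlignFP(uint32_t SP, uint32_t Alignment) {
    //     return (SP + Alignment - 1) & ~(Alignment - 1); // round up
    //   }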
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // If FP is used, emit the CSR spills with FP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
                   .addReg(StackPtrReg)
                   .addImm(RoundedSize * getScratchScaleFactor(ST))
                   .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");

  if (FuncInfo->isWholeWaveFunction()) {
    // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose.
    TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
  }
}
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  // ...

  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  // ...
  if (RoundedSize != 0) {
    if (TRI.hasBasePointer(MF)) {
      // ...
    } else if (hasFP(MF)) {
      // ...
    }
  }

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as base register. If
    // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
    // into a new scratch register and copy to FP later when other registers
    // are restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);

    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      LiveUnits.removeReg(SGPRForFPSaveRestoreCopy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}
void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  // ...

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        int FrameIndex;
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
              TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            assert(RS != nullptr);
            RS->enterBasicBlockEnd(MBB);
            RS->backward(std::next(MI.getIterator()));
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                   TII->isLoadFromStackSlot(MI, FrameIndex))
          if (!MFI.isFixedObjectIndex(FrameIndex))
            NonVGPRSpillFIs.set(FrameIndex);
      }
    }

    // Stack slot coloring may assign different objects to the same stack slot.
    // If not, then the VGPR to AGPR spill slot is dead.
    for (unsigned FI : SpillFIs.set_bits())
      if (!NonVGPRSpillFIs.test(FI))
        FuncInfo->setVGPRToAGPRSpillDead(FI);

    for (MachineBasicBlock &MBB : MF) {
      // ...
      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead, update it with the
        // correct register value. But not sure the register value alone is
        // adequate to lower the DIExpression. It should be worked out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue()) {
            uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
            if (MI.getOperand(StackOperandIdx).isFI() &&
                !MFI.isFixedObjectIndex(
                    MI.getOperand(StackOperandIdx).getIndex()) &&
                SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
              MI.getOperand(StackOperandIdx)
                  .ChangeToRegister(Register(), false /*isDef*/);
            }
          }
        }
      }
    }
  }

  // At this point we've already allocated all spilled SGPRs to VGPRs if we
  // can. Any remaining SGPR spills will go to memory, so move them back to the
  // default stack.
  bool HaveSGPRToVMemSpill =
      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));

    // If we are spilling SGPRs to memory with a large frame, we may need a
    // second VGPR emergency frame index.
    if (HaveSGPRToVMemSpill &&
        allocateScavengingFrameIndexesNearIncomingSP(MF)) {
      RS->addScavengingFrameIndex(MFI.CreateSpillStackObject(4, Align(4)));
    }
  }
}
void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const {
  // ...
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    // On gfx908, we had initially reserved the highest available VGPR for AGPR
    // copy. Now since we are done with RA, check if there exists an unused
    // VGPR which is lower than the earlier reserved VGPR before RA. If one
    // exists, use it for the AGPR copy instead of the one reserved before RA.
    Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
    Register UnusedLowVGPR =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
                          TRI->getHWRegIndex(VGPRForAGPRCopy))) {
      // Reserve this newly identified VGPR (for AGPR copy);
      // reserved registers should already be frozen at this point
      // so we can avoid calling MRI.freezeReservedRegs and just use
      // MRI.reserveReg.
      FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
      MRI.reserveReg(UnusedLowVGPR, TRI);
    }
  }

  // We initially reserved the highest available SGPR pair for long branches;
  // now, after RA, we shift down to a lower unused one if one exists.
  Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
  Register UnusedLowSGPR =
      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
  // If LongBranchReservedReg is null then we didn't find a long branch
  // and never reserved a register to begin with so there is nothing to
  // shift down. Then if UnusedLowSGPR is null, there isn't an available
  // lower register to use so just keep the original one we set.
  if (LongBranchReservedReg && UnusedLowSGPR) {
    FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
    MRI.reserveReg(UnusedLowSGPR, TRI);
  }
}
// The special SGPR spills like the one needed for FP, BP or any reserved
// registers delayed until frame lowering.
void SIFrameLowering::determinePrologEpilogSGPRSaves(
    MachineFunction &MF, BitVector &SavedVGPRs,
    bool NeedExecCopyReservedReg) const {
  // ...

  // Initially mark callee saved registers as used so we will not choose them
  // while looking for scratch SGPRs.
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (unsigned I = 0; CSRegs[I]; ++I)
    LiveUnits.addReg(CSRegs[I]);

  const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();

  Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
  if (NeedExecCopyReservedReg ||
      (ReservedRegForExecCopy &&
       MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
    MRI.reserveReg(ReservedRegForExecCopy, TRI);
    Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
    if (UnusedScratchReg) {
      // If found any unused scratch SGPR, reserve the register itself for Exec
      // copy and there is no need for any spills in that case.
      MFI->setSGPRForEXECCopy(UnusedScratchReg);
      MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
      LiveUnits.addReg(UnusedScratchReg);
    } else {
      // Needs spill.
      assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
             "Re-reserving spill slot for EXEC copy register");
      getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
                                     /*IncludeScratchCopy=*/false);
    }
  } else if (ReservedRegForExecCopy) {
    // Reset it at this point. There are no whole-wave copies and spills
    // encountered.
    MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  if (WillHaveFP || hasFP(MF)) {
    Register FramePtrReg = MFI->getFrameOffsetReg();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
  }

  if (TRI->hasBasePointer(MF)) {
    Register BasePtrReg = TRI->getBaseRegister();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
           "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
  }
}
// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If this is a function with the amdgpu_cs_chain[_preserve] calling
  // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
  // we don't need to save and restore anything.
  if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
    return;

  // ...

  bool NeedExecCopyReservedReg = false;

  MachineInstr *ReturnMI = nullptr;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // TODO: Walking through all MBBs here would be a bad heuristic. Better
      // handle them elsewhere.
      if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
        NeedExecCopyReservedReg = true;
      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
               MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
               (MFI->isChainFunction() &&
                TII->isChainCallOpcode(MI.getOpcode()))) {
        // We expect all return to be the same size.
        assert(!ReturnMI ||
               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
                count_if(ReturnMI->operands(),
                         [](auto Op) { return Op.isReg(); })));
        ReturnMI = &MI;
      }
    }
  }

  SmallVector<Register> SortedWWMVGPRs;
  for (Register Reg : MFI->getWWMReservedRegs()) {
    // The shift-back is needed only for the VGPRs used for SGPR spills and
    // they are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into
    // WWM reserved registers.
    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
    if (TRI->getRegSizeInBits(*RC) != 32)
      continue;
    SortedWWMVGPRs.push_back(Reg);
  }

  sort(SortedWWMVGPRs, std::greater<Register>());
  MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);

  if (MFI->isWholeWaveFunction()) {
    // In practice, all the VGPRs are WWM registers, and we will need to save
    // at least their inactive lanes. Add them to WWMReservedRegs.
    assert(!NeedExecCopyReservedReg &&
           "Whole wave functions can use the reg mapped for their i1 argument");

    // FIXME: Be more efficient!
    unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
    for (MCRegister Reg :
         AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
      if (MF.getRegInfo().isPhysRegModified(Reg)) {
        MFI->reserveWWMRegister(Reg);
        MF.begin()->addLiveIn(Reg);
      }
    MF.begin()->sortUniqueLiveIns();
  }

  // Remove any VGPRs used in the return value because these do not need to be
  // saved. This prevents CSR restore from clobbering return VGPRs.
  if (ReturnMI) {
    for (auto &Op : ReturnMI->operands()) {
      if (Op.isReg())
        SavedVGPRs.reset(Op.getReg());
    }
  }

  // Create the stack objects for WWM registers now.
  for (Register Reg : MFI->getWWMReservedRegs()) {
    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
    MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
                          TRI->getSpillAlign(*RC));
  }

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // In gfx908 there were no direct AGPR loads and stores, and thus spilling
  // also required a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);

  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
  // allow the default insertion to handle them.
  for (auto &Reg : MFI->getWWMSpills())
    SavedVGPRs.reset(Reg.first);
}
void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  // ...

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // We have to anticipate introducing CSR VGPR spills or spill of caller
  // save VGPR reserved for SGPR spills as we now always create stack entry
  // for it, if we don't have any stack objects already, since we require a FP
  // if there is a call and stack. We will allocate a VGPR for SGPR spills if
  // there are any SGPR spills. Whether they are CSR spills or otherwise.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP =
      FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());

  // Return address use with return instruction is hidden through the SI_RETURN
  // pseudo. Given that and since the IPRA computes actual register usage and
  // does not use CSR list, the clobbering of return address by function calls
  // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
  // usage collection. This will ensure save/restore of return address happens
  // in those scenarios.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  Register RetAddrReg = TRI->getReturnAddressReg(MF);
  if (!MFI->isEntryFunction() &&
      (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
  }
}
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
                                       const GCNSubtarget &ST,
                                       std::vector<CalleeSavedInfo> &CSI,
                                       unsigned &MinCSFrameIndex,
                                       unsigned &MaxCSFrameIndex) {
  // ...
  assert(std::is_sorted(CSI.begin(), CSI.end(),
                        [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
                          return A.getReg() < B.getReg();
                        }) &&
         "Callee saved registers not sorted");

  auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
    return !CSI.isSpilledToReg() &&
           TRI->getPhysRegBaseClass(CSI.getReg()) ==
               &AMDGPU::VGPR_32RegClass &&
           !FuncInfo->isWWMReservedRegister(CSI.getReg());
  };

  auto CSEnd = CSI.end();
  for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
    Register Reg = CSIt->getReg();
    if (!CanUseBlockOps(*CSIt))
      continue;

    // Find all the regs that will fit in a 32-bit mask starting at the current
    // reg and build said mask. It should have 1 for every register that's
    // included, with the current register as the least significant bit.
    uint32_t Mask = 1;
    CSEnd = std::remove_if(
        CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
          if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
            Mask |= 1 << (CSI.getReg() - Reg);
            return true;
          }
          return false;
        });

    const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
    Register RegBlock =
        TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
    if (!RegBlock) {
      // We couldn't find a super register for the block. This can happen if
      // the register we started with is too high (e.g. v232 if the maximum is
      // v255). We therefore try to get the last register block and figure out
      // the mask from there.
      Register LastBlockStart =
          AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
      RegBlock =
          TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
      assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
             "Couldn't find super register");
      int RegDelta = Reg - LastBlockStart;
      assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
             "Bad shift amount");
      Mask <<= RegDelta;
    }
    // The stack objects can be a bit smaller than the register block if we
    // know some of the high bits of Mask are 0. This may happen often with
    // calling conventions where the caller and callee-saved VGPRs are
    // interleaved at a small boundary (e.g. 8 or 16).
    int UnusedBits = llvm::countl_zero(Mask);
    unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
    int FrameIdx =
        MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
                              /*isSpillSlot=*/true);
    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

    FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);

    CSIt->setFrameIdx(FrameIdx);
    CSIt->setReg(RegBlock);
  }
  CSI.erase(CSEnd, CSI.end());
}
bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
    unsigned &MaxCSFrameIndex) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();

  if (UseVGPRBlocks)
    assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex);

  return assignCalleeSavedSpillSlots(MF, TRI, CSI) || UseVGPRBlocks;
}
bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  Register SGPRForBPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
  if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
    return false;

  unsigned NumModifiedRegs = 0;

  if (SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(SGPRForFPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    } else if (CS.getReg() == BasePtrReg.asMCReg() &&
               SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    }
  }

  return false;
}
bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
    const MachineFunction &MF) const {
  // ...
  uint64_t EstStackSize = MFI.estimateStackSize(MF);
  uint64_t MaxOffset = EstStackSize - 1;

  // We need the emergency stack slots to be allocated in range of the
  // MUBUF/flat scratch immediate offset from the base register, so assign
  // these first at the incoming SP position.
  //
  // TODO: We could try sorting the objects to find a hole in the first bytes
  // rather than allocating as close to possible. This could save a lot of
  // space on frames with alignment requirements.
  if (ST.enableFlatScratch()) {
    // ...
  } else {
    if (TII->isLegalMUBUFImmOffset(MaxOffset))
      return false;
  }

  return true;
}
bool SIFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  if (!ST.useVGPRBlockOpsForCSR())
    return false;
  // ...
  for (const CalleeSavedInfo &CS : CSI) {
    Register Reg = CS.getReg();
    if (!BlockRegClass->contains(Reg) ||
        !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
      spillCalleeSavedRegister(MBB, MI, CS, TII, TRI);
      continue;
    }

    // Build a scratch block store.
    int FrameIndex = CS.getFrameIdx();
    // ...
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
        // ...
        ;

    // Add the register to the liveins. This is necessary because if any of the
    // VGPRs in the register block is reserved (e.g. if it's a WWM register),
    // then the whole block will be marked as reserved and `updateLiveness`
    // will skip it.
    MBB.addLiveIn(Reg);
  }

  MBB.sortUniqueLiveIns();
  return true;
}
bool SIFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  if (!ST.useVGPRBlockOpsForCSR())
    return false;
  // ...
  for (const CalleeSavedInfo &CS : CSI) {
    Register Reg = CS.getReg();
    if (!BlockRegClass->contains(Reg) ||
        !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
      restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI);
      continue;
    }

    // Build a scratch block load.
    int FrameIndex = CS.getFrameIdx();
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FrameIndex),
        MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
        MFI.getObjectAlign(FrameIndex));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
        // ...
        ;

    // Add the register to the liveins. This is necessary because if any of the
    // VGPRs in the register block is reserved (e.g. if it's a WWM register),
    // then the whole block will be marked as reserved and `updateLiveness`
    // will skip it.
    MBB.addLiveIn(Reg);
  }

  MBB.sortUniqueLiveIns();
  return true;
}
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    // ...
    Amount *= getScratchScaleFactor(ST);
    if (IsDestroy)
      Amount = -Amount;
    auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
                   .addReg(SPReg)
                   .addImm(Amount);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("cannot handle call frames with callee pop");
  }

  return MBB.erase(I);
}
/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  // ...
}

// The FP for kernels is always known 0, so we never really need to setup an
// explicit register for it. However, DisableFramePointerElim will force us to
// use a register for it.
bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry & chain functions we can use an immediate offset in most cases,
  // so the presence of calls doesn't imply we need a distinct frame pointer.
  // ...

  // All offsets are unsigned, so need to be addressed in the same direction
  // as stack growth.

  // FIXME: This function is pretty broken, since it can be called before the
  // frame layout is determined or CSR spills are inserted.
  // ...
}
// This is essentially a reduced version of hasFP for entry functions. Since
// the stack pointer is known 0 on entry to kernels, we never really need an
// FP register. We may need to initialize the stack pointer depending on the
// frame properties, which logically overlaps many of the cases where an
// ordinary function would require an FP.
// Also used for chain functions. While not technically entry functions, chain
// functions may need to set up a stack pointer in some situations.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
          MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
         "only expected to call this for entry points and chain functions");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass, bool IncludeScratchCopy=true)
Query target location for spilling SGPRs IncludeScratchCopy : Also look for free scratch SGPRs.
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, const GCNSubtarget &ST, std::vector< CalleeSavedInfo > &CSI, unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex)
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, const TargetRegisterClass &RC, bool Unused=false)
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LiveRegUnits &LiveUnits, const TargetRegisterClass &RC)
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
static const int BlockSize
bool isChainFunction() const
bool isEntryFunction() const
static const LaneMaskConstants & get(const GCNSubtarget &ST)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
bool test(unsigned Idx) const
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
bool any() const
any - Returns true if any bit is set.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
iterator_range< const_set_bits_iterator > set_bits() const
bool empty() const
empty - Tests whether there are no bits in this bitvector.
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
MCRegister getReg() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasImplicitBufferPtr() const
bool hasFlatScratchInit() const
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
bool empty() const
Returns true if the set is empty.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
bool hasTailCall() const
Returns true if the function contains a tail call.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
LLVM_ABI bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LiveRegUnits &LiveUnits, Register FrameReg)
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, bool NeedExecCopyReservedReg) const
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool mayReserveScratchForCWSR(const MachineFunction &MF) const
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
bool hasFPImpl(const MachineFunction &MF) const override
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
ArrayRef< PrologEpilogSGPRSpill > getPrologEpilogSGPRSpills() const
const WWMSpillsMap & getWWMSpills() const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void setSGPRForEXECCopy(Register Reg)
unsigned getNumPreloadedSGPRs() const
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask)
GCNUserSGPRUsageInfo & getUserSGPRInfo()
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register getLongBranchReservedReg() const
unsigned getDynamicVGPRBlockSize() const
bool hasSpilledVGPRs() const
void setVGPRToAGPRSpillDead(int FrameIndex)
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
bool isStackRealigned() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Register getSGPRForEXECCopy() const
bool isWWMReservedRegister(Register Reg) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
Register getVGPRForAGPRCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Register getFrameOffsetReg() const
void setLongBranchReservedReg(Register Reg)
void setHasSpilledVGPRs(bool Spill=true)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
const ReservedRegSet & getWWMReservedRegs() const
Register getImplicitBufferPtrUserSGPR() const
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setIsStackRealigned(bool Realigned=true)
unsigned getGITPtrHigh() const
bool hasSpilledSGPRs() const
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
void setScratchRSrcReg(Register Reg)
void reserveWWMRegister(Register Reg)
Register getFrameRegister(const MachineFunction &MF) const override
const TargetRegisterClass * getRegClassForBlockOp(const MachineFunction &MF) const
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
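As a quick illustration of the SmallVector/SmallVectorImpl split (a sketch; collectEvens is hypothetical): APIs accept SmallVectorImpl<T>& so callers may choose any inline capacity 'N'.

#include "llvm/ADT/SmallVector.h"

// Accepting SmallVectorImpl keeps the inline capacity out of the API.
static void collectEvens(const llvm::SmallVectorImpl<int> &In,
                         llvm::SmallVectorImpl<int> &Out) {
  for (int V : In)
    if (V % 2 == 0)
      Out.push_back(V);
}

int main() {
  llvm::SmallVector<int, 8> In = {1, 2, 3, 4};
  llvm::SmallVector<int, 4> Out; // A different 'N' binds to the same API.
  collectEvens(In, Out);
  return Out.size() == 2 ? 0 : 1;
}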
StackOffset holds a fixed and a scalable offset in bytes.
int64_t getFixed() const
Returns the fixed component of the stack.
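A small sketch of StackOffset: it packs a fixed byte offset together with a scalable component (used by scalable-vector targets), and getFixed() reads the fixed part back.

#include "llvm/Support/TypeSize.h"
#include <cassert>

int main() {
  llvm::StackOffset Off = llvm::StackOffset::get(/*Fixed=*/24, /*Scalable=*/0);
  assert(Off.getFixed() == 24);   // Fixed component in bytes.
  assert(Off.getScalable() == 0); // No scalable component here.
  return 0;
}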
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
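Overrides of determineCalleeSaves usually start from the generic clobber analysis and then add or remove registers. A sketch of that pattern; the function and the extra register number are purely hypothetical:

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetFrameLowering.h"

using namespace llvm;

// Sketch only: run the generic analysis, then force one extra save.
void determineCalleeSavesSketch(const TargetFrameLowering &TFL,
                                MachineFunction &MF, BitVector &SavedRegs,
                                RegScavenger *RS) {
  TFL.determineCalleeSaves(MF, SavedRegs, RS); // Marks clobbered CSRs.
  const unsigned HypotheticalReg = 42;         // Stand-in register number.
  SavedRegs.set(HypotheticalReg);              // Save it unconditionally.
}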
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegister - Default implementation for spilling a single callee saved register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disabled for the given machine function.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ ScalablePredicateVector
initializer< Ty > init(const Ty &Val)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
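The canonical use is deleting instructions mid-walk; a sketch over a MachineBasicBlock (eraseKillPseudos is hypothetical):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// The early-inc iterator advances before the loop body runs, so
// eraseFromParent() cannot invalidate the traversal.
static void eraseKillPseudos(llvm::MachineBasicBlock &MBB) {
  for (llvm::MachineInstr &MI : llvm::make_early_inc_range(MBB))
    if (MI.isKill())
      MI.eraseFromParent();
}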
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64-bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a comparator C.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64-bit value.
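These arithmetic helpers compose naturally; a self-contained sketch exercising alignDown, countl_zero, isUInt, and the Hi_32/Lo_32 pair:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  assert(llvm::alignDown(37u, 16u) == 32u); // Largest multiple of 16 <= 37.
  assert(llvm::countl_zero<uint32_t>(1) == 31);
  assert(llvm::isUInt<20>(0xFFFFF));   // 2^20 - 1 fits in 20 bits...
  assert(!llvm::isUInt<20>(0x100000)); // ...but 2^20 does not.
  uint64_t V = 0x123456789ABCDEF0ULL;
  assert(llvm::Hi_32(V) == 0x12345678u && llvm::Lo_32(V) == 0x9ABCDEF0u);
  return 0;
}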
@ And
Bitwise or logical AND of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns the smallest multiple of A that is greater than or equal to Size bytes.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
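The range helpers above (sort, is_sorted, count_if, reverse) are thin whole-range wrappers over their std:: counterparts; a brief sketch:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>

int main() {
  llvm::SmallVector<int, 4> V = {3, 1, 2};
  llvm::sort(V); // std::sort over the whole range.
  assert(llvm::is_sorted(V));
  assert(llvm::count_if(V, [](int X) { return X % 2 == 0; }) == 1);
  return 0;
}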
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
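Align and alignTo pair up for the stack-size rounding done throughout this file; a small sketch:

#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  llvm::Align A(16);                   // Must be a non-zero power of two.
  assert(A.value() == 16);             // The raw byte value (use sparingly).
  assert(llvm::alignTo(20, A) == 32);  // Round 20 up to a 16-byte multiple.
  return 0;
}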
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
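getFixedStack is how the spill helpers earlier in this file describe their memory. A sketch of building a store memory operand for frame index FI (makeSpillMMO is hypothetical, and the exact getMachineMemOperand overload set varies slightly across LLVM releases):

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"

// Sketch: describe a 4-byte store to stack object FI so scheduling and
// alias analysis know exactly which slot the instruction touches.
static llvm::MachineMemOperand *makeSpillMMO(llvm::MachineFunction &MF,
                                             int FI) {
  llvm::MachinePointerInfo PtrInfo =
      llvm::MachinePointerInfo::getFixedStack(MF, FI);
  return MF.getMachineMemOperand(PtrInfo, llvm::MachineMemOperand::MOStore,
                                 /*Size=*/4,
                                 MF.getFrameInfo().getObjectAlign(FI));
}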