1//===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
9// This file contains a pass that lowers homogeneous prolog/epilog instructions.
11//===----------------------------------------------------------------------===//
32 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \
33 "AArch64 homogeneous prolog/epilog lowering pass"
37 cl::desc(
"The minimum number of instructions that are outlined in a frame "
38 "helper (default = 2)"));
42class AArch64LowerHomogeneousPE {
50 bool runOnMachineFunction(MachineFunction &Fn);
54 MachineModuleInfo *MMI;
56 bool runOnMBB(MachineBasicBlock &
MBB);
60 /// Lower a HOM_Prolog pseudo instruction into a helper call
61 /// or a sequence of homogeneous stores.
62 /// When a fp setup follows, it can be optimized.
65 /// Lower a HOM_Epilog pseudo instruction into a helper call
66 /// or a sequence of homogeneous loads.
67 /// When a return follows, it can be optimized.
72class AArch64LowerHomogeneousPrologEpilog :
public ModulePass {
76 AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {}
77 void getAnalysisUsage(AnalysisUsage &AU)
const override {
81 ModulePass::getAnalysisUsage(AU);
83 bool runOnModule(
Module &M)
override;
85 StringRef getPassName()
const override {
90}
// end anonymous namespace
92char AArch64LowerHomogeneousPrologEpilog::ID = 0;
95 "aarch64-lower-homogeneous-prolog-epilog",
98bool AArch64LowerHomogeneousPrologEpilog::runOnModule(
Module &M) {
103 &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
104 return AArch64LowerHomogeneousPE(&M, MMI).run();
107bool AArch64LowerHomogeneousPE::run() {
116 Changed |= runOnMachineFunction(*MF);
123/// Return a frame helper name with the given CSRs and the helper type.
124/// For instance, a prolog helper that saves x19 and x20 is named as
125/// OUTLINED_FUNCTION_PROLOG_x19x20.
128 std::ostringstream RegStream;
131 RegStream <<
"OUTLINED_FUNCTION_PROLOG_";
134 RegStream <<
"OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset <<
"_";
137 RegStream <<
"OUTLINED_FUNCTION_EPILOG_";
140 RegStream <<
"OUTLINED_FUNCTION_EPILOG_TAIL_";
144 for (
auto Reg : Regs) {
145 if (
Reg == AArch64::NoRegister)
150 return RegStream.str();
153/// Create a Function for the unique frame helper with the given name.
154/// Return a newly created MachineFunction with an empty MachineBasicBlock.
160 assert(
F ==
nullptr &&
"Function has been created before");
163 assert(
F &&
"Function was null!");
165 // Use ODR linkage to avoid duplication.
169 // Set minsize, so we don't insert padding between outlined functions.
170 F->addFnAttr(Attribute::NoInline);
171 F->addFnAttr(Attribute::MinSize);
172 F->addFnAttr(Attribute::Naked);
175 // Remove unnecessary register liveness and set NoVRegs.
176 MF.
getProperties().resetTracksLiveness().resetIsSSA().setNoVRegs();
179 // Create entry block.
182 Builder.CreateRetVoid();
184 // Insert the new block into the function.
191/// Emit a store-pair instruction for frame-setup.
192/// If Reg2 is AArch64::NoRegister, emit STR instead.
196 int Offset,
bool IsPreDec) {
197 assert(Reg1 != AArch64::NoRegister);
198 const bool IsPaired = Reg2 != AArch64::NoRegister;
199 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
204 Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
206 Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
209 Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
211 Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
213 // The implicit scale for Offset is 8.
214 TypeSize Scale(0U,
false), Width(0U,
false);
215 int64_t MinOffset, MaxOffset;
216 [[maybe_unused]]
bool Success =
219 Offset *= (8 / (int)Scale);
232/// Emit a load-pair instruction for frame-destroy.
233/// If Reg2 is AArch64::NoRegister, emit LDR instead.
237 int Offset,
bool IsPostDec) {
238 assert(Reg1 != AArch64::NoRegister);
239 const bool IsPaired = Reg2 != AArch64::NoRegister;
240 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
245 Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
247 Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
250 Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
252 Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
254 // The implicit scale for Offset is 8.
255 TypeSize Scale(0U,
false), Width(0U,
false);
256 int64_t MinOffset, MaxOffset;
257 [[maybe_unused]]
bool Success =
260 Offset *= (8 / (int)Scale);
273/// Return a unique function if a helper can be formed with the given Regs and frame type.
275/// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
276/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
277/// stp x20, x19, [sp, #16]
280/// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
281/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
282/// stp x20, x19, [sp, #16]
286/// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
288/// ldp x29, x30, [sp, #32]
289/// ldp x20, x19, [sp, #16]
290/// ldp x22, x21, [sp], #48
293/// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
294/// ldp x29, x30, [sp, #32]
295/// ldp x20, x19, [sp, #16]
296/// ldp x22, x21, [sp], #48
299/// @param MMI machine module info
300/// @param Regs callee save regs that the helper will handle
301/// @param Type frame helper type
302/// @return a helper function
306 unsigned FpOffset = 0) {
309 auto *
F = M->getFunction(Name);
322 // Compute the remaining SP adjust beyond FP/LR.
323 auto LRIdx = std::distance(Regs.
begin(),
llvm::find(Regs, AArch64::LR));
325 // If the register stored to the lowest address is not LR, we must subtract
326 // more from SP here.
327 if (LRIdx !=
Size - 2) {
330 LRIdx -
Size + 2,
true);
333 // Store CSRs in the reverse order.
334 for (
int I =
Size - 3;
I >= 0;
I -= 2) {
335 // FP/LR has been stored at call-site.
336 if (Regs[
I - 1] == AArch64::LR)
363 for (
int I = 0;
I <
Size - 2;
I += 2)
366 // Restore the last CSR with post-increment of SP.
375 return M->getFunction(Name);
378/// This function checks if a frame helper should be used for
379/// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
380/// @param MBB machine basic block
381/// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog
382/// @param Regs callee save registers that are saved or restored.
383/// @param Type frame helper type
384/// @return True if a use of helper is qualified.
389 const auto *
TRI =
MBB.getParent()->getSubtarget().getRegisterInfo();
390 auto RegCount = Regs.
size();
391 assert(RegCount > 0 && (RegCount % 2 == 0));
392 // # of instructions that will be outlined.
393 int InstCount = RegCount / 2;
395 // Do not use a helper call when not saving LR.
401 // Prolog helper cannot save FP/LR.
405 // Effectively no change in InstCount since FpAdjustment is included.
409 // Bail-out if X16 is live across the epilog helper because it is used in
410 // the helper to handle X30.
411 for (
auto NextMI = NextMBBI; NextMI !=
MBB.end(); NextMI++) {
412 if (NextMI->readsRegister(AArch64::W16,
TRI))
415 // Epilog may not be in the last block. Check the liveness in successors.
417 if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
420 // No change in InstCount for the regular epilog case.
423 // EpilogTail helper includes the caller's return.
424 if (NextMBBI ==
MBB.end())
426 if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
436/// Lower a HOM_Epilog pseudo instruction into a helper call while
437/// creating the helper on demand. Or emit a sequence of loads in place when not
438/// using a helper call.
440/// 1. With a helper including ret
441/// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI
444/// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
448/// HOM_Epilog x30, x29, x19, x20, x21, x22
450/// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
452/// 3. Without a helper
453/// HOM_Epilog x30, x29, x19, x20, x21, x22
455/// ldp x29, x30, [sp, #32]
456/// ldp x20, x19, [sp, #16]
457/// ldp x22, x21, [sp], #48
458bool AArch64LowerHomogeneousPE::lowerEpilog(
465 SmallVector<unsigned, 8> Regs;
466 bool HasUnpairedReg =
false;
467 for (
auto &MO :
MI.operands())
469 if (!MO.getReg().isValid()) {
470 // For now we are only expecting unpaired GP registers which should
471 // occur exactly once.
473 HasUnpairedReg =
true;
477 (void)HasUnpairedReg;
481 // Registers are in pair.
483 assert(
MI.getOpcode() == AArch64::HOM_Epilog);
487 // When MBB ends with a return, emit a tail-call to the epilog helper
488 auto *EpilogTailHelper =
496 NextMBBI = std::next(Return);
497 Return->removeFromParent();
500 // The default epilog helper case.
508 // Fall back to no-helper.
509 for (
int I = 0;
I <
Size - 2;
I += 2)
511 // Restore the last CSR with post-increment of SP.
519/// Lower a HOM_Prolog pseudo instruction into a helper call while
520/// creating the helper on demand. Or emit a sequence of stores in place when
521/// not using a helper call.
523/// 1. With a helper including frame-setup
524/// HOM_Prolog x30, x29, x19, x20, x21, x22, 32
526/// stp x29, x30, [sp, #-16]!
527/// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
530/// HOM_Prolog x30, x29, x19, x20, x21, x22
532/// stp x29, x30, [sp, #-16]!
533/// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
535/// 3. Without a helper
536/// HOM_Prolog x30, x29, x19, x20, x21, x22
538/// stp x22, x21, [sp, #-48]!
539/// stp x20, x19, [sp, #16]
540/// stp x29, x30, [sp, #32]
541bool AArch64LowerHomogeneousPE::lowerProlog(
548 SmallVector<unsigned, 8> Regs;
549 bool HasUnpairedReg =
false;
551 std::optional<int> FpOffset;
552 for (
auto &MO :
MI.operands()) {
554 if (MO.getReg().isValid()) {
555 if (MO.getReg() == AArch64::LR)
558 // For now we are only expecting unpaired GP registers which should
559 // occur exactly once.
561 HasUnpairedReg =
true;
564 }
else if (MO.isImm()) {
565 FpOffset = MO.getImm();
568 (void)HasUnpairedReg;
572 // Allow compact unwind case only for now.
574 assert(
MI.getOpcode() == AArch64::HOM_Prolog);
578 // FP/LR is stored at the top of stack before the prolog helper call.
590 // FP/LR is stored at the top of stack before the prolog helper call.
599 // Fall back to no-helper.
601 for (
int I =
Size - 3;
I >= 0;
I -= 2)
617/// Process each machine instruction
618/// @param MBB machine basic block
619/// @param MBBI current instruction iterator
620/// @param NextMBBI next instruction iterator which can be updated
621/// @return True when IR is changed.
622bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &
MBB,
626 unsigned Opcode =
MI.getOpcode();
630 case AArch64::HOM_Prolog:
631 return lowerProlog(
MBB,
MBBI, NextMBBI);
632 case AArch64::HOM_Epilog:
633 return lowerEpilog(
MBB,
MBBI, NextMBBI);
638bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &
MBB) {
651bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
661 return new AArch64LowerHomogeneousPrologEpilog();
static Function * getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, SmallVectorImpl< unsigned > &Regs, FrameHelperType Type, unsigned FpOffset=0)
Return a unique function if a helper can be formed with the given Regs and frame type.
static bool shouldUseFrameHelper(MachineBasicBlock &MBB, MachineBasicBlock::iterator &NextMBBI, SmallVectorImpl< unsigned > &Regs, FrameHelperType Type)
This function checks if a frame helper should be used for HOM_Prolog/HOM_Epilog pseudo instruction ex...
static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPostDec)
Emit a load-pair instruction for frame-destroy.
#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME
static cl::opt< int > FrameHelperSizeThreshold("frame-helper-size-threshold", cl::init(2), cl::Hidden, cl::desc("The minimum number of instructions that are outlined in a frame " "helper (default = 2)"))
static std::string getFrameHelperName(SmallVectorImpl< unsigned > &Regs, FrameHelperType Type, unsigned FpOffset)
Return a frame helper name with the given CSRs and the helper type.
static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, int Offset, bool IsPreDec)
Emit a store-pair instruction for frame-setup.
static MachineFunction & createFrameHelperMachineFunction(Module *M, MachineModuleInfo *MMI, StringRef Name)
Create a Function for the unique frame helper with the given name.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static const char * getRegisterName(MCRegister Reg, unsigned AltIdx=AArch64::NoRegAltName)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
@ ExternalLinkage
Externally visible function.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This is an important class for using LLVM in a threaded context.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI MachineBasicBlock * removeFromParent()
This method unlinks 'this' from the containing function, and returns it, but does not delete it.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction & getOrCreateMachineFunction(Function &F)
Returns the MachineFunction constructed for the IR function F.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
LLVM_ABI void freezeReservedRegs()
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getDefRegState(bool B)
ModulePass * createAArch64LowerHomogeneousPrologEpilogPass()
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.