1//===- RISCVOptWInstrs.cpp - MI W instruction optimizations ---------------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===---------------------------------------------------------------------===//
9// This pass does some optimizations for *W instructions at the MI level.
11// First it removes unneeded sext.w instructions, either because the sign
12// extended bits aren't consumed or because the input was already sign extended
13// by an earlier instruction.
16// 1. Unless explicitly disabled or the target prefers instructions with W suffix,
17// it removes the -w suffix from opw instructions whenever all users are
18// dependent only on the lower word of the result of the instruction.
19// The cases handled are:
20// * addw because c.add has a larger register encoding than c.addw.
21// * addiw because it helps reduce test differences between RV32 and RV64
22// w/o being a pessimization.
23// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
24// * slliw because c.slliw doesn't exist and c.slli does
26// 2. Or if explicitly enabled or the target prefers instructions with W suffix,
27// it adds the W suffix to the instruction whenever all users are dependent
28// only on the lower word of the result of the instruction.
29// The cases handled are:
31// * slli with imm < 32.
33//===---------------------------------------------------------------------===//
45 #define DEBUG_TYPE "riscv-opt-w-instrs"
46 #define RISCV_OPT_W_INSTRS_NAME "RISC-V Optimize W Instructions"
48 STATISTIC(NumRemovedSExtW,
"Number of removed sign-extensions");
50 "Number of instructions transformed to W-ops");
52 "Number of instructions transformed to non-W-ops");
55 cl::desc(
"Disable removal of sext.w"),
69 bool runOnMachineFunction(MachineFunction &MF)
override;
70 bool removeSExtWInstrs(MachineFunction &MF,
const RISCVInstrInfo &
TII,
71 const RISCVSubtarget &ST, MachineRegisterInfo &
MRI);
72 bool canonicalizeWSuffixes(MachineFunction &MF,
const RISCVInstrInfo &
TII,
73 const RISCVSubtarget &ST,
74 MachineRegisterInfo &
MRI);
76 void getAnalysisUsage(AnalysisUsage &AU)
const override {
84}
// end anonymous namespace
86char RISCVOptWInstrs::ID = 0;
91 return new RISCVOptWInstrs();
112 auto NumDemandedBits =
114 return NumDemandedBits && Bits >= *NumDemandedBits;
117// Checks if all users only demand the lower \p OrigBits of the original
118// instruction's result.
119// TODO: handle multiple interdependent transformations
129 while (!Worklist.
empty()) {
132 unsigned Bits =
P.second;
137 // Only handle instructions with one def.
138 if (
MI->getNumExplicitDefs() != 1)
141 Register DestReg =
MI->getOperand(0).getReg();
145 for (
auto &UserOp :
MRI.use_nodbg_operands(DestReg)) {
147 unsigned OpIdx = UserOp.getOperandNo();
177 case RISCV::FCVT_H_W:
178 case RISCV::FCVT_H_W_INX:
179 case RISCV::FCVT_H_WU:
180 case RISCV::FCVT_H_WU_INX:
181 case RISCV::FCVT_S_W:
182 case RISCV::FCVT_S_W_INX:
183 case RISCV::FCVT_S_WU:
184 case RISCV::FCVT_S_WU_INX:
185 case RISCV::FCVT_D_W:
186 case RISCV::FCVT_D_W_INX:
187 case RISCV::FCVT_D_WU:
188 case RISCV::FCVT_D_WU_INX:
200 case RISCV::ZEXT_H_RV32:
201 case RISCV::ZEXT_H_RV64:
208 if (Bits >= (ST.getXLen() / 2))
213 // If we are shifting right by less than Bits, and users don't demand
214 // any bits that were shifted into [Bits-1:0], then we can consider this
224 // these overwrite higher input bits, otherwise the lower word of output
225 // depends only on the lower word of input. So check their uses read W.
228 if (Bits >= (ST.getXLen() - ShAmt))
235 if (Bits >= 32 - ShAmt)
260 // Operand 2 is the shift amount which uses log2(xlen) bits.
262 if (Bits >=
Log2_32(ST.getXLen()))
273 // Operand 2 is the shift amount which uses 6 bits.
279 case RISCV::SH1ADD_UW:
280 case RISCV::SH2ADD_UW:
281 case RISCV::SH3ADD_UW:
282 // Operand 1 is implicitly zero extended.
283 if (
OpIdx == 1 && Bits >= 32)
294 // The first argument is the value to store.
295 if (
OpIdx == 0 && Bits >= 8)
299 // The first argument is the value to store.
300 if (
OpIdx == 0 && Bits >= 16)
304 // The first argument is the value to store.
305 if (
OpIdx == 0 && Bits >= 32)
309 // For these operations, the lower word of the output depends only on
310 // the lower word of the input. So, we check that all uses only read the lower word.
338 // BREV8 and ORC_B work on bytes. Round Bits down to the nearest byte.
342 case RISCV::PseudoCCMOVGPR:
343 case RISCV::PseudoCCMOVGPRNoX0:
344 // Either operand 4 or operand 5 is returned by this instruction. If
345 // only the lower word of the result is used, then only the lower word
346 // of operand 4 and 5 is used.
352 case RISCV::CZERO_EQZ:
353 case RISCV::CZERO_NEZ:
354 case RISCV::VT_MASKC:
355 case RISCV::VT_MASKCN:
364 // Behavior of Msb < Lsb is not well documented.
365 if (Msb >= Lsb && Bits > Msb)
380// This function returns true if the machine instruction always outputs a value
381// where bits 63:32 match bit 31.
385 // Instructions that can be determined from opcode are marked in tablegen.
389 // Special cases that require checking operands.
390 switch (
MI.getOpcode()) {
391 // shifting right sufficiently makes the value 32-bit sign-extended
393 return MI.getOperand(2).getImm() >= 32;
395 return MI.getOperand(2).getImm() > 32;
396 // The LI pattern ADDI rd, X0, imm is sign extended.
398 return MI.getOperand(1).isReg() &&
MI.getOperand(1).getReg() == RISCV::X0;
399 // An ANDI with an 11 bit immediate will zero bits 63:11.
402 // An ORI with an >11 bit immediate (negative 12-bit) will set bits 63:11.
405 // A bseti with X0 is sign extended if the immediate is less than 31.
407 return MI.getOperand(2).getImm() < 31 &&
408 MI.getOperand(1).getReg() == RISCV::X0;
409 // Copying from X0 produces zero.
411 return MI.getOperand(1).getReg() == RISCV::X0;
412 // Ignore the scratch register destination.
413 case RISCV::PseudoAtomicLoadNand32:
415 case RISCV::PseudoVMV_X_S: {
416 // vmv.x.s has at least 33 sign bits if log2(sew) <= 5.
417 int64_t Log2SEW =
MI.getOperand(2).getImm();
418 assert(Log2SEW >= 3 && Log2SEW <= 6 &&
"Unexpected Log2SEW");
421 case RISCV::TH_EXT: {
422 unsigned Msb =
MI.getOperand(2).getImm();
423 unsigned Lsb =
MI.getOperand(3).getImm();
424 return Msb >= Lsb && (Msb - Lsb + 1) <= 32;
426 case RISCV::TH_EXTU: {
427 unsigned Msb =
MI.getOperand(2).getImm();
428 unsigned Lsb =
MI.getOperand(3).getImm();
429 return Msb >= Lsb && (Msb - Lsb + 1) < 32;
442 auto AddRegToWorkList = [&](
Register SrcReg) {
449 if (!AddRegToWorkList(SrcReg))
452 while (!Worklist.
empty()) {
455 // If we already visited this register, we don't need to check it again.
463 int OpNo =
MI->findRegisterDefOperandIdx(
Reg,
/*TRI=*/nullptr);
464 assert(OpNo != -1 &&
"Couldn't find register");
466 // If this is a sign extending operation we don't need to look any further.
470 // Is this an instruction that propagates sign extend?
471 switch (
MI->getOpcode()) {
473 // Unknown opcode, give up.
480 // If this is the entry block and the register is livein, see if we know
481 // it is sign extended.
482 if (
MI->getParent() == &MF->
front()) {
488 Register CopySrcReg =
MI->getOperand(1).getReg();
489 if (CopySrcReg == RISCV::X10) {
490 // For a method return value, we check the ZExt/SExt flags in attribute.
491 // We assume the following code sequence for method call.
492 // PseudoCALL @bar, ...
493 // ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
494 // %0:gpr = COPY $x10
496 // We use the PseudoCall to look up the IR function being called to find
497 // its return attributes.
499 auto II =
MI->getIterator();
500 if (
II ==
MBB->instr_begin() ||
501 (--
II)->getOpcode() != RISCV::ADJCALLSTACKUP)
517 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
518 unsigned BitWidth = IntTy->getBitWidth();
519 if ((
BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
520 (
BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
524 if (!AddRegToWorkList(CopySrcReg))
530 // For these, we just need to check if the 1st operand is sign extended.
534 if (
MI->getOperand(2).getImm() >= 31)
542 // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
543 // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
544 // Logical operations use a sign extended 12-bit immediate.
545 // Arithmetic shift right can only increase the number of sign bits.
546 if (!AddRegToWorkList(
MI->getOperand(1).getReg()))
550 case RISCV::PseudoCCADDW:
551 case RISCV::PseudoCCADDIW:
552 case RISCV::PseudoCCSUBW:
553 case RISCV::PseudoCCSLLW:
554 case RISCV::PseudoCCSRLW:
555 case RISCV::PseudoCCSRAW:
556 case RISCV::PseudoCCSLLIW:
557 case RISCV::PseudoCCSRLIW:
558 case RISCV::PseudoCCSRAIW:
559 // Returns operand 4 or an ADDW/SUBW/etc. of operands 5 and 6. We only
560 // need to check if operand 4 is sign extended.
561 if (!AddRegToWorkList(
MI->getOperand(4).getReg()))
575 case RISCV::PseudoCCMOVGPR:
576 case RISCV::PseudoCCMOVGPRNoX0:
577 case RISCV::PseudoCCAND:
578 case RISCV::PseudoCCOR:
579 case RISCV::PseudoCCXOR:
580 case RISCV::PseudoCCANDN:
581 case RISCV::PseudoCCORN:
582 case RISCV::PseudoCCXNOR:
584 // If all incoming values are sign-extended, the output of AND, OR, XOR,
585 // MIN, MAX, or PHI is also sign-extended.
587 // The input registers for PHI are operand 1, 3, ...
588 // The input registers for PseudoCCMOVGPR(NoX0) are 4 and 5.
589 // The input registers for PseudoCCAND/OR/XOR are 4, 5, and 6.
590 // The input registers for others are operand 1 and 2.
591 unsigned B = 1,
E = 3,
D = 1;
592 switch (
MI->getOpcode()) {
594 E =
MI->getNumOperands();
597 case RISCV::PseudoCCMOVGPR:
598 case RISCV::PseudoCCMOVGPRNoX0:
602 case RISCV::PseudoCCAND:
603 case RISCV::PseudoCCOR:
604 case RISCV::PseudoCCXOR:
605 case RISCV::PseudoCCANDN:
606 case RISCV::PseudoCCORN:
607 case RISCV::PseudoCCXNOR:
613 for (
unsigned I =
B;
I !=
E;
I +=
D) {
614 if (!
MI->getOperand(
I).isReg())
617 if (!AddRegToWorkList(
MI->getOperand(
I).getReg()))
624 case RISCV::CZERO_EQZ:
625 case RISCV::CZERO_NEZ:
626 case RISCV::VT_MASKC:
627 case RISCV::VT_MASKCN:
628 // Instructions return zero or operand 1. Result is sign extended if
629 // operand 1 is sign extended.
630 if (!AddRegToWorkList(
MI->getOperand(1).getReg()))
635 if (
MI->getOperand(1).isReg() &&
MI->getOperand(1).getReg().isVirtual()) {
637 if (SrcMI->getOpcode() == RISCV::LUI &&
638 SrcMI->getOperand(1).isImm()) {
639 uint64_t Imm = SrcMI->getOperand(1).getImm();
655 // With these opcodes, we can "fix" them with the W-version
656 // if we know all users of the result only rely on bits 31:0
658 // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits
659 if (
MI->getOperand(2).getImm() >= 32)
675 // If we get here, then every node we visited produces a sign extended value
676 // or propagated sign extended values. So the result must be sign extended.
680 static unsigned getWOp(
unsigned Opcode) {
707 bool MadeChange =
false;
708 for (MachineBasicBlock &
MBB : MF) {
710 // We're looking for the sext.w pattern ADDIW rd, rs1, 0.
711 if (!RISCVInstrInfo::isSEXT_W(
MI))
716 SmallPtrSet<MachineInstr *, 4> FixableDefs;
718 // If all users only use the lower bits, this sext.w is redundant.
719 // Or if all definitions reaching MI sign-extend their output,
720 // then sext.w is redundant.
726 if (!
MRI.constrainRegClass(SrcReg,
MRI.getRegClass(DstReg)))
729 // Convert Fixable instructions to their W versions.
730 for (MachineInstr *Fixable : FixableDefs) {
732 Fixable->setDesc(
TII.get(
getWOp(Fixable->getOpcode())));
733 Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);
734 Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);
735 Fixable->clearFlag(MachineInstr::MIFlag::IsExact);
737 ++NumTransformedToWInstrs;
741 MRI.replaceRegWith(DstReg, SrcReg);
742 MRI.clearKillFlags(SrcReg);
743 MI.eraseFromParent();
752// Strips or adds W suffixes to eligible instructions depending on the
753// subtarget preferences.
754bool RISCVOptWInstrs::canonicalizeWSuffixes(MachineFunction &MF,
755 const RISCVInstrInfo &
TII,
756 const RISCVSubtarget &ST,
757 MachineRegisterInfo &
MRI) {
759 bool ShouldPreferW =
ST.preferWInst();
760 bool MadeChange =
false;
762 for (MachineBasicBlock &
MBB : MF) {
763 for (MachineInstr &
MI :
MBB) {
764 std::optional<unsigned> WOpc;
765 std::optional<unsigned> NonWOpc;
766 unsigned OrigOpc =
MI.getOpcode();
771 NonWOpc = RISCV::ADD;
774 NonWOpc = RISCV::ADDI;
777 NonWOpc = RISCV::MUL;
780 NonWOpc = RISCV::SLLI;
783 NonWOpc = RISCV::SUB;
798 // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits.
799 if (
MI.getOperand(2).getImm() >= 32)
811 MI.setDesc(
TII.get(NonWOpc.value()));
813 ++NumTransformedToNonWInstrs;
817 // LWU is always converted to LW when possible as 1) LW is compressible
818 // and 2) it helps minimise differences vs RV32.
819 if ((ShouldPreferW || OrigOpc == RISCV::LWU) && WOpc.has_value() &&
822 MI.setDesc(
TII.get(WOpc.value()));
823 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
824 MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
825 MI.clearFlag(MachineInstr::MIFlag::IsExact);
827 ++NumTransformedToWInstrs;
836bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
842 const RISCVInstrInfo &
TII = *
ST.getInstrInfo();
847 bool MadeChange =
false;
848 MadeChange |= removeSExtWInstrs(MF,
TII, ST,
MRI);
849 MadeChange |= canonicalizeWSuffixes(MF,
TII, ST,
MRI);
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static cl::opt< bool > DisableSExtWRemoval("loongarch-disable-sextw-removal", cl::desc("Disable removal of sign-extend insn"), cl::init(false), cl::Hidden)
static bool hasAllWUsers(const MachineInstr &OrigMI, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI)
static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI, SmallPtrSetImpl< MachineInstr * > &FixableDef)
static unsigned getWOp(unsigned Opcode)
Promote Memory to Register
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, const MachineRegisterInfo &MRI, SmallPtrSetImpl< MachineInstr * > &FixableDef)
static bool hasAllWUsers(const MachineInstr &OrigMI, const RISCVSubtarget &ST, const MachineRegisterInfo &MRI)
static bool isSignExtendingOpW(const MachineInstr &MI, unsigned OpNo)
static cl::opt< bool > DisableStripWSuffix("riscv-disable-strip-w-suffix", cl::desc("Disable strip W suffix"), cl::init(false), cl::Hidden)
static bool hasAllNBitUsers(const MachineInstr &OrigMI, const RISCVSubtarget &ST, const MachineRegisterInfo &MRI, unsigned OrigBits)
#define RISCV_OPT_W_INSTRS_NAME
static bool vectorPseudoHasAllNBitUsers(const MachineOperand &UserOp, unsigned Bits)
static cl::opt< bool > DisableSExtWRemoval("riscv-disable-sextw-removal", cl::desc("Disable removal of sext.w"), cl::init(false), cl::Hidden)
static unsigned getWOp(unsigned Opcode)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class holds the attributes for a particular argument, parameter, function, or return value.
FunctionPass class - This class is used to implement most global optimizations.
Describe properties that are true of each instruction in the target description file.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool isLiveIn(Register Reg) const
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
bool isSExt32Register(Register Reg) const
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool hasVLOp(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool hasSEWOp(uint64_t TSFlags)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
FunctionPass * createRISCVOptWInstrsPass()
constexpr unsigned BitWidth
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.