//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
  for (auto Instr : Res) {
    // Assume instructions that aren't listed aren't compressible.
    bool Compressed = false;
    switch (Instr.getOpcode()) {
    // One 48-bit instruction takes the space of 1.5 regular instructions.
      Compressed = isInt<6>(Instr.getImm());
    // Two RVC instructions take the same space as one RVI instruction, but
    // can take longer to execute than the single RVI instruction. Thus, we
    // consider that two RVC instructions are slightly more costly than one
    // RVI instruction. For longer sequences of RVC instructions the space
    // savings can be worth it, though. The costs below try to model that.
    if (!Compressed)
      Cost += 100; // Baseline cost of one RVI instruction: 100%.
    else
      Cost += 70; // 70% cost of baseline.
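    // [Illustrative, not in the original file] The weights encode the
    // trade-off described above: two compressed instructions (2 * 70 = 140)
    // cost more than a single full-size instruction (100), but longer
    // compressed runs still win, e.g. four compressed (280) beat three
    // full-size (300).
    static_assert(2 * 70 > 100 && 4 * 70 < 3 * 100,
                  "compressed-vs-full cost trade-off");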
// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI,
                                RISCVMatInt::InstSeq &Res) {
  bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
  // Use BSETI for a single bit that can't be expressed by a single LUI or ADDI.
  if (!IsRV64 && STI.hasFeature(RISCV::FeatureVendorXqcili)) {
    bool FitsOneStandardInst = ((Val & 0xFFF) == 0) || isInt<12>(Val);
    // 20-bit signed immediates that don't fit into `ADDI` or `LUI` should use
    // `QC.LI` (a single 32-bit instruction).
    if (!FitsOneStandardInst && isInt<20>(Val)) {
    // 32-bit signed immediates that don't fit into `ADDI`, `LUI` or `QC.LI`
    // should use `QC.E.LI` (a single 48-bit instruction).
    if (!FitsOneStandardInst && isInt<32>(Val)) {
  // Depending on the active bits in the immediate Value v, the following
  // instruction sequences are emitted:
  //
  // v[0,12) != 0 && v[12,32) == 0 : ADDI
  // v[0,12) == 0 && v[12,32) != 0 : LUI
  // v[0,32) != 0                  : LUI+ADDI(W)
  int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
  if (Lo12 || Hi20 == 0) {
    unsigned AddiOpc = RISCV::ADDI;
      // Use ADDIW rather than ADDI only when necessary for correctness. As
      // noted in RISCVOptWInstrs, this helps reduce test differences vs
      // RV32 without being a pessimization.
        AddiOpc = RISCV::ADDIW;
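    // [Illustrative worked example, not in the original file] The +0x800
    // rounding and the ADDIW choice for the hypothetical Val = 0x7FFFF800:
    // it splits into Hi20 = 0x80000 and Lo12 = -0x800. On RV64, LUI 0x80000
    // sign-extends to 0xFFFFFFFF80000000, so a plain ADDI -0x800 would leave
    // 0xFFFFFFFF7FFFF800; ADDIW is needed to re-extend from bit 31 and
    // produce 0x7FFFF800.
    static_assert((((0x7FFFF800u + 0x800u) >> 12) & 0xFFFFF) == 0x80000u &&
                      (0x7FFFF800u & 0xFFFu) == 0x800u,
                  "Hi20/Lo12 split of 0x7FFFF800");
    static_assert(0xFFFFFFFF80000000ull - 0x800ull == 0xFFFFFFFF7FFFF800ull,
                  "plain ADDI would keep the wrong sign extension");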
  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
  // (i.e., LUI+ADDI+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
  // that the first two instructions (LUI+ADDI) can contribute up to 32 bits
  // while the following ADDI instructions contribute up to 12 bits each.
  //
  // At first glance, implementing this seems possible by simply emitting the
  // most significant 32 bits (LUI+ADDI(W)) followed by as many left shifts
  // (SLLI) and immediate additions (ADDI) as needed. However, because ADDI
  // performs a sign-extended addition, that only works when at most 11 bits
  // of each ADDI immediate are used. Using all 12 bits of the ADDI
  // immediates, as GAS does, requires that the constant is processed starting
  // with the least significant bit.
  // In the following, constants are processed from LSB to MSB but instruction
  // emission is performed from MSB to LSB by recursively calling
  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
  // from the constant and the optimal shift amount, which can be greater than
  // 12 bits if the constant is sparse, is determined. Then, the shifted
  // remaining constant is processed recursively and gets emitted as soon as it
  // fits into 32 bits. The emission of the shifts and additions is subsequently
  // performed when the recursion returns.
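  // [Illustrative worked example, not in the original file] Tracing the
  // recursion for the hypothetical Val = 0xDEADBEEF (not a signed 32-bit
  // value): peeling off Lo12 = -0x111 leaves 0xDEADC000 with 14 trailing
  // zeros; recursing on 0x37AB7 yields LUI 0x38 + ADDI -0x549, and unwinding
  // appends SLLI 14 and ADDI -0x111, i.e. a four-instruction sequence.
  static_assert(0xDEADBEEFll + 0x111ll == 0xDEADC000ll,
                "stripping the sign-extended low 12 bits");
  static_assert((0xDEADC000ll >> 14) == 0x37AB7ll,
                "14 trailing zeros removed before recursing");
  static_assert((0x38ll << 12) - 0x549ll == 0x37AB7ll,
                "LUI 0x38 + ADDI -0x549 covers the recursive value");
  static_assert((0x37AB7ll << 14) - 0x111ll == 0xDEADBEEFll,
                "SLLI 14 + ADDI -0x111 restores Val");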
  // Val might now be valid for LUI without needing a shift.
    // If the remaining bits don't fit in 12 bits, we might be able to reduce
    // the shift amount in order to use LUI which will zero the lower 12
    // bits.
    if (ShiftAmount > 12 && !isInt<12>(Val)) {
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI.
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
    // Try to use SLLI_UW for Val when it is uint32 but not int32.
      // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
      // SLLI.UW.
  // Skip shift if we were able to use LUI directly.
    unsigned Opc = Unsigned ? RISCV::SLLI_UW : RISCV::SLLI;
  // for case: 0b111..1..xxxxxx1..1..
  if (TrailingOnes > 0 && TrailingOnes < 64 &&
      (LeadingOnes + TrailingOnes) > (64 - 12))
    return 64 - TrailingOnes;
  // for case: 0bxxx1..1..1...xxx
  if (UpperTrailingOnes < 32 &&
      (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
    return 32 - UpperTrailingOnes;
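  // [Illustrative, not in the original file] For the hypothetical
  // Val = 0xFFF0FFFFFFFFFFFF (12 leading and 48 trailing ones), the run of
  // zeros is narrower than 12 bits, so the first case returns 64 - 48 = 16:
  // rotating left by 16 yields 0xFFFFFFFFFFFFFFF0 (-16), which a single ADDI
  // can materialize before the RORI/TH.SRRI emitted further below undoes the
  // rotation.
  static_assert(llvm::rotl<uint64_t>(0xFFF0FFFFFFFFFFFFull, 16) ==
                    0xFFFFFFFFFFFFFFF0ull,
                "rotated constant fits in a 12-bit immediate");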
  assert(Val > 0 && "Expected positive val");
  // Fill in the bits that will be shifted out with 1s. An example where this
  // helps is trailing one masks with 32 or more ones. This will generate
  // ADDI -1 and an SRLI.
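  // [Illustrative, not in the original file] For the hypothetical
  // Val = 0x3FFFFFFFF (34 trailing ones, 30 leading zeros), shifting left by
  // the 30 leading zeros and filling the vacated bits with ones collapses the
  // constant to -1, so ADDI -1 followed by SRLI 30 rebuilds it in two
  // instructions.
  static_assert(((0x3FFFFFFFFull << 30) | ((1ull << 30) - 1)) ==
                    0xFFFFFFFFFFFFFFFFull,
                "shifted-and-filled constant collapses to -1");
  static_assert((0xFFFFFFFFFFFFFFFFull >> 30) == 0x3FFFFFFFFull,
                "SRLI 30 restores the original constant");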
  // Keep the new sequence if it is an improvement or the original is empty.
  if ((TmpSeq.size() + 1) < Res.size() ||
  // Some cases can benefit from filling the lower bits with zeros instead.
  // Keep the new sequence if it is an improvement or the original is empty.
  if ((TmpSeq.size() + 1) < Res.size() ||
  // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
  // the end of the sequence.
  if (LeadingZeros == 32 && STI.hasFeature(RISCV::FeatureStdExtZba)) {
    // Bit 31 is set, so sign extend to fill the upper bits with 1s.
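    // [Illustrative, not in the original file] For the hypothetical
    // Val = 0x80000001 (exactly 32 leading zeros, bit 31 set), the
    // sign-extended value 0xFFFFFFFF80000001 takes just LUI 0x80000 + ADDI 1,
    // and a trailing zext.w (ADD.UW with X0) clears the upper half again, for
    // three instructions total.
    static_assert((0xFFFFFFFF80000001ull & 0xFFFFFFFFull) == 0x80000001ull,
                  "zext.w recovers the original 32-bit value");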
    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size() ||
  // If the low 12 bits are non-zero, the first expansion may end with an ADDI
  // or ADDIW. If there are trailing zeros, try generating a sign extended
  // constant with no trailing zeros and use a final SLLI to restore them.
  if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) {
    int64_t ShiftedVal = Val >> TrailingZeros;
    // If we can use C.LI+C.SLLI instead of LUI+ADDI(W) prefer that since
    // it's more compressible. But only if LUI+ADDI(W) isn't fusable.
    // NOTE: We don't check for C extension to minimize differences in generated
    // code.
    bool IsShiftedCompressible =
    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) {
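    // [Illustrative, not in the original file] For the hypothetical
    // Val = 0x3E00 the plain split is LUI 4 + ADDI -0x200, but shifting out
    // the 9 trailing zeros leaves 31, which fits the 6-bit C.LI immediate, so
    // C.LI 31 + C.SLLI 9 is preferred when LUI+ADDI fusion is unavailable.
    static_assert((0x3E00 >> 9) == 31 && (31 << 9) == 0x3E00,
                  "C.LI 31 + C.SLLI 9 rebuilds the constant");
    static_assert((0x4 << 12) - 0x200 == 0x3E00,
                  "LUI 4 + ADDI -0x200 is the uncompressed alternative");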
  // If we have a 1 or 2 instruction sequence this is the best we can do. This
  // will always be true for RV32 and will often be true for RV64.
         "Expected RV32 to only need 2 instructions");
  // If the lower 13 bits are something like 0x17ff, try to add 1 to change the
  // lower 13 bits to 0x1800. We can restore this with an ADDI of -1 at the end
  // of the sequence. Call generateInstSeqImpl on the new constant which may
  // subtract 0xfffffffffffff800 to create another ADDI. This will leave a
  // constant with more than 12 trailing zeros for the next recursive step.
  if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) {
    int64_t Imm12 = -(0x800 - (Val & 0xfff));
    int64_t AdjustedVal = Val - Imm12;
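    // [Illustrative, not in the original file] For a hypothetical Val ending
    // in 0x17FF: Imm12 = -(0x800 - 0x7FF) = -1 and AdjustedVal ends in
    // 0x1800, which the recursive step strips as -0x800, leaving at least 13
    // trailing zeros for the rest of the constant.
    static_assert(-(0x800 - (0x17FF & 0xfff)) == -1,
                  "Imm12 for a constant ending in 0x17FF");
    static_assert(((0x17FF - (-1)) & 0x1FFF) == 0x1800,
                  "low 13 bits after the adjustment");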
    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size()) {
  // If the constant is positive we might be able to generate a shifted constant
  // with no leading zeros and use a final SRLI to restore them.
  if (Val > 0 && Res.size() > 2) {
  // If the constant is negative, try inverting and using our trailing zero
  // optimizations. Use an xori to invert the final value.
  if (Val < 0 && Res.size() > 3) {
    // Keep it if we found a sequence that is smaller after inverting.
  // If the Low and High halves are the same, use pack. The pack instruction
  // packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in the
  // lower half and rs2 in the upper half.
  if (LoVal == HiVal) {
    if ((TmpSeq.size() + 1) < Res.size()) {
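    // [Illustrative, not in the original file] With Zbkb and the hypothetical
    // Val = 0x1234567812345678, both 32-bit halves are equal, so the low half
    // is materialized once (LUI + ADDI) and PACK duplicates it into the
    // upper half, giving three instructions.
    static_assert((0x1234567812345678ull >> 32) ==
                      (0x1234567812345678ull & 0xFFFFFFFFull),
                  "equal halves qualify for PACK");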
  // Perform optimization with BSETI in the Zbs extension.
    // Create a simm32 value for LUI+ADDI(W) by forcing the upper 33 bits to
    // zero. Xor that with the original value to get which bits should be set
    // by BSETI.
      Hi &= (Hi - 1); // Clear lowest set bit.
    // Fold LI 1 + SLLI into BSETI.
      Res.front() = Inst(RISCV::BSETI, Res.front().getImm()); // Patch SLLI.
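    // [Illustrative, not in the original file] For the hypothetical
    // Val = 0x1000000080000001, the simm32 base (upper 33 bits forced to
    // zero) is 1 and the XOR with Val leaves bits 31 and 60, so
    // ADDI 1 + BSETI 31 + BSETI 60 covers the constant in three instructions.
    static_assert((0x1000000080000001ull & 0x7FFFFFFFull) == 1,
                  "simm32 base after clearing the upper 33 bits");
    static_assert((0x1000000080000001ull ^ 1ull) ==
                      ((1ull << 60) | (1ull << 31)),
                  "remaining bits handled by one BSETI each");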
  // Perform optimization with BCLRI in the Zbs extension.
    // Create a simm32 value for LUI+ADDI(W) by forcing the upper 33 bits to
    // one. Xor that with the original value to get which bits should be
    // cleared by BCLRI.
      Hi &= (Hi - 1); // Clear lowest set bit.
  // Perform optimization with SH*ADD in the Zba extension.
    // Select the opcode and divisor.
    if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
    } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
    } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
    // Build the new instruction sequence.
      if ((TmpSeq.size() + 1) < Res.size()) {
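    // [Illustrative, not in the original file] With Zba and the hypothetical
    // Val = 0x150000000, the value is 3 * 0x70000000 and the quotient is a
    // plain LUI constant, so LUI 0x70000 followed by SH1ADD rd, rd, rd
    // (rd * 2 + rd) materializes it in two instructions.
    static_assert(0x150000000ll == 3 * 0x70000000ll &&
                      (0x70000000ll << 1) + 0x70000000ll == 0x150000000ll,
                  "SH1ADD rebuilds 3 * (Val / 3)");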
    // Try to use LUI+SH*ADD+ADDI.
    int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
    if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
    } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
    } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
    // Build the new instruction sequence.
      // A Val with zero Lo12 (which implies Val equals Hi52) should already
      // have been processed into LUI+SH*ADD by the previous optimization.
             "unexpected instruction sequence for immediate materialisation");
      if ((TmpSeq.size() + 2) < Res.size()) {
  // Perform optimization with rori in the Zbb and th.srri in the XTheadBb
  // extension.
       STI.hasFeature(RISCV::FeatureVendorXTHeadBb))) {
  // Only the first instruction has X0 as its source.
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI,
                              unsigned &ShiftAmt, unsigned &AddOpc) {
  // Subtract the LoVal to emulate the effect of the final ADD.
  // Use trailing zero counts to figure how far we need to shift LoVal to line
  // up with the remaining constant.
  // TODO: This algorithm assumes all non-zero bits in the low 32 bits of the
  // final constant come from LoVal.
  assert(TzLo < 32 && TzHi >= 32);
  ShiftAmt = TzHi - TzLo;
  if (Tmp == ((uint64_t)LoVal << ShiftAmt))

  // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)).
    AddOpc = RISCV::ADD_UW;
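  // [Illustrative, not in the original file] For the hypothetical
  // Val = 0x0567800000005678, the sign-extended low half LoVal = 0x5678 lines
  // up with the remaining constant when shifted left by 40, so the caller can
  // emit mat(LoVal), an SLLI by 40 into a scratch register and a final ADD
  // instead of a longer single-register chain.
  static_assert(0x0567800000005678ull - 0x5678ull == (0x5678ull << 40),
                "remaining bits equal LoVal << ShiftAmt");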
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI,
                  bool CompressionCost, bool FreeZeroes) {
  bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
  bool HasRVC = CompressionCost && STI.hasFeature(RISCV::FeatureStdExtZca);
  int PlatRegSize = IsRV64 ? 64 : 32;
  // Split the constant into platform register sized chunks, and calculate cost
  // of each chunk.
  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
  return std::max(FreeZeroes ? 0 : 1, Cost);
} // namespace llvm::RISCVMatInt