//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines vectorizer utilities.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "vectorutils"

/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));
/// Return true if all of the intrinsic's arguments and return type are scalars
/// for the scalar form of the intrinsic, and vectors for the vector form of the
/// intrinsic (except operands that are marked as always being scalar by
/// isVectorIntrinsicWithScalarOpAtArg).

  case Intrinsic::abs:
  // Begin integer bit-manipulation.
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  case Intrinsic::sqrt:
  // Begin floating-point.
  case Intrinsic::atan2:
  case Intrinsic::sincos:
  case Intrinsic::sincospi:
  case Intrinsic::exp10:
  case Intrinsic::ldexp:
  case Intrinsic::log10:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::minimumnum:
  case Intrinsic::maximumnum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::fmuladd:
  case Intrinsic::is_fpclass:
  case Intrinsic::powi:
  case Intrinsic::canonicalize:
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:

    return TTI->isTargetIntrinsicTriviallyScalarizable(ID);

  // TODO: Move frexp to isTriviallyVectorizable.
  // https://github.com/llvm/llvm-project/issues/112408
  case Intrinsic::frexp:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                              unsigned ScalarOpdIdx,
                                              const TargetTransformInfo *TTI) {

    return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);

  // Vector predication intrinsics have the EVL as the last operand.

  case Intrinsic::vp_abs:
  case Intrinsic::ctlz:
  case Intrinsic::vp_ctlz:
  case Intrinsic::cttz:
  case Intrinsic::vp_cttz:
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
  case Intrinsic::powi:
  case Intrinsic::vector_extract:
    return (ScalarOpdIdx == 1);
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
    return (ScalarOpdIdx == 2);
  case Intrinsic::experimental_vp_splice:
    return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
    return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);

    return OpdIdx == -1 || OpdIdx == 0;

  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::vp_lrint:
  case Intrinsic::vp_llrint:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:
  case Intrinsic::vector_extract:
    return OpdIdx == -1 || OpdIdx == 0;
  case Intrinsic::modf:
  case Intrinsic::sincos:
  case Intrinsic::sincospi:
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
  case Intrinsic::powi:
  case Intrinsic::ldexp:
    return OpdIdx == -1 || OpdIdx == 1;

    return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);

  case Intrinsic::frexp:
    return RetIdx == 0 || RetIdx == 1;
/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapped intrinsic and returns
/// its ID; if it cannot find one, it returns not_intrinsic.

  if (ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end ||
      ID == Intrinsic::assume ||
      ID == Intrinsic::experimental_noalias_scope_decl ||
      ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
    return Intrinsic::not_intrinsic;
  case Intrinsic::vector_interleave2:
  case Intrinsic::vector_interleave3:
  case Intrinsic::vector_interleave4:
  case Intrinsic::vector_interleave5:
  case Intrinsic::vector_interleave6:
  case Intrinsic::vector_interleave7:
  case Intrinsic::vector_interleave8:

  case Intrinsic::vector_deinterleave2:
  case Intrinsic::vector_deinterleave3:
  case Intrinsic::vector_deinterleave4:
  case Intrinsic::vector_deinterleave5:
  case Intrinsic::vector_deinterleave6:
  case Intrinsic::vector_deinterleave7:
  case Intrinsic::vector_deinterleave8:
  [[maybe_unused]] unsigned Factor =
      getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
  assert(Factor && Factor == DISubtypes.size() &&
         "unexpected deinterleave factor or result type");
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then
/// extracted from the vector.
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");

  // For fixed-length vector, return poison for out of range access.
  unsigned Width = FVTy->getNumElements();

    return C->getAggregateElement(EltNo);

    // If this is an insert to a variable element, we don't know what it is.

    // If this is an insert to the element we are looking for, return the
    // inserted value.
      return III->getOperand(1);

    // Guard against infinite loop on malformed, unreachable IR.
    if (III == III->getOperand(0))

    // Otherwise, the insertelement doesn't modify the value, recurse on its
    // vector operand.

    // Restrict the following transformation to fixed-length vector.

    if (InEl < (int)LHSWidth)

  // Extract a value from a vector add operation with a constant zero.
  // TODO: Use getBinOpIdentity() to generalize this.
      if (Constant *Elt = C->getAggregateElement(EltNo))
        if (Elt->isNullValue())

  // If the vector is a splat then we can trivially find the scalar element.
    if (EltNo < VTy->getElementCount().getKnownMinValue())

  // Otherwise, we don't know.
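// Illustrative example: given
//   %v = insertelement <4 x i32> %w, i32 %x, i32 2
// findScalarElement(%v, 2) returns %x, while findScalarElement(%v, 1)
// recurses into %w.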
  // Ignore invalid (undefined) mask elements.

  // There can be only 1 non-negative mask element value if this is a splat.
  if (SplatIndex != -1 && SplatIndex != M)

  // Initialize the splat index to the 1st non-negative mask element.

  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcasts a scalar at element 0.
    return C->getSplatValue();

  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>

    // FIXME: We can allow undefs, but if Index was specified, we may want to
    // check that the constant is defined at that index.
      return C->getSplatValue() != nullptr;

    // FIXME: We can safely allow undefs here. If Index was specified, we will
    // check that the mask elt is defined at the required index.

    // Match a specific element. The mask should be defined at and match the
    // specified index.
    return Shuf->getMaskValue(Index) == Index;

  // The remaining tests are all recursive, so bail out if we hit the limit.

  // If both operands of a binop are splats, the result is a splat.

  // If all operands of a select are splats, the result is a splat.

  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
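// Illustrative example: a broadcast sequence such as
//   %ins   = insertelement <4 x float> poison, float %f, i64 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
//                          <4 x i32> zeroinitializer
// is recognized here: getSplatValue returns %f and isSplatValue returns true.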
bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                                  const APInt &DemandedElts, APInt &DemandedLHS,
                                  APInt &DemandedRHS, bool AllowUndefElts) {

  // Early out if we don't demand any elements.
  if (DemandedElts.isZero())

  // Simple case of a shuffle with zeroinitializer.
  if (all_of(Mask, [](int Elt) { return Elt == 0; })) {

  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
    assert((-1 <= M) && (M < (SrcWidth * 2)) &&
           "Invalid shuffle mask constant");

    if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))

    // For undef elements, we don't know anything about the common state of
    // the shuffle result.

      DemandedRHS.setBit(M - SrcWidth);
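// Worked example: with SrcWidth = 4, Mask = <0, 5, 2, 7> and all four result
// elements demanded, DemandedLHS ends up with bits {0, 2} set and DemandedRHS
// with bits {1, 3} set (mask values >= SrcWidth select from the second
// operand).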
bool llvm::isMaskedSlidePair(ArrayRef<int> Mask, int NumElts,
                             std::array<std::pair<int, int>, 2> &SrcInfo) {
  const int SignalValue = NumElts * 2;
  SrcInfo[0] = {-1, SignalValue};
  SrcInfo[1] = {-1, SignalValue};

    int Src = M >= (int)NumElts;
    int Diff = (int)i - (M % NumElts);

    for (int j = 0; j < 2; j++) {
      auto &[SrcE, DiffE] = SrcInfo[j];

        assert(DiffE == SignalValue);

      if (SrcE == Src && DiffE == Diff) {

  // Avoid all-undef masks.
  return SrcInfo[0].first != -1;
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
    ScaledMask.assign(Mask.begin(), Mask.end());

  for (int MaskElt : Mask) {
    assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
           "Overflowed 32-bits");

    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
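// Worked example: narrowing each element by Scale = 2 turns the mask <0, 2>
// into <0, 1, 4, 5>; a negative (undef/poison) element such as -1 is simply
// replicated Scale times.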
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
    ScaledMask.assign(Mask.begin(), Mask.end());

  // We must map the original elements down evenly to a type with fewer
  // elements.
  int NumElts = Mask.size();
  if (NumElts % Scale != 0)

  ScaledMask.reserve(NumElts / Scale);

  // Step through the input mask by splitting into Scale-sized slices.
    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");

    // The first element of the slice determines how we evaluate this slice.
    int SliceFront = MaskSlice.front();
    if (SliceFront < 0) {
      // Negative values (undef or other "sentinel" values) must be equal
      // across the entire slice.

      // A positive mask element must be cleanly divisible.
      if (SliceFront % Scale != 0)

      // Elements of the slice must be consecutive.
      for (int i = 1; i < Scale; ++i)
        if (MaskSlice[i] != SliceFront + i)

      ScaledMask.push_back(SliceFront / Scale);

    Mask = Mask.drop_front(Scale);
  } while (!Mask.empty());

  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");

  // All elements of the original mask can be scaled down to map to the elements
  // of a mask with wider elements.
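// Worked example: with Scale = 2 the mask <0, 1, 4, 5> widens to <0, 2>,
// whereas <0, 1, 4, 6> fails because the second slice is not consecutive.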
  unsigned NumElts = M.size();
  if (NumElts % 2 != 0)

  for (unsigned i = 0; i < NumElts; i += 2) {

    // If both elements are undef, new mask is undef too.
    if (M0 == -1 && M1 == -1) {

    if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {

    if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {

  assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
  unsigned NumSrcElts = Mask.size();
  assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (NumSrcElts == NumDstElts) {
    ScaledMask.assign(Mask.begin(), Mask.end());

  // Ensure we can find a whole scale factor.
  assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
         "Unexpected scaling factor");

  if (NumSrcElts > NumDstElts) {
    int Scale = NumSrcElts / NumDstElts;

    int Scale = NumDstElts / NumSrcElts;
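// Worked example: scaling the 4-element mask <0, 2, 1, 3> to NumDstElts = 8
// narrows it to <0, 1, 4, 5, 2, 3, 6, 7>; scaling an 8-element mask down to
// 4 destination elements only succeeds if adjacent elements can be paired up
// by the widening step above.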
  std::array<SmallVector<int, 16>, 2> TmpMasks;

  for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
void llvm::processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
        ManyInputsAction) {

  // Try to perform better estimation of the permutation.
  // 1. Split the source/destination vectors into real registers.
  // 2. Do the mask analysis to identify which real registers are
  //    permuted.
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(2 * NumOfSrcRegs, {});
    // Check that the values in dest registers are in the one src
    // register.
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;

      int MaskIdx = Mask[Idx] % Sz;
      int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
      // Add a cost of PermuteTwoSrc for each new source register permute,
      // if we have more than one source register.
      if (RegMasks[SrcRegIdx].empty())

      RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;

  // Process split mask.

    switch (NumSrcRegs) {

      // No input vectors were used!

      // Find the only mask with at least a single undef mask element.

      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg, I);

      // The first mask is a permutation of a single register. Since we have >2
      // input registers to shuffle, we merge the masks for the first 2
      // registers and generate a shuffle of 2 registers rather than reordering
      // the first register and then shuffling with the second register. Next,
      // generate the shuffles of the resulting register + the remaining
      // registers from the list.

        for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {

                   "Expected undefined mask element.");
            FirstMask[Idx] = SecondMask[Idx] + VF;

        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {

          if (FirstIdx == SecondIdx) {

          SecondMask = RegMask;
          CombineMasks(FirstMask, SecondMask);
          ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);

          NormalizeMask(FirstMask);

          SecondMask = FirstMask;
          SecondIdx = FirstIdx;

        if (FirstIdx != SecondIdx && SecondIdx >= 0) {
          CombineMasks(SecondMask, FirstMask);
          ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);

          Dest[FirstIdx].clear();
          NormalizeMask(SecondMask);

      } while (SecondIdx >= 0);
void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
                                               const APInt &DemandedElts,
                                               APInt &DemandedLHS,
                                               APInt &DemandedRHS) {
  assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
  int NumLanes = VectorBitWidth / 128;

  int NumEltsPerLane = NumElts / NumLanes;
  int HalfEltsPerLane = NumEltsPerLane / 2;

  // Map DemandedElts to the horizontal operands.
  for (int Idx = 0; Idx != NumElts; ++Idx) {
    if (!DemandedElts[Idx])

    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
    int LocalIdx = Idx % NumEltsPerLane;
    if (LocalIdx < HalfEltsPerLane) {
      DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);

      LocalIdx -= HalfEltsPerLane;
      DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
  // DemandedBits will give us every value's live-out bits. But we want
  // to ensure no extra casts would need to be inserted, so every DAG
  // of connected values must have the same minimum bitwidth.

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64-bits wide.
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Don't make work for ourselves. If we know the loaded type is legal,
        // don't add it to the worklist.

  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)

    uint64_t V = DB.getDemandedBits(I).getZExtValue();

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
        !InstructionSet.count(I))

    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
    // transform anything that relies on them.
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.

    // Don't modify the types of operands of a call, as doing that would cause a
    // signature mismatch.

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.

    for (Value *O : I->operands()) {

  // Now we've discovered all values, walk them to see if there are
  // any users we didn't see. If there are, we can't optimize that
  // chain of instructions.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)

  for (const auto &E : ECs) {

      LeaderDemandedBits |= DBits[M];

    // Round up to a power of 2.

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    // If we are required to shrink a PHI, abandon this entire equivalence
    // class.

      Type *Ty = M->getType();
        Ty = MI->getOperand(0)->getType();

      if (MinBW >= Ty->getScalarSizeInBits())

      // If any of M's operands demand more bits than MinBW then M cannot be
      // performed safely in MinBW.

        // For constant shift amounts, check if the shift would result in
            U.getOperandNo() == 1)
          return CI->uge(MinBW);
/// Add all access groups in @p AccGroups to @p List.
template <typename ListT>

  // Interpret an access group as a list containing itself.
    List.insert(AccGroups);

  for (const auto &AccGroupListOp : AccGroups->operands()) {

  if (AccGroups1 == AccGroups2)

  if (Union.size() == 0)

  if (Union.size() == 1)

  if (!MayAccessMem1 && !MayAccessMem2)

    return Inst2->getMetadata(LLVMContext::MD_access_group);

    return Inst1->getMetadata(LLVMContext::MD_access_group);

  // Use set for scalable 'contains' check.

    if (AccGroupSet2.count(MD1))

      if (AccGroupSet2.count(Item))

  if (Intersection.size() == 0)

  if (Intersection.size() == 1)
/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
/// vectorization.

  static const unsigned SupportedIDs[] = {
      LLVMContext::MD_tbaa,         LLVMContext::MD_alias_scope,
      LLVMContext::MD_noalias,      LLVMContext::MD_fpmath,
      LLVMContext::MD_nontemporal,  LLVMContext::MD_invariant_load,
      LLVMContext::MD_access_group, LLVMContext::MD_mmra};

  // Remove any unsupported metadata kinds from Metadata.
  for (unsigned Idx = 0; Idx != Metadata.size();) {

      // Swap element to end and remove it.

/// \returns \p I after propagating metadata from \p VL.

  for (auto &[Kind, MD] : Metadata) {
    for (int J = 1, E = VL.size(); MD && J != E; ++J) {

      case LLVMContext::MD_mmra: {

      case LLVMContext::MD_tbaa:

      case LLVMContext::MD_alias_scope:

      case LLVMContext::MD_fpmath:

      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:

      case LLVMContext::MD_access_group:
  // All 1's means mask is not needed.

  // TODO: support reversed access.

  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
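// Illustrative example (assumed semantics): for a group with a factor of 3
// that only has members at indices 0 and 2, and VF = 2, the gap mask is
// <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>.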
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
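// For example, ReplicationFactor = 3 and VF = 4 produce the mask
// <0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3>.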
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(j * VF + i);
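// For example, VF = 4 and NumVecs = 2 produce the mask <0, 4, 1, 5, 2, 6, 3, 7>.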
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Start + i * Stride);
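// For example, Start = 0, Stride = 3 and VF = 4 produce the mask <0, 3, 6, 9>.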
llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
                                                      unsigned NumInts,
                                                      unsigned NumUndefs) {

  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Start + i);

  for (unsigned i = 0; i < NumUndefs; i++)
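// For example, Start = 0, NumInts = 4 and NumUndefs = 4 produce the mask
// <0, 1, 2, 3, undef, undef, undef, undef>.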
  // Avoid casts in the loop and make sure we have a reasonable number.
  int NumEltsSigned = NumElts;
  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");

  // If the mask chooses an element from operand 1, reduce it to choose from the
  // corresponding element of operand 0. Undef mask elements are unchanged.
  for (int MaskElt : Mask) {
    assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
    int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
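// Worked example: with NumElts = 4 the binary mask <0, 5, 2, 7> becomes the
// unary mask <0, 1, 2, 3>, which is equivalent when both shuffle operands are
// the same value.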
/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");

  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");

  if (NumElts1 > NumElts2) {
    // Extend with UNDEFs.
    V2 = Builder.CreateShuffleVector(

  return Builder.CreateShuffleVector(

  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");

    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");

    // Push the last vector if the total number of vectors is odd.
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);

    NumVecs = ResList.size();
  } while (NumVecs > 1);

         "Mask must be a vector of i1");

    if (auto *MaskElt = ConstMask->getAggregateElement(I))

         "Mask must be a vector of i1");

    if (auto *MaskElt = ConstMask->getAggregateElement(I))

         "Mask must be a vector of i1");

    if (auto *MaskElt = ConstMask->getAggregateElement(I))
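// Illustrative examples (assumed semantics): for <i1 1, i1 undef>
// maskIsAllOneOrUndef returns true; for <i1 0, i1 undef> maskIsAllZeroOrUndef
// returns true; for <i1 0, i1 1> only maskContainsAllOneOrUndef returns true.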
/// TODO: This is a lot like known bits, but for
/// vectors. Is there something we can common this with?

         "Mask must be a fixed width vector of i1");

  const unsigned VWidth =
      cast<FixedVectorType>(Mask->getType())->getNumElements();

    for (unsigned i = 0; i < VWidth; i++)
      if (CV->getAggregateElement(i)->isNullValue())

  return DemandedElts;
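// Worked example: for the constant mask <i1 1, i1 0, i1 undef, i1 1> the
// possibly-demanded lanes are {0, 2, 3}; only lanes whose mask element is a
// known zero are cleared.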
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);

void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const DenseMap<Value *, const SCEV *> &Strides) {
  auto &DL = TheLoop->getHeader()->getDataLayout();

  // Since it's desired that the load/store instructions be maintained in
  // "program order" for the interleaved access analysis, we have to visit the
  // blocks in the loop in reverse postorder (i.e., in a topological order).
  // Such an ordering will ensure that any load/store that may be executed
  // before a second load/store will precede the second load/store in
  // AccessStrideInfo.
  LoopBlocksDFS DFS(TheLoop);

  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    for (auto &I : *BB) {

      // Currently, codegen doesn't support cases where the type size doesn't
      // match the alloc size. Skip them for now.
      uint64_t Size = DL.getTypeAllocSize(ElementTy);
      if (Size * 8 != DL.getTypeSizeInBits(ElementTy))

      // We don't check wrapping here because we don't know yet if Ptr will be
      // part of a full group or a group with gaps. Checking wrapping for all
      // pointers (even those that end up in groups with no gaps) will be overly
      // conservative. For full groups, wrapping should be ok since if we would
      // wrap around the address space we would do a memory access at nullptr
      // even without the transformation. The wrapping checks are therefore
      // deferred until after we've formed the interleaved groups.
      int64_t Stride =
          getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
                       /*Assume=*/true, /*ShouldCheckWrap=*/false)

      AccessStrideInfo[&I] =
          StrideDescriptor(Stride, Scev, Size, getLoadStoreAlignment(&I));
// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence:  a = A[i];  // (1)
//                                A[i] = b;  // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence:  A[i] = a;      // (1)
//                                A[i] = b;      // (2)
//                                A[i + 1] = c;  // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
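// As an illustration, a loop such as
//
//   for (i = 0; i < N; i += 2) {
//     a = A[i];     // member 0
//     b = A[i + 1]; // member 1
//     ...
//   }
//
// yields a factor-2 interleaved load group for the two loads, which can later
// be lowered to one wide load plus shufflevectors.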
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {

  const auto &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  // Holds all interleaved load groups temporarily.
  // Groups added to this set cannot have new members added.

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();

    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {

        GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
        if (B->mayWriteToMemory())
          StoreGroups.insert(GroupB);

          LoadGroups.insert(GroupB);
    for (auto AI = std::next(BI); AI != E; ++AI) {

      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a; // (1)
      //                    | A[i-1] = b; // (2) |
      //                      A[i-3] = c; // (3)
      //                      A[i]   = d; // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.

        if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
                                  A, &*AccessStrideInfo.find(MemberOfGroupB)))
          return MemberOfGroupB;

      // If A is a load, dependencies are tolerable, there's nothing to do here.
      // If both A and B belong to the same (store) group, they are independent,
      // even if dependencies have not been recorded.
      // If both GroupA and GroupB are null, there's nothing to do here.
      if (A->mayWriteToMemory() && GroupA != GroupB) {

        // If GroupB is a load group, we have to compare AI against all
        // members of GroupB because if any load within GroupB has a dependency
        // on AI, we need to mark GroupB as complete and also release the
        // store GroupA (if A belongs to one). The former prevents incorrect
        // hoisting of load B above store A while the latter prevents incorrect
        // sinking of store A below load B.
        if (GroupB && LoadGroups.contains(GroupB))
          DependentInst = DependentMember(GroupB, &*AI);
        else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
        if (DependentInst) {
          // A has a store dependence on B (or on some load within GroupB) and
          // is part of a store group. Release A's group to prevent illegal
          // sinking of A below B. A will then be free to form another group
          // with instructions that precede it.
          if (GroupA && StoreGroups.contains(GroupA)) {
                              "dependence between "
                       << *A << " and " << *DependentInst << '\n');
            StoreGroups.remove(GroupA);
            releaseGroup(GroupA);

          // If B is a load and part of an interleave group, no earlier loads
          // can be added to B's interleave group, because this would mean the
          // DependentInst would move across store A. Mark the interleave group
          // as complete.
          if (GroupB && LoadGroups.contains(GroupB)) {
                              << " as complete.\n");
            CompletedLoadGroups.insert(GroupB);

      if (CompletedLoadGroups.contains(GroupB)) {
        // Skip trying to add A to B, continue to look for other conflicting A's
        // in groups to be released.

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive to
      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
      // here, canVectorizeMemory() should have returned false - except for the
      // case we asked for optimization remarks.
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)

      // Ignore A if the memory object of A and B don't belong to the same
      // address space.

      // Calculate the distance from A to B.
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))

      // All members of a predicated interleave-group must have the same
      // predicate, and currently must reside in the same BB.
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
          GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
                          << " into the interleave group with" << *B

        InterleaveGroupMap[A] = GroupB;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())

    } // Iteration over A accesses.
  }   // Iteration over B accesses.
  auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
                                            int Index,
                                            const char *FirstOrLast) -> bool {

    assert(Member && "Group member does not exist");

    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
                     /*Assume=*/false, /*ShouldCheckWrap=*/true)

    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);

  // Remove interleaved groups with gaps whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // FORNOW we use Assume=false;
  // TODO: Change to Assume=true but making sure we don't exceed the threshold
  // of runtime SCEV assumptions checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer doesn't
  // wrap then we can deduce that all pointers in the group don't wrap.
  // This means that we can forcefully peel the loop in order to only have to
  // check the first pointer for no-wrap. When we'll change to use Assume=true
  // we'll only need at most one runtime check per interleaved group.
  for (auto *Group : LoadGroups) {
    // Case 1: A full group. Can skip the checks; For full groups, if the wide
    // load would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and group member Factor - 1; If the latter doesn't exist we rely on
    // peeling (if it is a non-reversed access -- see Case 3).
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))

    InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");

    // Case 3: A non-reversed interleaved load group with gaps: We need
    // to execute at least one scalar epilogue iteration. This will ensure
    // we don't speculatively access memory out-of-bounds. We only need
    // to look for a member at index factor - 1, since every group must have
    // a member at index zero.
      dbgs() << "LV: Invalidate candidate interleaved group due to "
                "a reverse access with gaps.\n");
      releaseGroup(Group);

    dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
    RequiresScalarEpilogue = true;
  for (auto *Group : StoreGroups) {
    // Case 1: A full group. Can skip the checks; For full groups, if the wide
    // store would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.

    // Interleave-store-group with gaps is implemented using masked wide store.
    // Remove interleaved store groups with gaps if
    // masked-interleaved-accesses are not enabled by the target.
    if (!EnablePredicatedInterleavedMemAccesses) {
        dbgs() << "LV: Invalidate candidate interleaved store group due "
      releaseGroup(Group);

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap.
    // So we check only group member 0 (which is always guaranteed to exist),
    // and the last group member. Case 3 (scalar epilog) is not relevant for
    // stores with gaps, which are implemented with masked-store (rather than
    // speculative access, as in loads).
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))

    for (int Index = Group->getFactor() - 1; Index > 0; Index--)

        InvalidateGroupIfMemberMayWrap(Group, Index, "last");

  // If no group had triggered the requirement to create an epilogue loop,
  // there is nothing to do.

  // Release groups requiring scalar epilogues. Note that this also removes them
  // from InterleaveGroups.
  bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
    if (!Group->requiresScalarEpilogue())

        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroupWithoutRemovingFromSet(Group);

  assert(ReleasedGroup && "At least one group must be invalidated, as a "
                          "scalar epilogue was required");
  (void)ReleasedGroup;
  RequiresScalarEpilogue = false;

template <typename InstT>