1//===-------- LoopIdiomVectorize.cpp - Loop idiom vectorization -----------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
9// This pass implements a pass that recognizes certain loop idioms and
10// transforms them into more optimized versions of the same loop. In cases
11// where this happens, it can be a significant performance win.
13// We currently support two loops:
15// 1. A loop that finds the first mismatched byte in an array and returns the
16// index, i.e. something like:
23// In this example we can actually vectorize the loop despite the early exit,
24// although the loop vectorizer does not support it. It requires some extra
25// checks to deal with the possibility of faulting loads when crossing page
26// boundaries. However, even with these checks it is still profitable to do the
31// * Add support for the inverse case where we scan for a matching element.
32// * Permit 64-bit induction variable types.
33// * Recognize loops that increment the IV *after* comparing bytes.
34// * Allow 32-bit sign-extends of the IV used by the GEP.
36// 2. A loop that finds the first matching character in an array among a set of
37// possible matches, e.g.:
39// for (; first != last; ++first)
40// for (s_it = s_first; s_it != s_last; ++s_it)
41// if (*first == *s_it)
45// This corresponds to std::find_first_of (for arrays of bytes) from the C++
46// standard library. This function can be implemented efficiently for targets
47// that support @llvm.experimental.vector.match. For example, on AArch64 targets
48// that implement SVE2, this lower to a MATCH instruction, which enables us to
49// perform up to 16x16=256 comparisons in one go. This can lead to very
50// significant speedups.
54// * Add support for `find_first_not_of' loops (i.e. with not-equal comparison).
55// * Make VF a configurable parameter (right now we assume 128-bit vectors).
56// * Potentially adjust the cost model to let the transformation kick-in even if
57// @llvm.experimental.vector.match doesn't have direct support in hardware.
59//===----------------------------------------------------------------------===//
61// NOTE: This Pass matches really specific loop patterns because it's only
62// supposed to be a temporary solution until our LoopVectorizer is powerful
63// enough to vectorize them automatically.
65//===----------------------------------------------------------------------===//
81 #define DEBUG_TYPE "loop-idiom-vectorize"
85 cl::desc(
"Disable Loop Idiom Vectorize Pass."));
89 cl::desc(
"The vectorization style for loop idiom transform."),
91 "Use masked vector intrinsics"),
93 "predicated",
"Use VP intrinsics")),
99 cl::desc(
"Proceed with Loop Idiom Vectorize Pass, but do "
100 "not convert byte-compare loop(s)."));
104 cl::desc(
"The vectorization factor for byte-compare patterns."),
110 cl::desc(
"Do not convert find-first-byte loop(s)."));
114 cl::desc(
"Verify loops generated Loop Idiom Vectorize Pass."));
117class LoopIdiomVectorize {
119 unsigned ByteCompareVF;
120 Loop *CurLoop =
nullptr;
126 // Blocks that will be used for inserting vectorized code.
128 BasicBlock *VectorLoopPreheaderBlock =
nullptr;
130 BasicBlock *VectorLoopMismatchBlock =
nullptr;
137 : VectorizeStyle(S), ByteCompareVF(VF), DT(DT), LI(LI),
TTI(
TTI),
DL(
DL) {
143 /// \name Countable Loop Idiom Handling
146 bool runOnCountableLoop();
147 bool runOnLoopBlock(BasicBlock *BB,
const SCEV *BECount,
148 SmallVectorImpl<BasicBlock *> &ExitBlocks);
150 bool recognizeByteCompare();
153 GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
154 Instruction *Index,
Value *Start,
Value *MaxLen);
157 GetElementPtrInst *GEPA,
158 GetElementPtrInst *GEPB,
Value *ExtStart,
160 Value *createPredicatedFindMismatch(
IRBuilder<> &Builder, DomTreeUpdater &DTU,
161 GetElementPtrInst *GEPA,
162 GetElementPtrInst *GEPB,
Value *ExtStart,
165 void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
166 PHINode *IndPhi,
Value *MaxLen, Instruction *Index,
167 Value *Start,
bool IncIdx, BasicBlock *FoundBB,
170 bool recognizeFindFirstByte();
173 unsigned VF,
Type *CharTy,
Value *IndPhi,
174 BasicBlock *ExitSucc, BasicBlock *ExitFail,
178 void transformFindFirstByte(PHINode *IndPhi,
unsigned VF,
Type *CharTy,
179 BasicBlock *ExitSucc, BasicBlock *ExitFail,
184}
// anonymous namespace
192 const auto *
DL = &L.getHeader()->getDataLayout();
198 unsigned BCVF = ByteCompareVF;
202 LoopIdiomVectorize LIV(VecStyle, BCVF, &AR.
DT, &AR.
LI, &AR.
TTI,
DL);
209//===----------------------------------------------------------------------===//
211// Implementation of LoopIdiomVectorize
213//===----------------------------------------------------------------------===//
215bool LoopIdiomVectorize::run(
Loop *L) {
218 Function &
F = *L->getHeader()->getParent();
222 if (
F.hasFnAttribute(Attribute::NoImplicitFloat)) {
224 <<
" due to its NoImplicitFloat attribute");
228 // If the loop could not be converted to canonical form, it must have an
229 // indirectbr in it, just give up.
230 if (!L->getLoopPreheader())
234 << CurLoop->getHeader()->getName() <<
"\n");
236 if (recognizeByteCompare())
239 if (recognizeFindFirstByte())
248 // Look through the incoming values to find ScalarRes, meaning this is a
249 // PHI collecting the results of the transformation.
251 for (
Value *
Op : PN.incoming_values())
252 if (
Op == ScalarRes) {
257 // Any PHI that depended upon the result of the transformation needs a new
258 // incoming value from IncBB.
260 PN.addIncoming(VectorRes, IncBB);
262 // There should be no other outside uses of other values in the
263 // original loop. Any incoming values should either:
264 // 1. Be for blocks outside the loop, which aren't interesting. Or ..
265 // 2. These are from blocks in the loop with values defined outside
266 // the loop. We should a similar incoming value from CmpBB.
268 if (L->contains(BB)) {
269 PN.addIncoming(PN.getIncomingValueForBlock(BB), IncBB);
276bool LoopIdiomVectorize::recognizeByteCompare() {
277 // Currently the transformation only works on scalable vector types, although
278 // there is no fundamental reason why it cannot be made to work for fixed
281 // We also need to know the minimum page size for the target in order to
282 // generate runtime memory checks to ensure the vector version won't fault.
283 if (!
TTI->supportsScalableVectors() || !
TTI->getMinPageSize().has_value() ||
289 // In LoopIdiomVectorize::run we have already checked that the loop
290 // has a preheader so we can assume it's in a canonical form.
291 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 2)
298 auto LoopBlocks = CurLoop->getBlocks();
299 // The first block in the loop should contain only 4 instructions, e.g.
302 // %res.phi = phi i32 [ %start, %ph ], [ %inc, %while.body ]
303 // %inc = add i32 %res.phi, 1
304 // %cmp.not = icmp eq i32 %inc, %n
305 // br i1 %cmp.not, label %while.end, label %while.body
307 if (LoopBlocks[0]->sizeWithoutDebug() > 4)
310 // The second block should contain 7 instructions, e.g.
313 // %idx = zext i32 %inc to i64
314 // %idx.a = getelementptr inbounds i8, ptr %a, i64 %idx
315 // %load.a = load i8, ptr %idx.a
316 // %idx.b = getelementptr inbounds i8, ptr %b, i64 %idx
317 // %load.b = load i8, ptr %idx.b
318 // %cmp.not.ld = icmp eq i8 %load.a, %load.b
319 // br i1 %cmp.not.ld, label %while.cond, label %while.end
321 if (LoopBlocks[1]->sizeWithoutDebug() > 7)
324 // The incoming value to the PHI node from the loop should be an add of 1.
325 Value *StartIdx =
nullptr;
335 // Limit to 32-bit types for now
336 if (!Index || !
Index->getType()->isIntegerTy(32) ||
340 // If we match the pattern, PN and Index will be replaced with the result of
341 // the cttz.elts intrinsic. If any other instructions are used outside of
342 // the loop, we cannot replace it.
345 if (&
I != PN && &
I != Index)
350 // Match the branch instruction for the header
353 if (!
match(Header->getTerminator(),
357 !CurLoop->contains(WhileBB))
360 // WhileBB should contain the pattern of load & compare instructions. Match
361 // the pattern and find the GEP instructions used by the loads.
364 Value *LoadA, *LoadB;
369 !CurLoop->contains(TrueBB))
390 // Check we are loading i8 values from two loop invariant pointers
391 if (!CurLoop->isLoopInvariant(PtrA) || !CurLoop->isLoopInvariant(PtrB) ||
398 // Check that the index to the GEPs is the index we found earlier
407 // We only ever expect the pre-incremented index value to be used inside the
412 // Ensure that when the Found and End blocks are identical the PHIs have the
413 // supported format. We don't currently allow cases like this:
416 // br i1 %cmp.not, label %while.end, label %while.body
420 // br i1 %cmp.not2, label %while.cond, label %while.end
423 // %final_ptr = phi ptr [ %c, %while.body ], [ %d, %while.cond ]
425 // Where the incoming values for %final_ptr are unique and from each of the
426 // loop blocks, but not actually defined in the loop. This requires extra
427 // work setting up the byte.compare block, i.e. by introducing a select to
428 // choose the correct value.
429 // TODO: We could add support for this in future.
430 if (FoundBB == EndBB) {
432 Value *WhileCondVal = EndPN.getIncomingValueForBlock(Header);
433 Value *WhileBodyVal = EndPN.getIncomingValueForBlock(WhileBB);
435 // The value of the index when leaving the while.cond block is always the
436 // same as the end value (MaxLen) so we permit either. The value when
437 // leaving the while.body block should only be the index. Otherwise for
438 // any other values we only allow ones that are same for both blocks.
439 if (WhileCondVal != WhileBodyVal &&
440 ((WhileCondVal != Index && WhileCondVal != MaxLen) ||
441 (WhileBodyVal != Index)))
449 // The index is incremented before the GEP/Load pair so we need to
450 // add 1 to the start value.
451 transformByteCompare(GEPA, GEPB, PN, MaxLen, Index, StartIdx,
/*IncIdx=*/true,
456Value *LoopIdiomVectorize::createMaskedFindMismatch(
469 Intrinsic::get_active_lane_mask, {PredVTy, I64Type}, {ExtStart, ExtEnd});
473 Builder.
CreateMul(VecLen, ConstantInt::get(I64Type, ByteCompareVF),
"",
474 /*HasNUW=*/true,
/*HasNSW=*/true);
480 Builder.
Insert(JumpToVectorLoop);
483 VectorLoopStartBlock}});
485 // Set up the first vector loop block by creating the PHIs, doing the vector
486 // loads and comparing the vectors.
488 PHINode *LoopPred = Builder.
CreatePHI(PredVTy, 2,
"mismatch_vec_loop_pred");
489 LoopPred->
addIncoming(InitialPred, VectorLoopPreheaderBlock);
490 PHINode *VectorIndexPhi = Builder.
CreatePHI(I64Type, 2,
"mismatch_vec_index");
491 VectorIndexPhi->
addIncoming(ExtStart, VectorLoopPreheaderBlock);
492 Type *VectorLoadType =
496 Value *VectorLhsGep =
499 Align(1), LoopPred, Passthru);
501 Value *VectorRhsGep =
504 Align(1), LoopPred, Passthru);
507 VectorMatchCmp = Builder.
CreateSelect(LoopPred, VectorMatchCmp, PFalse);
510 VectorLoopMismatchBlock, VectorLoopIncBlock, VectorMatchHasActiveLanes);
511 Builder.
Insert(VectorEarlyExit);
517 // Increment the index counter and calculate the predicate for the next
518 // iteration of the loop. We branch back to the start of the loop if there
519 // is at least one active lane.
521 Value *NewVectorIndexPhi =
522 Builder.
CreateAdd(VectorIndexPhi, VecLen,
"",
523 /*HasNUW=*/true,
/*HasNSW=*/true);
524 VectorIndexPhi->
addIncoming(NewVectorIndexPhi, VectorLoopIncBlock);
527 {PredVTy, I64Type}, {NewVectorIndexPhi, ExtEnd});
528 LoopPred->
addIncoming(NewPred, VectorLoopIncBlock);
530 Value *PredHasActiveLanes =
534 Builder.
Insert(VectorLoopBranchBack);
540 // If we found a mismatch then we need to calculate which lane in the vector
541 // had a mismatch and add that on to the current loop index.
543 PHINode *FoundPred = Builder.
CreatePHI(PredVTy, 1,
"mismatch_vec_found_pred");
544 FoundPred->
addIncoming(VectorMatchCmp, VectorLoopStartBlock);
546 Builder.
CreatePHI(PredVTy, 1,
"mismatch_vec_last_loop_pred");
547 LastLoopPred->
addIncoming(LoopPred, VectorLoopStartBlock);
549 Builder.
CreatePHI(I64Type, 1,
"mismatch_vec_found_index");
550 VectorFoundIndex->
addIncoming(VectorIndexPhi, VectorLoopStartBlock);
555 Value *VectorLoopRes64 = Builder.
CreateAdd(VectorFoundIndex, Ctz,
"",
556 /*HasNUW=*/true,
/*HasNSW=*/true);
557 return Builder.
CreateTrunc(VectorLoopRes64, ResType);
560Value *LoopIdiomVectorize::createPredicatedFindMismatch(
565 Type *ResType = I32Type;
571 Builder.
Insert(JumpToVectorLoop);
574 VectorLoopStartBlock}});
576 // Set up the first Vector loop block by creating the PHIs, doing the vector
577 // loads and comparing the vectors.
579 auto *VectorIndexPhi = Builder.
CreatePHI(I64Type, 2,
"mismatch_vector_index");
580 VectorIndexPhi->
addIncoming(ExtStart, VectorLoopPreheaderBlock);
582 // Calculate AVL by subtracting the vector loop index from the trip count
583 Value *AVL = Builder.
CreateSub(ExtEnd, VectorIndexPhi,
"avl",
/*HasNUW=*/true,
587 auto *VF = ConstantInt::get(I32Type, ByteCompareVF);
590 {I64Type}, {AVL, VF, Builder.
getTrue()});
591 Value *GepOffset = VectorIndexPhi;
593 Value *VectorLhsGep =
599 Intrinsic::vp_load, {VectorLoadType, VectorLhsGep->
getType()},
600 {VectorLhsGep, AllTrueMask, VL},
nullptr,
"lhs.load");
602 Value *VectorRhsGep =
605 Intrinsic::vp_load, {VectorLoadType, VectorLhsGep->
getType()},
606 {VectorRhsGep, AllTrueMask, VL},
nullptr,
"rhs.load");
608 Value *VectorMatchCmp =
609 Builder.
CreateICmpNE(VectorLhsLoad, VectorRhsLoad,
"mismatch.cmp");
611 Intrinsic::vp_cttz_elts, {ResType, VectorMatchCmp->
getType()},
612 {VectorMatchCmp,
/*ZeroIsPoison=*/Builder.
getInt1(
false), AllTrueMask,
616 VectorLoopIncBlock, MismatchFound);
617 Builder.
Insert(VectorEarlyExit);
623 // Increment the index counter and calculate the predicate for the next
624 // iteration of the loop. We branch back to the start of the loop if there
625 // is at least one active lane.
628 Value *NewVectorIndexPhi =
629 Builder.
CreateAdd(VectorIndexPhi, VL64,
"",
630 /*HasNUW=*/true,
/*HasNSW=*/true);
631 VectorIndexPhi->
addIncoming(NewVectorIndexPhi, VectorLoopIncBlock);
633 auto *VectorLoopBranchBack =
635 Builder.
Insert(VectorLoopBranchBack);
641 // If we found a mismatch then we need to calculate which lane in the vector
642 // had a mismatch and add that on to the current loop index.
645 // Add LCSSA phis for CTZ and VectorIndexPhi.
647 CTZLCSSAPhi->
addIncoming(CTZ, VectorLoopStartBlock);
648 auto *VectorIndexLCSSAPhi =
650 VectorIndexLCSSAPhi->
addIncoming(VectorIndexPhi, VectorLoopStartBlock);
653 Value *VectorLoopRes64 = Builder.
CreateAdd(VectorIndexLCSSAPhi, CTZI64,
"",
654 /*HasNUW=*/true,
/*HasNSW=*/true);
655 return Builder.
CreateTrunc(VectorLoopRes64, ResType);
658Value *LoopIdiomVectorize::expandFindMismatch(
664 // Get the arguments and types for the intrinsic.
665 BasicBlock *Preheader = CurLoop->getLoopPreheader();
671 // Split block in the original loop preheader.
672 EndBlock =
SplitBlock(Preheader, PHBranch, DT, LI,
nullptr,
"mismatch_end");
674 // Create the blocks that we're going to need:
675 // 1. A block for checking the zero-extended length exceeds 0
676 // 2. A block to check that the start and end addresses of a given array
677 // lie on the same page.
678 // 3. The vector loop preheader.
679 // 4. The first vector loop block.
680 // 5. The vector loop increment block.
681 // 6. A block we can jump to from the vector loop when a mismatch is found.
682 // 7. The first block of the scalar loop itself, containing PHIs , loads
684 // 8. A scalar loop increment block to increment the PHIs and go back
688 Ctx,
"mismatch_min_it_check", EndBlock->getParent(), EndBlock);
690 // Update the terminator added by SplitBlock to branch to the first block
694 Ctx,
"mismatch_mem_check", EndBlock->getParent(), EndBlock);
697 Ctx,
"mismatch_vec_loop_preheader", EndBlock->getParent(), EndBlock);
700 EndBlock->getParent(), EndBlock);
703 EndBlock->getParent(), EndBlock);
706 EndBlock->getParent(), EndBlock);
709 Ctx,
"mismatch_loop_pre", EndBlock->getParent(), EndBlock);
715 Ctx,
"mismatch_loop_inc", EndBlock->getParent(), EndBlock);
720 // Update LoopInfo with the new vector & scalar loops.
721 auto VectorLoop = LI->AllocateLoop();
722 auto ScalarLoop = LI->AllocateLoop();
724 if (CurLoop->getParentLoop()) {
725 CurLoop->getParentLoop()->addBasicBlockToLoop(MinItCheckBlock, *LI);
726 CurLoop->getParentLoop()->addBasicBlockToLoop(MemCheckBlock, *LI);
727 CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopPreheaderBlock,
729 CurLoop->getParentLoop()->addChildLoop(VectorLoop);
730 CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopMismatchBlock, *LI);
731 CurLoop->getParentLoop()->addBasicBlockToLoop(LoopPreHeaderBlock, *LI);
732 CurLoop->getParentLoop()->addChildLoop(ScalarLoop);
734 LI->addTopLevelLoop(VectorLoop);
735 LI->addTopLevelLoop(ScalarLoop);
738 // Add the new basic blocks to their associated loops.
739 VectorLoop->addBasicBlockToLoop(VectorLoopStartBlock, *LI);
740 VectorLoop->addBasicBlockToLoop(VectorLoopIncBlock, *LI);
742 ScalarLoop->addBasicBlockToLoop(LoopStartBlock, *LI);
743 ScalarLoop->addBasicBlockToLoop(LoopIncBlock, *LI);
745 // Set up some types and constants that we intend to reuse.
748 // Check the zero-extended iteration count > 0
752 // This check doesn't really cost us very much.
758 LLVMContext::MD_prof,
760 Builder.
Insert(MinItCheckBr);
766 // For each of the arrays, check the start/end addresses are on the same
770 // The early exit in the original loop means that when performing vector
771 // loads we are potentially reading ahead of the early exit. So we could
772 // fault if crossing a page boundary. Therefore, we create runtime memory
773 // checks based on the minimum page size as follows:
774 // 1. Calculate the addresses of the first memory accesses in the loop,
775 // i.e. LhsStart and RhsStart.
776 // 2. Get the last accessed addresses in the loop, i.e. LhsEnd and RhsEnd.
777 // 3. Determine which pages correspond to all the memory accesses, i.e
778 // LhsStartPage, LhsEndPage, RhsStartPage, RhsEndPage.
779 // 4. If LhsStartPage == LhsEndPage and RhsStartPage == RhsEndPage, then
780 // we know we won't cross any page boundaries in the loop so we can
781 // enter the vector loop! Otherwise we fall back on the scalar loop.
800 Value *CombinedPageCmp = Builder.
CreateOr(LhsPageCmp, RhsPageCmp);
802 LoopPreHeaderBlock, VectorLoopPreheaderBlock, CombinedPageCmp);
806 Builder.
Insert(CombinedPageCmpCmpBr);
812 // Set up the vector loop preheader, i.e. calculate initial loop predicate,
813 // zero-extend MaxLen to 64-bits, determine the number of vector elements
814 // processed in each iteration, etc.
817 // At this point we know two things must be true:
819 // 2. ExtMaxLen <= MinPageSize due to the page checks.
820 // Therefore, we know that we can use a 64-bit induction variable that
821 // starts from 0 -> ExtMaxLen and it will not overflow.
822 Value *VectorLoopRes =
nullptr;
823 switch (VectorizeStyle) {
826 createMaskedFindMismatch(Builder, DTU, GEPA, GEPB, ExtStart, ExtEnd);
829 VectorLoopRes = createPredicatedFindMismatch(Builder, DTU, GEPA, GEPB,
839 // Generate code for scalar loop.
850 // Otherwise compare the values
851 // Load bytes from each array and compare them.
863 // If we have a mismatch then exit the loop ...
865 Builder.
Insert(MatchCmpBr);
870 // Have we reached the maximum permitted length for the loop?
872 Value *PhiInc = Builder.
CreateAdd(IndexPhi, ConstantInt::get(ResType, 1),
"",
873 /*HasNUW=*/Index->hasNoUnsignedWrap(),
874 /*HasNSW=*/Index->hasNoSignedWrap());
883 // In the end block we need to insert a PHI node to deal with three cases:
884 // 1. We didn't find a mismatch in the scalar loop, so we return MaxLen.
885 // 2. We exitted the scalar loop early due to a mismatch and need to return
886 // the index that we found.
887 // 3. We didn't find a mismatch in the vector loop, so we return MaxLen.
888 // 4. We exitted the vector loop early due to a mismatch and need to return
889 // the index that we found.
890 Builder.
SetInsertPoint(EndBlock, EndBlock->getFirstInsertionPt());
895 ResPhi->
addIncoming(VectorLoopRes, VectorLoopMismatchBlock);
900 ScalarLoop->verifyLoop();
901 VectorLoop->verifyLoop();
902 if (!VectorLoop->isRecursivelyLCSSAForm(*DT, *LI))
904 if (!ScalarLoop->isRecursivelyLCSSAForm(*DT, *LI))
918 // Insert the byte compare code at the end of the preheader block
919 BasicBlock *Preheader = CurLoop->getLoopPreheader();
926 // Increment the pointer if this was done before the loads in the loop.
931 expandFindMismatch(Builder, DTU, GEPA, GEPB, Index, Start, MaxLen);
933 // Replaces uses of index & induction Phi with intrinsic (we already
934 // checked that the the first instruction of Header is the Phi above).
935 assert(IndPhi->
hasOneUse() &&
"Index phi node has more than one use!");
936 Index->replaceAllUsesWith(ByteCmpRes);
939 "Expected preheader to terminate with an unconditional branch.");
941 // If no mismatch was found, we can jump to the end block. Create a
942 // new basic block for the compare instruction.
945 CmpBB->moveBefore(EndBB);
947 // Replace the branch in the preheader with an always-true conditional branch.
948 // This ensures there is still a reference to the original loop.
955 // Create the branch to either the end or found block depending on the value
956 // returned by the intrinsic.
958 if (FoundBB != EndBB) {
969 // Ensure all Phis in the successors of CmpBB have an incoming value from it.
971 if (EndBB != FoundBB)
974 // The new CmpBB block isn't part of the loop, but will need to be added to
975 // the outer loop if there is one.
976 if (!CurLoop->isOutermost())
977 CurLoop->getParentLoop()->addBasicBlockToLoop(CmpBB, *LI);
980 CurLoop->getParentLoop()->verifyLoop();
981 if (!CurLoop->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI))
986bool LoopIdiomVectorize::recognizeFindFirstByte() {
987 // Currently the transformation only works on scalable vector types, although
988 // there is no fundamental reason why it cannot be made to work for fixed
989 // vectors. We also need to know the target's minimum page size in order to
990 // generate runtime memory checks to ensure the vector version won't fault.
991 if (!
TTI->supportsScalableVectors() || !
TTI->getMinPageSize().has_value() ||
995 // Define some constants we need throughout.
999 // We are expecting the four blocks defined below: Header, MatchBB, InnerBB,
1000 // and OuterBB. For now, we will bail our for almost anything else. The Four
1001 // blocks contain one nested loop.
1002 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 4 ||
1003 CurLoop->getSubLoops().size() != 1)
1006 auto *InnerLoop = CurLoop->getSubLoops().front();
1011 // Check instruction counts.
1012 auto LoopBlocks = CurLoop->getBlocks();
1013 if (LoopBlocks[0]->sizeWithoutDebug() > 3 ||
1014 LoopBlocks[1]->sizeWithoutDebug() > 4 ||
1015 LoopBlocks[2]->sizeWithoutDebug() > 3 ||
1016 LoopBlocks[3]->sizeWithoutDebug() > 3)
1019 // Check that no instruction other than IndPhi has outside uses.
1027 // Match the branch instruction in the header. We are expecting an
1028 // unconditional branch to the inner loop.
1031 // %14 = phi ptr [ %24, %OuterBB ], [ %3, %Header.preheader ]
1032 // %15 = load i8, ptr %14, align 1
1033 // br label %MatchBB
1036 !InnerLoop->contains(MatchBB))
1039 // MatchBB should be the entrypoint into the inner loop containing the
1040 // comparison between a search element and a needle.
1043 // %20 = phi ptr [ %7, %Header ], [ %17, %InnerBB ]
1044 // %21 = load i8, ptr %20, align 1
1045 // %22 = icmp eq i8 %15, %21
1046 // br i1 %22, label %ExitSucc, label %InnerBB
1048 Value *LoadSearch, *LoadNeedle;
1056 // We expect outside uses of `IndPhi' in ExitSucc (and only there).
1064 // Match the loads and check they are simple.
1065 Value *Search, *Needle;
1072 // Check we are loading valid characters.
1077 // Pick the vectorisation factor based on CharTy, work out the cost of the
1078 // match intrinsic and decide if we should use it.
1079 // Note: For the time being we assume 128-bit vectors.
1089 // The loads come from two PHIs, each with two incoming values.
1096 // One PHI comes from the outer loop (PSearch), the other one from the inner
1097 // loop (PNeedle). PSearch effectively corresponds to IndPhi.
1098 if (InnerLoop->contains(PSearch))
1100 if (PSearch != &Header->front() || PNeedle != &MatchBB->
front())
1103 // The incoming values of both PHI nodes should be a gep of 1.
1119 // Check the GEPs result type matches `CharTy'.
1126 // InnerBB should increment the address of the needle pointer.
1129 // %17 = getelementptr inbounds i8, ptr %20, i64 1
1130 // %18 = icmp eq ptr %17, %10
1131 // br i1 %18, label %OuterBB, label %MatchBB
1138 !CurLoop->contains(OuterBB))
1141 // OuterBB should increment the address of the search element pointer.
1144 // %24 = getelementptr inbounds i8, ptr %14, i64 1
1145 // %25 = icmp eq ptr %24, %6
1146 // br i1 %25, label %ExitFail, label %Header
1155 if (!CurLoop->isLoopInvariant(SearchStart) ||
1156 !CurLoop->isLoopInvariant(SearchEnd) ||
1157 !CurLoop->isLoopInvariant(NeedleStart) ||
1158 !CurLoop->isLoopInvariant(NeedleEnd))
1161 LLVM_DEBUG(
dbgs() <<
"Found idiom in loop: \n" << *CurLoop <<
"\n\n");
1163 transformFindFirstByte(IndPhi, VF, CharTy, ExitSucc, ExitFail, SearchStart,
1164 SearchEnd, NeedleStart, NeedleEnd);
1168Value *LoopIdiomVectorize::expandFindFirstByte(
1173 // Set up some types and constants that we intend to reuse.
1178 auto *ConstVF = ConstantInt::get(I64Ty, VF);
1180 // Other common arguments.
1181 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1185 // Split block in the original loop preheader.
1186 // SPH is the new preheader to the old scalar loop.
1188 nullptr,
"scalar_preheader");
1190 // Create the blocks that we're going to use.
1192 // We will have the following loops:
1193 // (O) Outer loop where we iterate over the elements of the search array.
1194 // (I) Inner loop where we iterate over the elements of the needle array.
1196 // Overall, the blocks do the following:
1197 // (0) Check if the arrays can't cross page boundaries. If so go to (1),
1198 // otherwise fall back to the original scalar loop.
1199 // (1) Load the search array. Go to (2).
1200 // (2) (a) Load the needle array.
1201 // (b) Splat the first element to the inactive lanes.
1202 // (c) Check if any elements match. If so go to (3), otherwise go to (4).
1203 // (3) Compute the index of the first match and exit.
1204 // (4) Check if we've reached the end of the needle array. If not loop back to
1205 // (2), otherwise go to (5).
1206 // (5) Check if we've reached the end of the search array. If not loop back to
1207 // (1), otherwise exit.
1208 // Blocks (0,3) are not part of any loop. Blocks (1,5) and (2,4) belong to
1209 // the outer and inner loops, respectively.
1222 // Update LoopInfo with the new loops.
1223 auto OuterLoop = LI->AllocateLoop();
1224 auto InnerLoop = LI->AllocateLoop();
1226 if (
auto ParentLoop = CurLoop->getParentLoop()) {
1227 ParentLoop->addBasicBlockToLoop(BB0, *LI);
1228 ParentLoop->addChildLoop(OuterLoop);
1229 ParentLoop->addBasicBlockToLoop(BB3, *LI);
1231 LI->addTopLevelLoop(OuterLoop);
1234 // Add the inner loop to the outer.
1235 OuterLoop->addChildLoop(InnerLoop);
1237 // Add the new basic blocks to the corresponding loops.
1238 OuterLoop->addBasicBlockToLoop(BB1, *LI);
1239 OuterLoop->addBasicBlockToLoop(BB5, *LI);
1240 InnerLoop->addBasicBlockToLoop(BB2, *LI);
1241 InnerLoop->addBasicBlockToLoop(BB4, *LI);
1243 // Update the terminator added by SplitBlock to branch to the first block.
1248 // (0) Check if we could be crossing a page boundary; if so, fallback to the
1249 // old scalar loops. Also create a predicate of VF elements to be used in the
1252 Value *ISearchStart =
1256 Value *INeedleStart =
1261 Builder.
CreateIntrinsic(Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
1262 {ConstantInt::get(I64Ty, 0), ConstVF});
1266 Value *SearchStartPage =
1267 Builder.
CreateLShr(ISearchStart, AddrShiftAmt,
"search_start_page");
1268 Value *SearchEndPage =
1269 Builder.
CreateLShr(ISearchEnd, AddrShiftAmt,
"search_end_page");
1270 Value *NeedleStartPage =
1271 Builder.
CreateLShr(INeedleStart, AddrShiftAmt,
"needle_start_page");
1272 Value *NeedleEndPage =
1273 Builder.
CreateLShr(INeedleEnd, AddrShiftAmt,
"needle_end_page");
1274 Value *SearchPageCmp =
1275 Builder.
CreateICmpNE(SearchStartPage, SearchEndPage,
"search_page_cmp");
1276 Value *NeedlePageCmp =
1277 Builder.
CreateICmpNE(NeedleStartPage, NeedleEndPage,
"needle_page_cmp");
1279 Value *CombinedPageCmp =
1280 Builder.
CreateOr(SearchPageCmp, NeedlePageCmp,
"combined_page_cmp");
1287 // (1) Load the search array and branch to the inner loop.
1291 Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
1294 PredSearch = Builder.
CreateAnd(PredVF, PredSearch,
"search_masked");
1296 CharVTy, Search,
Align(1), PredSearch, Passthru,
"search_load_vec");
1304 // (2.a) Load the needle array.
1306 Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
1309 PredNeedle = Builder.
CreateAnd(PredVF, PredNeedle,
"needle_masked");
1311 CharVTy, Needle,
Align(1), PredNeedle, Passthru,
"needle_load_vec");
1313 // (2.b) Splat the first element to the inactive lanes.
1317 Needle0,
"needle0");
1318 LoadNeedle = Builder.
CreateSelect(PredNeedle, LoadNeedle, Needle0Splat,
1323 // (2.c) Test if there's a match.
1325 Intrinsic::experimental_vector_match, {CharVTy, LoadNeedle->
getType()},
1326 {LoadSearch, LoadNeedle, PredSearch},
nullptr,
"match_pred");
1332 // (3) We found a match. Compute the index of its location and exit.
1338 Intrinsic::experimental_cttz_elts, {I64Ty, MatchPred->
getType()},
1339 {MatchPredLCSSA,
/*ZeroIsPoison=*/Builder.
getInt1(
true)},
nullptr,
1342 Builder.
CreateGEP(CharTy, MatchLCSSA, MatchCnt,
"match_res");
1346 // (4) Check if we've reached the end of the needle array.
1349 Builder.
CreateGEP(CharTy, Needle, ConstVF,
"needle_next_vec");
1354 // (5) Check if we've reached the end of the search array.
1357 Builder.
CreateGEP(CharTy, Search, ConstVF,
"search_next_vec");
1363 // Set up the PHI nodes.
1368 // These are needed to retain LCSSA form.
1372 // Ensure all Phis in the successors of BB3/BB5 have an incoming value from
1375 if (ExitSucc != ExitFail)
1379 OuterLoop->verifyLoop();
1380 InnerLoop->verifyLoop();
1381 if (!OuterLoop->isRecursivelyLCSSAForm(*DT, *LI))
1388void LoopIdiomVectorize::transformFindFirstByte(
1392 // Insert the find first byte code at the end of the preheader block.
1393 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1399 expandFindFirstByte(Builder, DTU, VF, CharTy, IndPhi, ExitSucc, ExitFail,
1400 SearchStart, SearchEnd, NeedleStart, NeedleEnd);
1403 "Expected preheader to terminate with an unconditional branch.");
1406 CurLoop->getParentLoop()->verifyLoop();
1407 if (!CurLoop->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static cl::opt< bool > VerifyLoops("loop-idiom-vectorize-verify", cl::Hidden, cl::init(false), cl::desc("Verify loops generated Loop Idiom Vectorize Pass."))
static cl::opt< bool > DisableAll("disable-loop-idiom-vectorize-all", cl::Hidden, cl::init(false), cl::desc("Disable Loop Idiom Vectorize Pass."))
static void fixSuccessorPhis(Loop *L, Value *ScalarRes, Value *VectorRes, BasicBlock *SuccBB, BasicBlock *IncBB)
static cl::opt< LoopIdiomVectorizeStyle > LITVecStyle("loop-idiom-vectorize-style", cl::Hidden, cl::desc("The vectorization style for loop idiom transform."), cl::values(clEnumValN(LoopIdiomVectorizeStyle::Masked, "masked", "Use masked vector intrinsics"), clEnumValN(LoopIdiomVectorizeStyle::Predicated, "predicated", "Use VP intrinsics")), cl::init(LoopIdiomVectorizeStyle::Masked))
static cl::opt< bool > DisableFindFirstByte("disable-loop-idiom-vectorize-find-first-byte", cl::Hidden, cl::init(false), cl::desc("Do not convert find-first-byte loop(s)."))
static cl::opt< unsigned > ByteCmpVF("loop-idiom-vectorize-bytecmp-vf", cl::Hidden, cl::desc("The vectorization factor for byte-compare patterns."), cl::init(16))
static cl::opt< bool > DisableByteCmp("disable-loop-idiom-vectorize-bytecmp", cl::Hidden, cl::init(false), cl::desc("Proceed with Loop Idiom Vectorize Pass, but do " "not convert byte-compare loop(s)."))
static bool isSimple(Instruction *I)
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Instruction & front() const
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
bool isUnconditional() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Value * getPointerOperand()
Type * getResultElementType() const
unsigned getNumIndices() const
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
ConstantInt * getTrue()
Get the constant value for i1 true.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreateVScale(Type *Ty, const Twine &Name="")
Create a call to llvm.vscale.<Ty>().
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Class to represent scalable SIMD vectors.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
const ParentTy * getParent() const
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
br_match m_UnconditionalBr(BasicBlock *&Succ)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
auto m_GEP(const OperandTypes &...Ops)
Matches GetElementPtrInst.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI