1//===- LoopFlatten.cpp - Loop flattening pass------------------------------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
9// This pass flattens pairs nested loops into a single loop.
11// The intention is to optimise loop nests like this, which together access an
14// for (int i = 0; i < N; ++i)
15// for (int j = 0; j < M; ++j)
20// for (int i = 0; i < (N*M); ++i)
23// It can also flatten loops where the induction variables are not used in the
24// loop. This is only worth doing if the induction variables are only used in an
25// expression like i*M+j. If they had any other uses, we would have to insert a
26// div/mod to reconstruct the original values, so this wouldn't be profitable.
28// We also need to prove that N*M will not overflow. The preferred solution is
29// to widen the IV, which avoids overflow checks, so that is tried first. If
30// the IV cannot be widened, then we try to determine that this new tripcount
31// expression won't overflow.
33// Q: Does LoopFlatten use SCEV?
34// Short answer: Yes and no.
37// For this transformation to be valid, we require all uses of the induction
38// variables to be linear expressions of the form i*M+j. The different Loop
39// APIs are used to get some loop components like the induction variable,
40// compare statement, etc. In addition, we do some pattern matching to find the
41// linear expressions and other loop components like the loop increment. The
42// latter are examples of expressions that do use the induction variable, but
43// are safe to ignore when we check all uses to be of the form i*M+j. We keep
44// track of all of this in bookkeeping struct FlattenInfo.
45// We assume the loops to be canonical, i.e. starting at 0 and increment with
46// 1. This makes RHS of the compare the loop tripcount (with the right
47// predicate). We use SCEV to then sanity check that this tripcount matches
48// with the tripcount as computed by SCEV.
50//===----------------------------------------------------------------------===//
81 #define DEBUG_TYPE "loop-flatten"
83 STATISTIC(NumFlattened,
"Number of loops flattened");
87 cl::desc(
"Limit on the cost of instructions that can be repeated due to "
93 cl::desc(
"Assume that the product of the two iteration "
94 "trip counts will never overflow"));
98 cl::desc(
"Widen the loop induction variables, if possible, so "
99 "overflow checks won't reject flattening"));
103 cl::desc(
"Version loops if flattened loop could overflow"));
106// We require all uses of both induction variables to match this pattern:
108// (OuterPHI * InnerTripCount) + InnerPHI
110// I.e., it needs to be a linear expression of the induction variables and the
111// inner loop trip count. We keep track of all different expressions on which
112// checks will be performed in this bookkeeping struct.
115 Loop *OuterLoop =
nullptr;
// The loop pair to be flattened.
116 Loop *InnerLoop =
nullptr;
118 PHINode *InnerInductionPHI =
nullptr;
// These PHINodes correspond to loop
119 PHINode *OuterInductionPHI =
nullptr;
// induction variables, which are
120 // expected to start at zero and
121 // increment by one on each loop.
123 Value *InnerTripCount =
nullptr;
// The product of these two tripcounts
124 Value *OuterTripCount =
nullptr;
// will be the new flattened loop
125 // tripcount. Also used to recognise a
126 // linear expression that will be replaced.
129 // of the form i*M+j that will be
132 BinaryOperator *InnerIncrement =
nullptr;
// Uses of induction variables in
133 BinaryOperator *OuterIncrement =
nullptr;
// loop control statements that
134 BranchInst *InnerBranch =
nullptr;
// are safe to ignore.
136 BranchInst *OuterBranch =
nullptr;
// The instruction that needs to be
137 // updated with new tripcount.
141 bool Widened =
false;
// Whether this holds the flatten info before or after
144 PHINode *NarrowInnerInductionPHI =
nullptr;
// Holds the old/narrow induction
145 PHINode *NarrowOuterInductionPHI =
nullptr;
// phis, i.e. the Phis before IV
146 // has been applied. Used to skip
147 // checks on phi nodes.
149 Value *NewTripCount =
nullptr;
// The tripcount of the flattened loop.
151 FlattenInfo(
Loop *OL,
Loop *IL) : OuterLoop(OL), InnerLoop(IL){};
153 bool isNarrowInductionPhi(PHINode *Phi) {
154 // This can't be the narrow phi if we haven't widened the IV first.
157 return NarrowInnerInductionPHI ==
Phi || NarrowOuterInductionPHI ==
Phi;
159 bool isInnerLoopIncrement(User *U) {
160 return InnerIncrement ==
U;
162 bool isOuterLoopIncrement(User *U) {
163 return OuterIncrement ==
U;
165 bool isInnerLoopTest(User *U) {
166 return InnerBranch->getCondition() ==
U;
169 bool checkOuterInductionPhiUsers(SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
170 for (User *U : OuterInductionPHI->users()) {
171 if (isOuterLoopIncrement(U))
174 auto IsValidOuterPHIUses = [&] (
User *
U) ->
bool {
175 LLVM_DEBUG(
dbgs() <<
"Found use of outer induction variable: ";
U->dump());
176 if (!ValidOuterPHIUses.
count(U)) {
177 LLVM_DEBUG(
dbgs() <<
"Did not match expected pattern, bailing\n");
185 for (
auto *K :
V->users()) {
186 if (!IsValidOuterPHIUses(K))
192 if (!IsValidOuterPHIUses(U))
198 bool matchLinearIVUser(User *U,
Value *InnerTripCount,
199 SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
200 LLVM_DEBUG(
dbgs() <<
"Checking linear i*M+j expression for: ";
U->dump());
201 Value *MatchedMul =
nullptr;
202 Value *MatchedItCount =
nullptr;
209 // Matches the same pattern as above, except it also looks for truncs
210 // on the phi, which can be the result of widening the induction variables.
217 // Matches the pattern ptr+i*M+j, with the two additions being done via GEP.
229 // The mul should not have any other uses. Widening may leave trivially dead
230 // uses, which can be ignored.
232 return !isInstructionTriviallyDead(cast<Instruction>(U));
238 // Look through extends if the IV has been widened. Don't look through
239 // extends if we already looked through a trunc.
240 if (Widened && (IsAdd || IsGEP) &&
242 assert(MatchedItCount->
getType() == InnerInductionPHI->getType() &&
243 "Unexpected type mismatch in types after widening");
250 InnerTripCount->dump());
252 if ((IsAdd || IsAddTrunc || IsGEP) && MatchedItCount == InnerTripCount) {
254 ValidOuterPHIUses.
insert(MatchedMul);
255 LinearIVUses.insert(U);
259 LLVM_DEBUG(
dbgs() <<
"Did not match expected pattern, bailing\n");
263 bool checkInnerInductionPhiUsers(SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
264 Value *SExtInnerTripCount = InnerTripCount;
269 for (User *U : InnerInductionPHI->users()) {
271 if (isInnerLoopIncrement(U)) {
272 LLVM_DEBUG(
dbgs() <<
"Use is inner loop increment, continuing\n");
276 // After widening the IVs, a trunc instruction might have been introduced,
277 // so look through truncs.
281 U = *
U->user_begin();
284 // If the use is in the compare (which is also the condition of the inner
285 // branch) then the compare has been altered by another transformation e.g
286 // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
287 // a constant. Ignore this use as the compare gets removed later anyway.
288 if (isInnerLoopTest(U)) {
293 if (!matchLinearIVUser(U, SExtInnerTripCount, ValidOuterPHIUses)) {
315// Given the RHS of the loop latch compare instruction, verify with SCEV
316// that this is indeed the loop tripcount.
317// TODO: This used to be a straightforward check but has grown to be quite
318// complicated now. It is therefore worth revisiting what the additional
319// benefits are of this (compared to relying on canonical loops and pattern
327 LLVM_DEBUG(
dbgs() <<
"Backedge-taken count is not predictable\n");
331 // Evaluating in the trip count's type can not overflow here as the overflow
332 // checks are performed in checkOverflow, but are first tried to avoid by
334 const SCEV *SCEVTripCount =
336 BackedgeTakenCount->
getType(), L);
339 if (SCEVRHS == SCEVTripCount)
343 const SCEV *BackedgeTCExt =
nullptr;
345 const SCEV *SCEVTripCountExt;
346 // Find the extended backedge taken count and extended trip count using
347 // SCEV. One of these should now match the RHS of the compare.
351 if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
356 // If the RHS of the compare is equal to the backedge taken count we need
357 // to add one to get the trip count.
358 if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
362 IterationInstructions);
366 // If the RHS isn't a constant then check that the reason it doesn't match
367 // the SCEV trip count is because the RHS is a ZExt or SExt instruction
368 // (and take the trip count to be the RHS).
374 if (!TripCountInst) {
379 SE->
getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
380 LLVM_DEBUG(
dbgs() <<
"Could not find valid extended trip count\n");
386// Finds the induction variable, increment and trip count for a simple loop that
392 LLVM_DEBUG(
dbgs() <<
"Finding components of loop: " << L->getName() <<
"\n");
394 if (!L->isLoopSimplifyForm()) {
399 // Currently, to simplify the implementation, the Loop induction variable must
400 // start at zero and increment with a step size of one.
401 if (!L->isCanonical(*SE)) {
406 // There must be exactly one exiting block, and it must be the same at the
409 if (L->getExitingBlock() != Latch) {
414 // Find the induction PHI. If there is no induction PHI, we can't do the
415 // transformation. TODO: could other variables trigger this? Do we have to
416 // search for the best one?
417 InductionPHI = L->getInductionVariable(*SE);
432 // Find Compare and make sure it is valid. getLatchCmpInst checks that the
433 // back branch of the latch is conditional.
434 ICmpInst *Compare = L->getLatchCmpInst();
435 if (!Compare || !IsValidPredicate(Compare->getUnsignedPredicate()) ||
436 Compare->hasNUsesOrMore(2)) {
441 IterationInstructions.
insert(BackBranch);
443 IterationInstructions.
insert(Compare);
446 // Find increment and trip count.
447 // There are exactly 2 incoming values to the induction phi; one from the
448 // pre-header and one from the latch. The incoming latch value is the
449 // increment variable.
457 // The trip count is the RHS of the compare. If this doesn't match the trip
458 // count computed by SCEV then this is because the trip count variable
459 // has been widened so the types don't match, or because it is a constant and
460 // another transformation has changed the compare (e.g. icmp ult %inc,
461 // tripcount -> icmp ult %j, tripcount-1), or both.
462 Value *
RHS = Compare->getOperand(1);
469 // All PHIs in the inner and outer headers must either be:
470 // - The induction PHI, which we are going to rewrite as one induction in
471 // the new loop. This is already checked by findLoopComponents.
472 // - An outer header PHI with all incoming values from outside the loop.
473 // LoopSimplify guarantees we have a pre-header, so we don't need to
474 // worry about that here.
475 // - Pairs of PHIs in the inner and outer headers, which implement a
476 // loop-carried dependency that will still be valid in the new loop. To
477 // be valid, this variable must be modified only in the inner loop.
479 // The set of PHI nodes in the outer loop header that we know will still be
480 // valid after the transformation. These will not need to be modified (with
481 // the exception of the induction variable), but we do need to check that
482 // there are no unsafe PHI nodes.
484 SafeOuterPHIs.
insert(FI.OuterInductionPHI);
486 // Check that all PHI nodes in the inner loop header match one of the valid
489 // The induction PHIs break these rules, and that's OK because we treat
490 // them specially when doing the transformation.
491 if (&InnerPHI == FI.InnerInductionPHI)
493 if (FI.isNarrowInductionPhi(&InnerPHI))
496 // Each inner loop PHI node must have two incoming values/blocks - one
497 // from the pre-header, and one from the latch.
498 assert(InnerPHI.getNumIncomingValues() == 2);
499 Value *PreHeaderValue =
502 InnerPHI.getIncomingValueForBlock(FI.InnerLoop->
getLoopLatch());
504 // The incoming value from the outer loop must be the PHI node in the
505 // outer loop header, with no modifications made in the top of the outer
513 // The other incoming value must come from the inner loop, without any
514 // modifications in the tail end of the outer loop. We are in LCSSA form,
515 // so this will actually be a PHI in the inner loop's exit block, which
516 // only uses values from inside the inner loop.
524 // The value used by the LCSSA PHI must be the same one that the inner
528 dbgs() <<
"LCSSA PHI incoming value does not match latch value\n");
535 SafeOuterPHIs.
insert(OuterPHI);
536 FI.InnerPHIsToTransform.insert(&InnerPHI);
540 if (FI.isNarrowInductionPhi(&OuterPHI))
542 if (!SafeOuterPHIs.
count(&OuterPHI)) {
543 LLVM_DEBUG(
dbgs() <<
"found unsafe PHI in outer loop: "; OuterPHI.dump());
556 // Check for instructions in the outer but not inner loop. If any of these
557 // have side-effects then this transformation is not legal, and if there is
558 // a significant amount of code here which can't be optimised out that it's
559 // not profitable (as these instructions would get executed for each
560 // iteration of the inner loop).
569 LLVM_DEBUG(
dbgs() <<
"Cannot flatten because instruction may have "
574 // The execution count of the outer loop's iteration instructions
575 // (increment, compare and branch) will be increased, but the
576 // equivalent instructions will be removed from the inner loop, so
577 // they make a net difference of zero.
578 if (IterationInstructions.
count(&
I))
580 // The unconditional branch to the inner loop's header will turn into
581 // a fall-through, so adds no cost.
586 // Multiplies of the outer iteration variable and inner iteration
587 // count will be optimised out.
594 RepeatedInstrCost += Cost;
598 LLVM_DEBUG(
dbgs() <<
"Cost of instructions that will be repeated: "
599 << RepeatedInstrCost <<
"\n");
600 // Bail out if flattening the loops would cause instructions in the outer
601 // loop but not in the inner loop to be executed extra times.
603 LLVM_DEBUG(
dbgs() <<
"checkOuterLoopInsts: not profitable, bailing.\n");
613// We require all uses of both induction variables to match this pattern:
615// (OuterPHI * InnerTripCount) + InnerPHI
617// Any uses of the induction variables not matching that pattern would
618// require a div/mod to reconstruct in the flattened loop, so the
619// transformation wouldn't be profitable.
621 // Check that all uses of the inner loop's induction variable match the
622 // expected pattern, recording the uses of the outer IV.
624 if (!FI.checkInnerInductionPhiUsers(ValidOuterPHIUses))
627 // Check that there are no uses of the outer IV other than the ones found
628 // as part of the pattern above.
629 if (!FI.checkOuterInductionPhiUsers(ValidOuterPHIUses))
633 dbgs() <<
"Found " << FI.LinearIVUses.size()
634 <<
" value(s) that can be replaced:\n";
635 for (
Value *V : FI.LinearIVUses) {
642// Return an OverflowResult dependant on if overflow of the multiplication of
643// InnerTripCount and OuterTripCount can be assumed not to happen.
649 // For debugging/testing.
653 // Check if the multiply could not overflow due to known ranges of the
656 FI.InnerTripCount, FI.OuterTripCount,
663 for (
Value *GEPUser :
GEP->users()) {
670 // The IV is used as the operand of a GEP which dominates the loop
671 // latch, and the IV is at least as wide as the address space of the
672 // GEP. In this case, the GEP would wrap around the address space
673 // before the IV increment wraps, which would be UB.
674 if (
GEP->isInBounds() &&
675 GEPOperand->getType()->getIntegerBitWidth() >=
676 DL.getPointerTypeSizeInBits(
GEP->getType())) {
678 dbgs() <<
"use of linear IV would be UB if overflow occurred: ";
686 // Check if any IV user is, or is used by, a GEP that would cause UB if the
687 // multiply overflows.
688 for (
Value *V : FI.LinearIVUses) {
690 if (
GEP->getNumIndices() == 1 && CheckGEP(
GEP,
GEP->getOperand(1)))
694 if (CheckGEP(
GEP, V))
706 FI.InnerInductionPHI, FI.InnerTripCount,
707 FI.InnerIncrement, FI.InnerBranch, SE, FI.Widened))
710 FI.OuterInductionPHI, FI.OuterTripCount,
711 FI.OuterIncrement, FI.OuterBranch, SE, FI.Widened))
714 // Both of the loop trip count values must be invariant in the outer loop
715 // (non-instructions are all inherently invariant).
728 // FIXME: it should be possible to handle different types correctly.
729 if (FI.InnerInductionPHI->
getType() != FI.OuterInductionPHI->
getType())
735 // Find the values in the loop that can be replaced with the linearized
736 // induction variable, and check that there are no other uses of the inner
737 // or outer induction variable. If there were, we could still do this
738 // transformation, but we'd have to insert a div/mod to calculate the
739 // original IVs, so it wouldn't be profitable.
752 LLVM_DEBUG(
dbgs() <<
"Checks all passed, doing the transformation\n");
758 Remark <<
"Flattened into outer loop";
762 if (!FI.NewTripCount) {
763 FI.NewTripCount = BinaryOperator::CreateMul(
764 FI.InnerTripCount, FI.OuterTripCount,
"flatten.tripcount",
767 FI.NewTripCount->
dump());
770 // Fix up PHI nodes that take values from the inner loop back-edge, which
771 // we are about to remove.
774 // The old Phi will be optimised away later, but for now we can't leave
775 // leave it in an invalid state, so are updating them too.
779 // Modify the trip count of the outer loop to be the product of the two
783 // Replace the inner loop backedge with an unconditional branch to the exit.
789 Term->eraseFromParent();
791 // Update the DomTree and MemorySSA.
796 // Replace all uses of the polynomial calculated from the two induction
797 // variables with the one new one.
799 for (
Value *V : FI.LinearIVUses) {
800 Value *OuterValue = FI.OuterInductionPHI;
802 OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->
getType(),
806 // Replace the GEP with one that uses OuterValue as the offset.
809 // When the base of the GEP doesn't dominate the outer induction phi then
810 // we need to insert the new GEP where the old GEP was.
814 Builder.CreateGEP(
GEP->getSourceElementType(),
Base, OuterValue,
816 GEP->isInBounds() && InnerGEP->isInBounds());
824 // Tell LoopInfo, SCEV and the pass manager that the inner loop has been
825 // deleted, and invalidate any outer loop information.
829 U->markLoopAsDeleted(*FI.InnerLoop, FI.InnerLoop->
getName());
830 LI->
erase(FI.InnerLoop);
832 // Increment statistic value.
848 auto &
DL = M->getDataLayout();
849 auto *InnerType = FI.InnerInductionPHI->
getType();
850 auto *OuterType = FI.OuterInductionPHI->
getType();
851 unsigned MaxLegalSize =
DL.getLargestLegalIntTypeSizeInBits();
852 auto *MaxLegalType =
DL.getLargestLegalIntType(M->getContext());
854 // If both induction types are less than the maximum legal integer width,
855 // promote both to the widest type available so we know calculating
856 // (OuterTripCount * InnerTripCount) as the new trip count is safe.
857 if (InnerType != OuterType ||
858 InnerType->getScalarSizeInBits() >= MaxLegalSize ||
859 MaxLegalType->getScalarSizeInBits() <
860 InnerType->getScalarSizeInBits() * 2) {
867 unsigned ElimExt = 0;
868 unsigned Widened = 0;
872 createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts, ElimExt, Widened,
873 true /* HasGuards */,
true /* UsePostIncrementRanges */);
883 if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType,
false},
Deleted))
885 // Add the narrow phi to list, so that it will be adjusted later when the
886 // the transformation is performed.
888 FI.InnerPHIsToTransform.insert(FI.InnerInductionPHI);
890 if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType,
false},
Deleted))
893 assert(Widened &&
"Widened IV expected");
896 // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
897 FI.NarrowInnerInductionPHI = FI.InnerInductionPHI;
898 FI.NarrowOuterInductionPHI = FI.OuterInductionPHI;
900 // After widening, rediscover all the loop components.
910 dbgs() <<
"Loop flattening running on outer loop "
918 // Check if we can widen the induction variables to avoid overflow checks.
921 // It can happen that after widening of the IV, flattening may not be
922 // possible/happening, e.g. when it is deemed unprofitable. So bail here if
924 // TODO: IV widening without performing the actual flattening transformation
925 // is not ideal. While this codegen change should not matter much, it is an
926 // unnecessary change which is better to avoid. It's unlikely this happens
927 // often, because if it's unprofitibale after widening, it should be
928 // unprofitabe before widening as checked in the first round of checks. But
929 // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
930 // relaxed. Because this is making a code change (the IV widening, but not
931 // the flattening), we return true here.
932 if (FI.Widened && !CanFlatten)
935 // If we have widened and can perform the transformation, do that here.
939 // Otherwise, if we haven't widened the IV, check if the new iteration
940 // variable might overflow. In this case, we need to version the loop, and
941 // select the original version at runtime if the iteration space is too
946 LLVM_DEBUG(
dbgs() <<
"Multiply would always overflow, so not profitable\n");
952 LLVM_DEBUG(
dbgs() <<
"Multiply might overflow, not flattening\n");
954 }
else if (!
DL.isLegalInteger(
956 // If the trip count type isn't legal then it won't be possible to check
957 // for overflow using only a single multiply instruction, so don't
960 dbgs() <<
"Can't check overflow efficiently, not flattening\n");
963 LLVM_DEBUG(
dbgs() <<
"Multiply might overflow, versioning loop\n");
965 // Version the loop. The overflow check isn't a runtime pointer check, so we
966 // pass an empty list of runtime pointer checks, causing LoopVersioning to
967 // emit 'false' as the branch condition, and add our own check afterwards.
973 // Check for overflow by calculating the new tripcount using
974 // umul_with_overflow and then checking if it overflowed.
977 "Expected LoopVersioning to generate a conditional branch");
979 "Expected branch condition to be false");
982 Intrinsic::umul_with_overflow, FI.OuterTripCount->
getType(),
983 {FI.OuterTripCount, FI.InnerTripCount},
984 /*FMFSource=*/nullptr,
"flatten.mul");
985 FI.NewTripCount = Builder.CreateExtractValue(
Call, 0,
"flatten.tripcount");
986 Value *Overflow = Builder.CreateExtractValue(
Call, 1,
"flatten.overflow");
989 LLVM_DEBUG(
dbgs() <<
"Multiply cannot overflow, modifying loop in-place\n");
1001 std::optional<MemorySSAUpdater> MSSAU;
1008 // The loop flattening pass requires loops to be
1009 // in simplified form, and also needs LCSSA. Running
1010 // this pass will simplify all loops that contain inner loops,
1011 // regardless of whether anything ends up being flattened.
1018 FlattenInfo FI(OuterLoop, InnerLoop);
1021 MSSAU ? &*MSSAU :
nullptr, LAIM.
getInfo(*OuterLoop));
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Module.h This file contains the declarations for the Module class.
static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
static bool verifyTripCount(Value *RHS, Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened)
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
static bool setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment, SmallPtrSetImpl< Instruction * > &IterationInstructions)
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI, LPMUpdater *U, MemorySSAUpdater *MSSAU)
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI, LPMUpdater *U, MemorySSAUpdater *MSSAU, const LoopAccessInfo &LAI)
static bool checkIVUsers(FlattenInfo &FI)
static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
static cl::opt< bool > VersionLoops("loop-flatten-version-loops", cl::Hidden, cl::init(true), cl::desc("Version loops if flattened loop could overflow"))
static bool findLoopComponents(Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened)
static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT, AssumptionCache *AC)
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI)
static cl::opt< unsigned > RepeatedInstructionThreshold("loop-flatten-cost-threshold", cl::Hidden, cl::init(2), cl::desc("Limit on the cost of instructions that can be repeated due to " "loop flattening"))
static cl::opt< bool > AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, cl::init(false), cl::desc("Assume that the product of the two iteration " "trip counts will never overflow"))
static bool checkOuterLoopInsts(FlattenInfo &FI, SmallPtrSetImpl< Instruction * > &IterationInstructions, const TargetTransformInfo *TTI)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Virtual Register Rewriter
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
A parsed version of the target data layout string in and methods for querying it.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)
Drive the analysis of memory accesses in the loop.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
PreservedAnalyses run(LoopNest &LN, LoopAnalysisManager &LAM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LLVM_ABI void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
This class represents a loop nest and can be used to query its properties.
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
void versionLoop()
Performs the CFG manipulation part of versioning the loop including the DominatorTree and LoopInfo up...
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
An analysis that produces MemorySSA for a function.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
A Module instance is used to store all the information related to an LLVM module.
LLVM_ABI Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
LLVM_ABI Value * hasConstantValue() const
If the specified PHI node always merges together the same value, return the value,...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class uses information about analyze scalars to rewrite expressions in canonical form.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
const ParentTy * getParent() const
self_iterator getIterator()
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_GEP(const OperandTypes &...Ops)
Matches GetElementPtrInst.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
LLVM_ABI bool isGuaranteedToExecuteForEveryIteration(const Instruction *I, const Loop *L)
Return true if this function can prove that the instruction I is executed for every iteration of the ...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const SimplifyQuery &SQ, bool IsNSW=false)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
LLVM_ABI bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
@ Increment
Incrementally increasing token ID.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Collect information about induction variables that are used by sign/zero extend operations.