1//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
9// Rewrite call/invoke instructions so as to make potential relocations
10// performed by the garbage collector explicit in the IR.
12//===----------------------------------------------------------------------===//
76 #define DEBUG_TYPE "rewrite-statepoints-for-gc"
80// Print the liveset found at the insert location
86// Print out the base pointers for debugging
90// Cost threshold measuring when it is profitable to rematerialize value instead
96#ifdef EXPENSIVE_CHECKS
113/// The IR fed into RewriteStatepointsForGC may have had attributes and
114/// metadata implying dereferenceability that are no longer valid/correct after
115/// RewriteStatepointsForGC has run. This is because semantically, after
116/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
117/// heap. stripNonValidData (conservatively) restores
118/// correctness by erasing all attributes in the module that externally imply
119/// dereferenceability. Similar reasoning also applies to the noalias
120/// attributes and metadata. gc.statepoint can touch the entire heap including
122/// Apart from attributes and metadata, we also remove instructions that imply
123/// constant physical memory: llvm.invariant.start.
126// Find the GC strategy for a function, or null if it doesn't have one.
136 // Nothing to do for declarations.
137 if (
F.isDeclaration() ||
F.empty())
140 // Policy choice says not to rewrite - the most common reason is that we're
141 // compiling code without a GCStrategy.
153 // stripNonValidData asserts that shouldRewriteStatepointsIn
154 // returns true for at least one function in the module. Since at least
155 // one function changed, we know that the precondition is satisfied.
166struct GCPtrLivenessData {
167 /// Values defined in this block.
170 /// Values used in this block (and thus live); does not include values
171 /// killed within this block.
174 /// Values live into this basic block (i.e. used by any
175 /// instruction in this basic block or ones reachable from here)
178 /// Values live out of this basic block (i.e. live into
179 /// any successor block)
183// The type of the internal cache used inside the findBasePointers family
184// of functions. From the callers perspective, this is an opaque type and
185// should not be inspected.
187// In the actual implementation this caches two relations:
188// - The base relation itself (i.e. this pointer is based on that one)
189// - The base defining value relation (i.e. before base_phi insertion)
190// Generally, after the execution of a full findBasePointer call, only the
191// base relation will remain. Internally, we add a mixture of the two
192// types, then update all the second type to the first type
197using RematerializedValueMapTy =
200struct PartiallyConstructedSafepointRecord {
201 /// The set of values known to be live across this safepoint
202 StatepointLiveSetTy LiveSet;
204 /// The *new* gc.statepoint instruction itself. This produces the token
205 /// that normal path gc.relocates and the gc.result are tied to.
208 /// Instruction to which exceptional gc relocates are attached
209 /// Makes it easier to iterate through them during relocationViaAlloca.
212 /// Record live values we have rematerialized instead of relocating.
213 /// They are not included into 'LiveSet' field.
214 /// Maps rematerialized copy to its original value.
215 RematerializedValueMapTy RematerializedValues;
218struct RematerizlizationCandidateRecord {
219 // Chain from derived pointer to base.
228}
// end anonymous namespace
231 std::optional<OperandBundleUse> DeoptBundle =
236 "Found non-leaf call without deopt info!");
240 return DeoptBundle->Inputs;
243/// Compute the live-in set for every basic block in the function
247/// Given results from the dataflow liveness computation, find the set of live
248/// Values at a particular instruction.
253 assert(GC &&
"GC Strategy for isGCPointerType cannot be null");
258 // conservative - same as StatepointLowering
259 return GC->isGCManagedPointer(
T).value_or(
true);
262// Return true if this type is one which a) is a gc pointer or contains a GC
263// pointer and b) is of a type this code expects to encounter as a live value.
264// (The insertion code will assert that a type which matches (a) and not (b)
265// is not encountered.)
267 // We fully support gc pointers
270 // We partially support vectors of gc pointers. The code will assert if it
271 // can't handle something.
279/// Returns true if this type contains a gc pointer whether we know how to
280/// handle that type or not.
290 [GC](
Type *Ty) { return containsGCPtrType(Ty, GC); });
294// Returns true if this is a type which a) is a gc pointer or contains a GC
295// pointer and b) is of a type which the code doesn't expect (i.e. first class
296// aggregates). Used to trip assertions.
302// Return the name of the value suffixed with the provided value, or if the
303// value didn't have a name, the default value specified.
306 return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.
str();
309// Conservatively identifies any definitions which might be live at the
310// given instruction. The analysis is performed immediately before the
311// given instruction. Values defined by that instruction are not considered
312// live. Values used by that instruction are considered live.
315 PartiallyConstructedSafepointRecord &Result,
GCStrategy *GC) {
316 StatepointLiveSetTy LiveSet;
320 dbgs() <<
"Live Variables:\n";
321 for (
Value *V : LiveSet)
322 dbgs() <<
" " << V->getName() <<
" " << *V <<
"\n";
325 dbgs() <<
"Safepoint For: " <<
Call->getCalledOperand()->getName() <<
"\n";
326 dbgs() <<
"Number live values: " << LiveSet.size() <<
"\n";
328 Result.LiveSet = LiveSet;
331/// Returns true if V is a known base.
334/// Caches the IsKnownBase flag for a value and asserts that it wasn't present
335/// in the cache before.
337 IsKnownBaseMapTy &KnownBases);
340 IsKnownBaseMapTy &KnownBases);
342/// Return a base defining value for the 'Index' element of the given vector
343/// instruction 'I'. If Index is null, returns a BDV for the entire vector
344/// 'I'. As an optimization, this method will try to determine when the
345/// element is known to already be a base pointer. If this can be established,
346/// the second value in the returned pair will be true. Note that either a
347/// vector or a pointer typed value can be returned. For the former, the
348/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
349 /// For the latter, the returned pointer is a BDV (or possibly a base) for the
350/// particular element in 'I'.
352 IsKnownBaseMapTy &KnownBases) {
353 // Each case parallels findBaseDefiningValue below, see that code for
354 // detailed motivation.
356 auto Cached = Cache.find(
I);
357 if (Cached != Cache.end())
358 return Cached->second;
361 // An incoming argument to the function is a base pointer
368 // Base of constant vector consists only of constant null pointers.
369 // For reasoning see similar case inside 'findBaseDefiningValue' function.
383 // We don't know whether this vector contains entirely base pointers or
384 // not. To be conservatively correct, we treat it as a BDV and will
385 // duplicate code as needed to construct a parallel vector of bases.
392 // We don't know whether this vector contains entirely base pointers or
393 // not. To be conservatively correct, we treat it as a BDV and will
394 // duplicate code as needed to construct a parallel vector of bases.
395 // TODO: There are a number of local optimizations which could be applied here
396 // for particular shufflevector patterns.
402 // The behavior of getelementptr instructions is the same for vector and
403 // non-vector data types.
411 // The behavior of freeze instructions is the same for vector and
412 // non-vector data types.
419 // If the pointer comes through a bitcast of a vector of pointers to
420 // a vector of another type of pointer, then look through the bitcast
427 // We assume that functions in the source language only return base
428 // pointers. This should probably be generalized via attributes to support
429 // both source language and internal functions.
436 // A PHI or Select is a base defining value. The outer findBasePointer
437 // algorithm is responsible for constructing a base value for this BDV.
439 "unknown vector instruction - no base found for vector element");
445/// Helper function for findBasePointer - Will return a value which either a)
446/// defines the base pointer for the input, b) blocks the simple search
447/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
448/// from pointer to vector type or back.
450 IsKnownBaseMapTy &KnownBases) {
451 assert(
I->getType()->isPtrOrPtrVectorTy() &&
452 "Illegal to ask for the base pointer of a non-pointer type");
453 auto Cached = Cache.find(
I);
454 if (Cached != Cache.end())
455 return Cached->second;
457 if (
I->getType()->isVectorTy())
461 // An incoming argument to the function is a base pointer
462 // We should have never reached here if this argument isn't a gc value
469 // We assume that objects with a constant base (e.g. a global) can't move
470 // and don't need to be reported to the collector because they are always
471 // live. Besides global references, all kinds of constants (e.g. undef,
472 // constant expressions, null pointers) can be introduced by the inliner or
473 // the optimizer, especially on dynamically dead paths.
474 // Here we treat all of them as having a single null base. By doing this we
475 // are trying to avoid problems reporting various conflicts in the form of
476 // "phi (const1, const2)" or "phi (const, regular gc ptr)".
477 // See constant.ll file for relevant test cases.
485 // inttoptrs in an integral address space are currently ill-defined. We
486 // treat them as defining base pointers here for consistency with the
487 // constant rule above and because we don't really have a better semantic
488 // to give them. Note that the optimizer is always free to insert undefined
489 // behavior on dynamically dead paths as well.
497 Value *Def = CI->stripPointerCasts();
498 // If stripping pointer casts changes the address space there is an
499 // addrspacecast in between.
502 "unsupported addrspacecast");
503 // If we find a cast instruction here, it means we've found a cast which is
504 // not simply a pointer cast (i.e. an inttoptr). We don't know how to
505 // handle int->ptr conversion.
513 // The value loaded is a gc base itself
520 // The base of this GEP is the base
534 switch (
II->getIntrinsicID()) {
536 // fall through to general call handling
538 case Intrinsic::experimental_gc_statepoint:
540 case Intrinsic::experimental_gc_relocate:
541 // Rerunning safepoint insertion after safepoints are already
542 // inserted is not supported. It could probably be made to work,
543 // but why are you doing this? There's no good reason.
545 case Intrinsic::gcroot:
546 // Currently, this mechanism hasn't been extended to work with gcroot.
547 // There's no reason it couldn't be, but I haven't thought about the
548 // implications much.
550 "interaction with the gcroot mechanism is not supported");
551 case Intrinsic::experimental_gc_get_pointer_base:
557 // We assume that functions in the source language only return base
558 // pointers. This should probably be generalized via attributes to support
559 // both source language and internal functions.
566 // TODO: I have absolutely no idea how to implement this part yet. It's not
567 // necessarily hard, I just haven't really looked at it yet.
571 // A CAS is effectively an atomic store and load combined under a
572 // predicate. From the perspective of base pointers, we just treat it
581 "Only Xchg is allowed for pointer values");
582 // A RMW Xchg is a combined atomic load and store, so we can treat the
583 // loaded value as a base pointer.
589 // The aggregate ops. Aggregates can either be in the heap or on the
590 // stack, but in either case, this is simply a field load. As a result,
591 // this is a defining definition of the base just like a load is.
598 // We should never see an insert vector since that would require we be
599 // tracing back a struct value not a pointer value.
601 "Base pointer for a struct is meaningless");
603 // This value might have been generated by findBasePointer() called when
604 // substituting gc.get.pointer.base() intrinsic.
610 // An extractelement produces a base result exactly when its input does.
611 // We may need to insert a parallel instruction to extract the appropriate
612 // element out of the base vector corresponding to the input. Given this,
613 // it's analogous to the phi and select case even though it's not a merge.
615 // Note: There are a lot of obvious peephole cases here. These are deliberately
616 // handled after the main base pointer inference algorithm to make writing
617 // test cases to exercise that code easier.
620 // The last two cases here don't return a base pointer. Instead, they
621 // return a value which dynamically selects from among several base
622 // derived pointers (each with its own base potentially). It's the job of
623 // the caller to resolve these.
625 "missing instruction case in findBaseDefiningValue");
629/// Returns the base defining value for this value.
631 IsKnownBaseMapTy &KnownBases) {
632 if (!Cache.contains(
I)) {
636 << Cache[
I]->getName() <<
", is known base = "
637 << KnownBases[
I] <<
"\n");
640 assert(KnownBases.contains(Cache[
I]) &&
641 "Cached value must be present in known bases map");
645/// Return a base pointer for this value if known. Otherwise, return its
646/// base defining value.
648 IsKnownBaseMapTy &KnownBases) {
650 auto Found = Cache.find(Def);
651 if (Found != Cache.end()) {
652 // Either a base-of relation, or a self reference. Caller must check.
653 return Found->second;
655 // Only a BDV available
660/// This value is a base pointer that is not generated by RS4GC, i.e. it already
661/// exists in the code.
663 // no recursion possible
671 auto It = KnownBases.find(V);
672 assert(It != KnownBases.end() &&
"Value not present in the map");
677 IsKnownBaseMapTy &KnownBases) {
679 auto It = KnownBases.find(V);
680 if (It != KnownBases.end())
681 assert(It->second == IsKnownBase &&
"Changing already present value");
683 KnownBases[V] = IsKnownBase;
686// Returns true if First and Second values are both scalar or both vector.
694/// Models the state of a single base defining value in the findBasePointer
695/// algorithm for determining where a new instruction is needed to propagate
696/// the base of this BDV.
700 // Starting state of lattice
702 // Some specific base value -- does *not* mean that instruction
703 // propagates the base of the object
704 // ex: gep %arg, 16 -> %arg is the base value
706 // Need to insert a node to represent a merge.
714 explicit BDVState(
Value *OriginalValue)
715 : OriginalValue(OriginalValue) {}
716 explicit BDVState(
Value *OriginalValue, StatusTy Status,
Value *BaseValue =
nullptr)
717 : OriginalValue(OriginalValue), Status(Status), BaseValue(BaseValue) {
721 StatusTy getStatus()
const {
return Status; }
722 Value *getOriginalValue()
const {
return OriginalValue; }
723 Value *getBaseValue()
const {
return BaseValue; }
725 bool isBase()
const {
return getStatus() ==
Base; }
726 bool isUnknown()
const {
return getStatus() ==
Unknown; }
727 bool isConflict()
const {
return getStatus() == Conflict; }
729 // Values of type BDVState form a lattice, and this function implements the
732 void meet(
const BDVState &
Other) {
733 auto markConflict = [&]() {
734 Status = BDVState::Conflict;
737 // Conflict is a final state.
740 // if we are not known - just take other state.
742 Status =
Other.getStatus();
743 BaseValue =
Other.getBaseValue();
747 assert(isBase() &&
"Unknown state");
748 // If other is unknown - just keep our state.
749 if (
Other.isUnknown())
751 // If other is conflict - it is a final state.
752 if (
Other.isConflict())
753 return markConflict();
754 // Other is base as well.
756 // If bases are different - Conflict.
757 if (getBaseValue() !=
Other.getBaseValue())
758 return markConflict();
759 // We are identical, do nothing.
763 return OriginalValue ==
Other.OriginalValue && BaseValue ==
Other.BaseValue &&
764 Status ==
Other.Status;
767 bool operator!=(
const BDVState &other)
const {
return !(*
this == other); }
775 void print(raw_ostream &OS)
const {
776 switch (getStatus()) {
787 OS <<
" (base " << getBaseValue() <<
" - "
788 << (getBaseValue() ? getBaseValue()->getName() :
"nullptr") <<
")"
789 <<
" for " << OriginalValue->getName() <<
":";
793 AssertingVH<Value> OriginalValue;
// instruction this state corresponds to
795 AssertingVH<Value> BaseValue =
nullptr;
// Non-null only if Status == Base.
798}
// end anonymous namespace
807/// For a given value or instruction, figure out what base ptr it's derived from.
808/// For gc objects, this is simply itself. On success, returns a value which is
809/// the base pointer. (This is reliable and can be used for relocation.) On
810/// failure, returns nullptr.
812 IsKnownBaseMapTy &KnownBases) {
818 // Here's the rough algorithm:
819 // - For every SSA value, construct a mapping to either an actual base
820 // pointer or a PHI which obscures the base pointer.
821 // - Construct a mapping from PHI to unknown TOP state. Use an
822 // optimistic algorithm to propagate base pointer information. Lattice
827 // When algorithm terminates, all PHIs will either have a single concrete
828 // base or be in a conflict state.
829 // - For every conflict, insert a dummy PHI node without arguments. Add
830 // these to the base[Instruction] = BasePtr mapping. For every
831 // non-conflict, add the actual base.
832 // - For every conflict, add arguments for the base[a] of each input
835 // Note: A simpler form of this would be to add the conflict form of all
836 // PHIs without running the optimistic algorithm. This would be
837 // analogous to pessimistic data flow and would likely lead to an
838 // overall worse solution.
841 auto isExpectedBDVType = [](
Value *BDV) {
848 // Once populated, will contain a mapping from each potentially non-base BDV
849 // to a lattice value (described above) which corresponds to that BDV.
850 // We use the order of insertion (DFS over the def/use graph) to provide a
851 // stable deterministic ordering for visiting DenseMaps (which are unordered)
852 // below. This is important for deterministic compilation.
856 auto VerifyStates = [&]() {
857 for (
auto &Entry : States) {
858 assert(Entry.first == Entry.second.getOriginalValue());
863 auto visitBDVOperands = [](
Value *BDV, std::function<void (
Value*)>
F) {
865 for (
Value *InVal : PN->incoming_values())
868 F(
SI->getTrueValue());
869 F(
SI->getFalseValue());
871 F(EE->getVectorOperand());
873 F(IE->getOperand(0));
874 F(IE->getOperand(1));
876 // For a canonical broadcast, ignore the undef argument
877 // (without this, we insert a parallel base shuffle for every broadcast)
878 F(SV->getOperand(0));
879 if (!SV->isZeroEltSplat())
880 F(SV->getOperand(1));
887 // Recursively fill in all base defining values reachable from the initial
888 // one for which we don't already know a definite base value for
892 States.
insert({Def, BDVState(Def)});
893 while (!Worklist.
empty()) {
897 auto visitIncomingValue = [&](
Value *InVal) {
900 // Known bases won't need new instructions introduced and can be
901 // ignored safely. However, this can only be done when InVal and Base
902 // are both scalar or both vector. Otherwise, we need to find a
903 // correct BDV for InVal, by creating an entry in the lattice
906 assert(isExpectedBDVType(
Base) &&
"the only non-base values "
907 "we see should be base defining values");
912 visitBDVOperands(Current, visitIncomingValue);
919 for (
const auto &Pair : States) {
920 LLVM_DEBUG(
dbgs() <<
" " << Pair.second <<
" for " << *Pair.first <<
"\n");
924 // Iterate forward through the value graph pruning any node from the state
925 // list where all of the inputs are base pointers. The purpose of this is to
926 // reuse existing values when the derived pointer we were asked to materialize
927 // a base pointer for happens to be a base pointer itself. (Or a sub-graph
932 for (
auto Pair : States) {
933 Value *BDV = Pair.first;
934 auto canPruneInput = [&](
Value *V) {
935 // If the input of the BDV is the BDV itself we can prune it. This is
936 // only possible if the BDV is a PHI node.
937 if (V->stripPointerCasts() == BDV)
940 if (V->stripPointerCasts() != VBDV)
942 // The assumption is that anything not in the state list
943 // propagates a base pointer.
944 return States.count(VBDV) == 0;
947 bool CanPrune =
true;
948 visitBDVOperands(BDV, [&](
Value *
Op) {
949 CanPrune = CanPrune && canPruneInput(
Op);
956 // Cache the fact V is its own base for later usage.
961 // Did we manage to prove that Def itself must be a base pointer?
962 if (!States.
count(Def))
965 // Return a phi state for a base defining value. We'll generate a new
966 // base state for known bases and expect to find a cached state otherwise.
968 auto I = States.
find(BaseValue);
969 if (
I != States.
end())
972 return BDVState(BaseValue, BDVState::Base, BaseValue);
975 // Even though we have identified a concrete base (or a conflict) for all live
976 // pointers at this point, there are cases where the base is of an
977 // incompatible type compared to the original instruction. We conservatively
978 // mark those as conflicts to ensure that corresponding BDVs will be generated
979 // in the next steps.
981 // this is a rather explicit check for all cases where we should mark the
982 // state as a conflict to force the latter stages of the algorithm to emit
984 // TODO: in many cases the instructions emitted for the conflicting states
985 // will be identical to the I itself (if the I's operate on their BDVs
986 // themselves). We should exploit this, but can't do it here since it would
987 // break the invariant about the BDVs not being known to be a base.
988 // TODO: the code also does not handle constants at all - the algorithm relies
989 // on all constants having the same BDV and therefore constant-only insns
990 // will never be in conflict, but this check is ignored here. If the
991 // constant conflicts will be to BDVs themselves, they will be identical
992 // instructions and will get optimized away (as in the above TODO)
994 // IE and EE mix vector & scalar so are always a conflict
997 // Shuffle vector is always a conflict as it creates new vector from
1001 // Any instructions where the computed base type differs from the
1002 // instruction type. An example is where an extract instruction is used by a
1003 // select. Here the select's BDV is a vector (because of extract's BDV),
1004 // while the select itself is a scalar type. Note that the IE and EE
1005 // instruction check is not fully subsumed by the vector<->scalar check at
1006 // the end, this is due to the BDV algorithm being ignorant of BDV types at
1013 bool Progress =
true;
1016 const size_t OldSize = States.
size();
1019 // We're only changing values in this loop, thus safe to keep iterators.
1020 // Since this is computing a fixed point, the order of visit does not
1021 // affect the result. TODO: We could use a worklist here and make this run
1023 for (
auto Pair : States) {
1024 Value *BDV = Pair.first;
1025 // Only values that do not have known bases or those that have differing
1026 // type (scalar versus vector) from a possible known base should be in the
1030 "why did it get added?");
1032 BDVState NewState(BDV);
1033 visitBDVOperands(BDV, [&](
Value *
Op) {
1035 auto OpState = GetStateForBDV(BDV,
Op);
1036 NewState.meet(OpState);
1039 // if the instruction has known base, but should in fact be marked as
1040 // conflict because of incompatible in/out types, we mark it as such
1041 // ensuring that it will propagate through the fixpoint iteration
1043 auto BV = NewState.getBaseValue();
1044 if (BV && MarkConflict(
I, BV))
1045 NewState = BDVState(
I, BDVState::Conflict);
1047 BDVState OldState = Pair.second;
1048 if (OldState != NewState) {
1050 States[BDV] = NewState;
1054 assert(OldSize == States.size() &&
1055 "fixed point shouldn't be adding any new nodes to state");
1061 for (
const auto &Pair : States) {
1062 LLVM_DEBUG(
dbgs() <<
" " << Pair.second <<
" for " << *Pair.first <<
"\n");
1065 // since we do the conflict marking as part of the fixpoint iteration this
1066 // loop only asserts that invariants are met
1067 for (
auto Pair : States) {
1069 BDVState State = Pair.second;
1070 auto *BaseValue = State.getBaseValue();
1071 // Only values that do not have known bases or those that have differing
1072 // type (scalar versus vector) from a possible known base should be in the
1076 "why did it get added?");
1077 assert(!State.isUnknown() &&
"Optimistic algorithm didn't complete!");
1081 // Insert Phis for all conflicts
1082 // TODO: adjust naming patterns to avoid this order of iteration dependency
1083 for (
auto Pair : States) {
1085 BDVState State = Pair.second;
1086 // Only values that do not have known bases or those that have differing
1087 // type (scalar versus vector) from a possible known base should be in the
1091 "why did it get added?");
1092 assert(!State.isUnknown() &&
"Optimistic algorithm didn't complete!");
1094 // Since we're joining a vector and scalar base, they can never be the
1095 // same. As a result, we should always see insert element having reached
1096 // the conflict state.
1099 if (!State.isConflict())
1102 auto getMangledName = [](
Instruction *
I) -> std::string {
1118 BaseInst->
setName(getMangledName(
I));
1119 // Add metadata marking this as a base value
1121 States[
I] = BDVState(
I, BDVState::Conflict, BaseInst);
1122 setKnownBase(BaseInst,
/* IsKnownBase */true, KnownBases);
1129 // Returns an instruction which produces the base pointer for a given
1130 // instruction. The instruction is assumed to be an input to one of the BDVs
1131 // seen in the inference algorithm above. As such, we must either already
1132 // know its base defining value is a base, or have inserted a new
1133 // instruction to propagate the base of its BDV and have entered that newly
1134 // introduced instruction into the state table. In either case, we are
1135 // assured to be able to determine an instruction which produces its base
1140 if (
auto It = States.
find(BDV); It == States.
end()) {
1144 // Either conflict or base.
1145 Base = It->second.getBaseValue();
1148 // The cast is needed since base traversal may strip away bitcasts
1149 if (
Base->getType() !=
Input->getType() && InsertPt)
1151 InsertPt->getIterator());
1155 // Fixup all the inputs of the new PHIs. Visit order needs to be
1156 // deterministic and predictable because we're naming newly created
1158 for (
auto Pair : States) {
1160 BDVState State = Pair.second;
1162 // Only values that do not have known bases or those that have differing
1163 // type (scalar versus vector) from a possible known base should be in the
1167 "why did it get added?");
1168 assert(!State.isUnknown() &&
"Optimistic algorithm didn't complete!");
1169 if (!State.isConflict())
1176 // The IR verifier requires phi nodes with multiple entries from the
1177 // same basic block to have the same incoming value for each of those
1178 // entries. Since we're inserting bitcasts in the loop, make sure we
1179 // do so at least once per incoming block.
1181 for (
unsigned i = 0; i < NumPHIValues; i++) {
1184 auto [It, Inserted] = BlockToValue.
try_emplace(InBB);
1189 Value *OldBase = It->second;
1190 Value *
Base = getBaseForInput(InVal,
nullptr);
1192 // We can't use `stripPointerCasts` instead of this function because
1193 // `stripPointerCasts` doesn't handle vectors of pointers.
1194 auto StripBitCasts = [](
Value *V) ->
Value * {
1196 V = BC->getOperand(0);
1199 // In essence this assert states: the only way two values
1200 // incoming from the same basic block may be different is by
1201 // being different bitcasts of the same value. A cleanup
1202 // that remains TODO is changing findBaseOrBDV to return an
1203 // llvm::Value of the correct type (and still remain pure).
1204 // This will remove the need to add bitcasts.
1205 assert(StripBitCasts(
Base) == StripBitCasts(OldBase) &&
1206 "findBaseOrBDV should be pure!");
1210 BasePHI->setIncomingValue(i,
Base);
1216 // Find the instruction which produces the base for each input.
1217 // We may need to insert a bitcast.
1218 BaseSI->setTrueValue(getBaseForInput(
SI->getTrueValue(), BaseSI));
1219 BaseSI->setFalseValue(getBaseForInput(
SI->getFalseValue(), BaseSI));
1220 }
else if (
auto *BaseEE =
1223 // Find the instruction which produces the base for each input. We may
1224 // need to insert a bitcast.
1225 BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
1228 auto UpdateOperand = [&](
int OperandIdx) {
1229 Value *InVal = BdvIE->getOperand(OperandIdx);
1230 Value *
Base = getBaseForInput(InVal, BaseIE);
1231 BaseIE->setOperand(OperandIdx,
Base);
1233 UpdateOperand(0);
// vector operand
1234 UpdateOperand(1);
// scalar operand
1238 auto UpdateOperand = [&](
int OperandIdx) {
1239 Value *InVal = BdvSV->getOperand(OperandIdx);
1240 Value *
Base = getBaseForInput(InVal, BaseSV);
1241 BaseSV->setOperand(OperandIdx,
Base);
1243 UpdateOperand(0);
// vector operand
1244 if (!BdvSV->isZeroEltSplat())
1245 UpdateOperand(1);
// vector operand
1247 // Never read, so just use poison
1248 Value *InVal = BdvSV->getOperand(1);
1258 // get the data layout to compare the sizes of base/derived pointer values
1259 [[maybe_unused]]
auto &
DL =
1261 // Cache all of our results so we can cheaply reuse them
1262 // NOTE: This is actually two caches: one of the base defining value
1263 // relation and one of the base pointer relation! FIXME
1264 for (
auto Pair : States) {
1265 auto *BDV = Pair.first;
1266 Value *
Base = Pair.second.getBaseValue();
1268 // Whenever we have a derived ptr(s), their base
1269 // ptr(s) must be of the same size, not necessarily the same type
1271 DL.getTypeAllocSize(
Base->getType()) &&
1272 "Derived and base values should have same size");
1273 // Only values that do not have known bases or those that have differing
1274 // type (scalar versus vector) from a possible known base should be in the
1278 "why did it get added?");
1281 dbgs() <<
"Updating base value cache"
1282 <<
" for: " << BDV->
getName() <<
" from: "
1283 << (Cache.count(BDV) ? Cache[BDV]->getName().str() :
"none")
1284 <<
" to: " <<
Base->getName() <<
"\n");
1288 assert(Cache.count(Def));
1292// For a set of live pointers (base and/or derived), identify the base
1293// pointer of the object which they are derived from. This routine will
1294// mutate the IR graph as needed to make the 'base' pointer live at the
1295// definition site of 'derived'. This ensures that any use of 'derived' can
1296// also use 'base'. This may involve the insertion of a number of
1297// additional PHI nodes.
1299// preconditions: live is a set of pointer type Values
1301// side effects: may insert PHI nodes into the existing CFG, will preserve
1302// CFG, will not remove or mutate any existing nodes
1304// post condition: PointerToBase contains one (derived, base) pair for every
1305// pointer in live. Note that derived can be equal to base if the original
1306// pointer was a base pointer.
1309 DefiningValueMapTy &DVCache,
1310 IsKnownBaseMapTy &KnownBases) {
1311 for (
Value *ptr : live) {
1313 assert(base &&
"failed to find base pointer");
1314 PointerToBase[ptr] = base;
1318 "The base we found better dominate the derived pointer");
1322/// Find the required base pointers (and adjust the live set) for the given
1326 PartiallyConstructedSafepointRecord &result,
1327 PointerToBaseTy &PointerToBase,
1328 IsKnownBaseMapTy &KnownBases) {
1329 StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
1330 // We assume that all pointers passed to deopt are base pointers; as an
1331 // optimization, we can use this to avoid separately materializing the base
1332 // pointer graph. This is only relevant since we're very conservative about
1333 // generating new conflict nodes during base pointer insertion. If we were
1334 // smarter there, this would be irrelevant.
1336 for (
Value *V : Opt->Inputs) {
1337 if (!PotentiallyDerivedPointers.count(V))
1339 PotentiallyDerivedPointers.remove(V);
1340 PointerToBase[V] = V;
1346/// Given an updated version of the dataflow liveness results, update the
1347/// liveset and base pointer maps for the call site CS.
1350 PartiallyConstructedSafepointRecord &result,
1351 PointerToBaseTy &PointerToBase,
1357 PointerToBaseTy &PointerToBase,
GCStrategy *GC) {
1358 // TODO-PERF: reuse the original liveness, then simply run the dataflow
1359 // again. The old values are still live and will help it stabilize quickly.
1360 GCPtrLivenessData RevisedLivenessData;
1362 for (
size_t i = 0; i < records.
size(); i++) {
1363 struct PartiallyConstructedSafepointRecord &
info = records[i];
1369// Utility function which clones all instructions from "ChainToBase"
1370// and inserts them before "InsertBefore". Returns rematerialized value
1371// which should be used after statepoint.
1375 Value *AlternateLiveBase) {
1378 // Walk backwards to visit top-most instructions first.
1380 // Only GEP's and casts are supported as we need to be careful to not
1381 // introduce any new uses of pointers not in the liveset.
1382 // Note that it's fine to introduce new uses of pointers which were
1383 // otherwise not used after this statepoint.
1388 ClonedValue->
setName(Instr->getName() +
".remat");
1390 // If it is not first instruction in the chain then it uses previously
1391 // cloned value. We should update it to use cloned value.
1392 if (LastClonedValue) {
1397 // Assert that cloned instruction does not use any instructions from
1398 // this chain other than LastClonedValue
1400 "incorrect use in rematerialization chain");
1401 // Assert that the cloned instruction does not use the RootOfChain
1402 // or the AlternateLiveBase.
1403 assert(OpValue != RootOfChain && OpValue != AlternateLiveBase);
1407 // For the first instruction, replace the use of unrelocated base i.e.
1408 // RootOfChain/OrigRootPhi, with the corresponding PHI present in the
1409 // live set. They have been proved to be the same PHI nodes. Note
1410 // that the *only* use of the RootOfChain in the ChainToBase list is
1411 // the first Value in the list.
1412 if (RootOfChain != AlternateLiveBase)
1416 LastClonedValue = ClonedValue;
1420 return LastClonedValue;
1423// When inserting gc.relocate and gc.result calls, we need to ensure there are
1424// no uses of the original value / return value between the gc.statepoint and
1425// the gc.relocate / gc.result call. One case which can arise is a phi node
1426// starting one of the successor blocks. We also need to be able to insert the
1427// gc.relocates only on the path which goes through the statepoint. We might
1428// need to split an edge to make this possible.
1436 // Now that 'Ret' has unique predecessor we can safely remove all phi nodes
1440 "All PHI nodes should have been removed!");
1442 // At this point, we can safely insert a gc.relocate or gc.result as the first
1443 // instruction in Ret if needed.
1447// List of all function attributes which must be stripped when lowering from
1448// abstract machine model to physical machine model. Essentially, these are
1449// all the effects a safepoint might have which we ignored in the abstract
1450// machine model for purposes of optimization. We have to strip these on
1451// both function declarations and call sites.
1453 {Attribute::Memory, Attribute::NoSync, Attribute::NoFree};
1455// Create new attribute set containing only attributes which can be transferred
1456// from the original call to the safepoint.
1458 AttributeList StatepointAL) {
1459 AttributeList OrigAL =
Call->getAttributes();
1460 if (OrigAL.isEmpty())
1461 return StatepointAL;
1463 // Remove the readonly, readnone, and statepoint function attributes.
1465 AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
1467 FnAttrs.removeAttribute(Attr);
1471 FnAttrs.removeAttribute(
A);
1474 StatepointAL = StatepointAL.addFnAttributes(Ctx, FnAttrs);
1476 // The memory intrinsics do not have a 1:1 correspondence of the original
1477 // call arguments to the produced statepoint. Do not transfer the argument
1478 // attributes to avoid putting them on incorrect arguments.
1480 return StatepointAL;
1482 // Attach the argument attributes from the original call at the corresponding
1483 // arguments in the statepoint. Note that any argument attributes that are
1484 // invalid after lowering are stripped in stripNonValidDataFromBody.
1486 StatepointAL = StatepointAL.addParamAttributes(
1488 AttrBuilder(Ctx, OrigAL.getParamAttrs(
I)));
1490 // Return attributes are later attached to the gc.result intrinsic.
1491 return StatepointAL;
1494/// Helper function to place all gc relocates necessary for the given
1497/// liveVariables - list of variables to be relocated.
1498/// basePtrs - base pointers.
1499/// statepointToken - statepoint instruction to which relocates should be
1501/// Builder - LLVM IR builder to be used to construct new calls.
1511 assert(ValIt != LiveVec.
end() &&
"Val not found in LiveVec!");
1512 size_t Index = std::distance(LiveVec.
begin(), ValIt);
1513 assert(Index < LiveVec.
size() &&
"Bug in std::find?");
1518 // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
1519 // element type is i8 addrspace(1)*). We originally generated unique
1520 // declarations for each pointer type, but this proved problematic because
1521 // the intrinsic mangling code is incomplete and fragile. Since we're moving
1522 // towards a single unified pointer type anyways, we can just cast everything
1523 // to an i8* of the right address space. A bitcast is added later to convert
1524 // gc_relocate to the actual value's type.
1525 auto getGCRelocateDecl = [&](
Type *Ty) {
1527 auto AS = Ty->getScalarType()->getPointerAddressSpace();
1533 M, Intrinsic::experimental_gc_relocate, {NewTy});
1536 // Lazily populated map from input types to the canonicalized form mentioned
1537 // in the comment above. This should probably be cached somewhere more
1542 // Generate the gc.relocate call and save the result
1544 Value *LiveIdx = Builder.getInt32(i);
1547 auto [It, Inserted] = TypeToDeclMap.
try_emplace(Ty);
1549 It->second = getGCRelocateDecl(Ty);
1550 Function *GCRelocateDecl = It->second;
1552 // only specify a debug name if we can give a useful one
1554 GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
1556 // Trick CodeGen into thinking there are lots of free registers at this
1564/// This struct is used to defer RAUWs and `eraseFromParent` s. Using this
1565/// avoids having to worry about keeping around dangling pointers to Values.
1566class DeferredReplacement {
1567 AssertingVH<Instruction> Old;
1568 AssertingVH<Instruction>
New;
1569 bool IsDeoptimize =
false;
1571 DeferredReplacement() =
default;
1574 static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
1575 assert(Old != New && Old && New &&
1576 "Cannot RAUW equal values or to / from null!");
1578 DeferredReplacement
D;
1584 static DeferredReplacement createDelete(Instruction *ToErase) {
1585 DeferredReplacement
D;
1590 static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
1593 assert(
F &&
F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
1594 "Only way to construct a deoptimize deferred replacement");
1596 DeferredReplacement
D;
1598 D.IsDeoptimize =
true;
1602 /// Does the task represented by this instance.
1603 void doReplacement() {
1607 assert(OldI != NewI &&
"Disallowed at construction?!");
1608 assert((!IsDeoptimize || !New) &&
1609 "Deoptimize intrinsics are not replaced!");
1618 // Note: we've inserted instructions, so the call to llvm.deoptimize may
1619 // not necessarily be followed by the matching return.
1621 new UnreachableInst(RI->getContext(), RI->getIterator());
1622 RI->eraseFromParent();
1629}
// end anonymous namespace
1632 const char *DeoptLowering =
"deopt-lowering";
1633 if (
Call->hasFnAttr(DeoptLowering)) {
1634 // FIXME: Calls have a *really* confusing interface around attributes
1636 const AttributeList &CSAS =
Call->getAttributes();
1637 if (CSAS.hasFnAttr(DeoptLowering))
1638 return CSAS.getFnAttr(DeoptLowering).getValueAsString();
1640 assert(
F &&
F->hasFnAttribute(DeoptLowering));
1641 return F->getFnAttribute(DeoptLowering).getValueAsString();
1643 return "live-through";
1650 PartiallyConstructedSafepointRecord &Result,
1651 std::vector<DeferredReplacement> &Replacements,
1652 const PointerToBaseTy &PointerToBase,
1656 // Then go ahead and use the builder to actually do the inserts. We insert
1657 // immediately before the previous instruction under the assumption that all
1658 // arguments will be available here. We can't insert afterwards since we may
1659 // be replacing a terminator.
1668 std::optional<ArrayRef<Use>> DeoptArgs;
1670 DeoptArgs = Bundle->Inputs;
1671 std::optional<ArrayRef<Use>> TransitionArgs;
1673 TransitionArgs = Bundle->Inputs;
1674 // TODO: This flag no longer serves a purpose and can be removed later
1678 // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
1679 // with a return value, we lower them as never returning calls to
1680 // __llvm_deoptimize that are followed by unreachable to get better codegen.
1681 bool IsDeoptimize =
false;
1682 bool IsMemIntrinsic =
false;
1691 // Pass through the requested lowering if any. The default is live-through.
1693 if (DeoptLowering ==
"live-in")
1696 assert(DeoptLowering ==
"live-through" &&
"Unsupported value!");
1701 auto IID =
F->getIntrinsicID();
1702 if (IID == Intrinsic::experimental_deoptimize) {
1703 // Calls to llvm.experimental.deoptimize are lowered to calls to the
1704 // __llvm_deoptimize symbol. We want to resolve this now, since the
1705 // verifier does not allow taking the address of an intrinsic function.
1708 for (
Value *Arg : CallArgs)
1711 /* isVarArg = */ false);
1713 // Note: CallTarget can be a bitcast instruction of a symbol if there are
1714 // calls to @llvm.experimental.deoptimize with different argument types in
1715 // the same module. This is fine -- we assume the frontend knew what it
1716 // was doing when generating this kind of IR.
1717 CallTarget =
F->getParent()
1718 ->getOrInsertFunction(
"__llvm_deoptimize", FTy);
1720 IsDeoptimize =
true;
1721 }
else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
1722 IID == Intrinsic::memmove_element_unordered_atomic) {
1723 IsMemIntrinsic =
true;
1725 // Unordered atomic memcpy and memmove intrinsics which are not explicitly
1726 // marked as "gc-leaf-function" should be lowered in a GC parseable way.
1727 // Specifically, these calls should be lowered to the
1728 // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
1729 // Similarly to __llvm_deoptimize we want to resolve this now, since the
1730 // verifier does not allow taking the address of an intrinsic function.
1732 // Moreover we need to shuffle the arguments for the call in order to
1733 // accommodate GC. The underlying source and destination objects might be
1734 // relocated during copy operation should the GC occur. To relocate the
1735 // derived source and destination pointers the implementation of the
1736 // intrinsic should know the corresponding base pointers.
1738 // To make the base pointers available pass them explicitly as arguments:
1739 // memcpy(dest_derived, source_derived, ...) =>
1740 // memcpy(dest_base, dest_offset, source_base, source_offset, ...)
1741 auto &Context =
Call->getContext();
1742 auto &
DL =
Call->getDataLayout();
1743 auto GetBaseAndOffset = [&](
Value *Derived) {
1745 // Optimizations in unreachable code might substitute the real pointer
1746 // with undef, poison or null-derived constant. Return null base for
1747 // them to be consistent with the handling in the main algorithm in
1748 // findBaseDefiningValue.
1753 assert(PointerToBase.count(Derived));
1754 Base = PointerToBase.find(Derived)->second;
1756 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
1758 Value *Base_int = Builder.CreatePtrToInt(
1760 Value *Derived_int = Builder.CreatePtrToInt(
1762 return std::make_pair(
Base, Builder.CreateSub(Derived_int, Base_int));
1765 auto *Dest = CallArgs[0];
1766 Value *DestBase, *DestOffset;
1767 std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);
1769 auto *Source = CallArgs[1];
1770 Value *SourceBase, *SourceOffset;
1771 std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);
1773 auto *LengthInBytes = CallArgs[2];
1784 for (
Value *Arg : CallArgs)
1787 /* isVarArg = */ false);
1790 uint64_t ElementSize = ElementSizeCI->getZExtValue();
1791 if (IID == Intrinsic::memcpy_element_unordered_atomic) {
1792 switch (ElementSize) {
1794 return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
1796 return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
1798 return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
1800 return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
1802 return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
1807 assert(IID == Intrinsic::memmove_element_unordered_atomic);
1808 switch (ElementSize) {
1810 return "__llvm_memmove_element_unordered_atomic_safepoint_1";
1812 return "__llvm_memmove_element_unordered_atomic_safepoint_2";
1814 return "__llvm_memmove_element_unordered_atomic_safepoint_4";
1816 return "__llvm_memmove_element_unordered_atomic_safepoint_8";
1818 return "__llvm_memmove_element_unordered_atomic_safepoint_16";
1826 ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
1830 // Create the statepoint given all the arguments
1833 CallInst *SPCall = Builder.CreateGCStatepointCall(
1834 StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
1835 TransitionArgs, DeoptArgs, GCLive,
"safepoint_token");
1840 // Set up function attrs directly on statepoint and return attrs later for
1841 // gc_result intrinsic.
1847 // Put the following gc_result and gc_relocate calls immediately after the
1848 // old call (which we're about to delete)
1849 assert(CI->getNextNode() &&
"Not a terminator, must have next!");
1850 Builder.SetInsertPoint(CI->getNextNode());
1851 Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
1855 // Insert the new invoke into the old block. We'll remove the old one in a
1856 // moment at which point this will become the new terminator for the
1858 InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
1859 StatepointID, NumPatchBytes, CallTarget,
II->getNormalDest(),
1860 II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
1861 GCLive,
"statepoint_token");
1865 // Set up function attrs directly on statepoint and return attrs later for
1866 // gc_result intrinsic.
1872 // Generate gc relocates in exceptional path
1876 "can't safely insert in this block!");
1879 Builder.SetCurrentDebugLocation(
II->getDebugLoc());
1881 // Attach exceptional gc relocates to the landingpad.
1883 Result.UnwindToken = ExceptionalToken;
1887 // Generate gc relocates and returns for normal block
1891 "can't safely insert in this block!");
1895 // gc relocates will be generated later as if it were regular call
1898 assert(Token &&
"Should be set in one of the above branches!");
1901 // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
1902 // transform the tail-call like structure to a call to a void function
1903 // followed by unreachable to get better codegen.
1904 Replacements.push_back(
1905 DeferredReplacement::createDeoptimizeReplacement(
Call));
1907 Token->
setName(
"statepoint_token");
1908 if (!
Call->getType()->isVoidTy() && !
Call->use_empty()) {
1910 CallInst *GCResult = Builder.CreateGCResult(Token,
Call->getType(), Name);
1912 AttributeList::get(GCResult->
getContext(), AttributeList::ReturnIndex,
1913 Call->getAttributes().getRetAttrs()));
1915 // We cannot RAUW or delete CS.getInstruction() because it could be in the
1916 // live set of some other safepoint, in which case that safepoint's
1917 // PartiallyConstructedSafepointRecord will hold a raw pointer to this
1918 // llvm::Instruction. Instead, we defer the replacement and deletion to
1919 // after the live sets have been made explicit in the IR, and we no longer
1920 // have raw pointers to worry about.
1921 Replacements.emplace_back(
1922 DeferredReplacement::createRAUW(
Call, GCResult));
1924 Replacements.emplace_back(DeferredReplacement::createDelete(
Call));
1928 Result.StatepointToken = Token;
1930 // Second, create a gc.relocate for every live variable
1934// Replace an existing gc.statepoint with a new one and a set of gc.relocates
1935// which make the relocations happening at this safepoint explicit.
1937// WARNING: Does not do any fixup to adjust users of the original live
1938// values. That's the caller's responsibility.
1941 PartiallyConstructedSafepointRecord &Result,
1942 std::vector<DeferredReplacement> &Replacements,
1943 const PointerToBaseTy &PointerToBase,
GCStrategy *GC) {
1944 const auto &LiveSet = Result.LiveSet;
1946 // Convert to vector for efficient cross referencing.
1948 LiveVec.
reserve(LiveSet.size());
1949 BaseVec.
reserve(LiveSet.size());
1950 for (
Value *L : LiveSet) {
1952 assert(PointerToBase.count(L));
1953 Value *
Base = PointerToBase.find(L)->second;
1958 // Do the actual rewriting and delete the old statepoint
1963// Helper function for the relocationViaAlloca.
1965// It receives iterator to the statepoint gc relocates and emits a store to the
1966// assigned location (via allocaMap) for each one of them. It adds the
1967// visited values into the visitedLiveValues set, which we will later use
1968// for validation checking.
1973 for (
User *U : GCRelocs) {
1980 Value *Alloca = AllocaMap[OriginalValue];
1982 // Emit store into the related alloca.
1984 "Should always have one since it's not a terminator");
1988 VisitedLiveValues.
insert(OriginalValue);
1993// Helper function for the "relocationViaAlloca". Similar to the
1994// "insertRelocationStores" but works for rematerialized values.
1996 const RematerializedValueMapTy &RematerializedValues,
1999 for (
auto RematerializedValuePair: RematerializedValues) {
2000 Instruction *RematerializedValue = RematerializedValuePair.first;
2001 Value *OriginalValue = RematerializedValuePair.second;
2004 "Can not find alloca for rematerialized value");
2005 Value *Alloca = AllocaMap[OriginalValue];
2007 new StoreInst(RematerializedValue, Alloca,
2011 VisitedLiveValues.
insert(OriginalValue);
2016/// Do all the relocation update via allocas and mem2reg
2021 // record initial number of (static) allocas; we'll check we have the same
2022 // number when we get done.
2023 int InitialAllocaNum = 0;
2029 // TODO-PERF: change data structures, reserve
2032 // Used later to check that we have enough allocas to store all values
2033 std::size_t NumRematerializedValues = 0;
2036 // Emit alloca for "LiveValue" and record it in "allocaMap" and
2037 // "PromotableAllocas"
2039 auto emitAllocaFor = [&](
Value *LiveValue) {
2041 new AllocaInst(LiveValue->getType(),
DL.getAllocaAddrSpace(),
"",
2042 F.getEntryBlock().getFirstNonPHIIt());
2043 AllocaMap[LiveValue] = Alloca;
2047 // Emit alloca for each live gc pointer
2048 for (
Value *V : Live)
2051 // Emit allocas for rematerialized values
2052 for (
const auto &
Info : Records)
2053 for (
auto RematerializedValuePair :
Info.RematerializedValues) {
2054 Value *OriginalValue = RematerializedValuePair.second;
2055 if (AllocaMap.
contains(OriginalValue))
2058 emitAllocaFor(OriginalValue);
2059 ++NumRematerializedValues;
2062 // The next two loops are part of the same conceptual operation. We need to
2063 // insert a store to the alloca after the original def and at each
2064 // redefinition. We need to insert a load before each use. These are split
2065 // into distinct loops for performance reasons.
2067 // Update gc pointer after each statepoint: either store a relocated value or
2068 // null (if no relocated value was found for this gc pointer and it is not a
2069 // gc_result). This must happen before we update the statepoint with load of
2070 // alloca otherwise we lose the link between statepoint and old def.
2071 for (
const auto &
Info : Records) {
2072 Value *Statepoint =
Info.StatepointToken;
2074 // This will be used for consistency check
2077 // Insert stores for normal statepoint gc relocates
2080 // In case it was an invoke statepoint,
2081 // we will insert stores for exceptional path gc relocates.
2087 // Do similar thing with rematerialized values
2092 // As a debugging aid, pretend that an unrelocated pointer becomes null at
2093 // the gc.statepoint. This will turn some subtle GC problems into
2094 // slightly easier to debug SEGVs. Note that on large IR files with
2095 // lots of gc.statepoints this is extremely costly both memory and time
2098 for (
auto Pair : AllocaMap) {
2099 Value *Def = Pair.first;
2102 // This value was relocated
2103 if (VisitedLiveValues.
count(Def)) {
2110 for (
auto *AI : ToClobber) {
2111 auto AT = AI->getAllocatedType();
2113 if (AT->isVectorTy())
2121 // Insert the clobbering stores. These may get intermixed with the
2122 // gc.results and gc.relocates, but that's fine.
2124 InsertClobbersAt(
II->getNormalDest()->getFirstInsertionPt());
2125 InsertClobbersAt(
II->getUnwindDest()->getFirstInsertionPt());
2133 // Update use with load allocas and add store for gc_relocated.
2134 for (
auto Pair : AllocaMap) {
2135 Value *Def = Pair.first;
2138 // We pre-record the uses of allocas so that we don't have to worry about
2139 // later update that changes the user information.
2142 // PERF: trade a linear scan for repeated reallocation
2143 Uses.reserve(Def->getNumUses());
2144 for (
User *U : Def->users()) {
2146 // If the def has a ConstantExpr use, then the def is either a
2147 // ConstantExpr use itself or null. In either case
2148 // (recursively in the first, directly in the second), the oop
2149 // it is ultimately dependent on is null and this particular
2150 // use does not need to be fixed up.
2162 for (
unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
2163 if (Def == Phi->getIncomingValue(i)) {
2166 Phi->getIncomingBlock(i)->getTerminator()->getIterator());
2167 Phi->setIncomingValue(i, Load);
2172 Use->getIterator());
2173 Use->replaceUsesOfWith(Def, Load);
2177 // Emit store for the initial gc value. Store must be inserted after load,
2178 // otherwise store will be in alloca's use list and an extra load will be
2179 // inserted before it.
2181 DL.getABITypeAlign(Def->getType()));
2184 // InvokeInst is a terminator so the store needs to be inserted into its
2185 // normal destination block.
2186 BasicBlock *NormalDest = Invoke->getNormalDest();
2189 assert(!Inst->isTerminator() &&
2190 "The only terminator that can produce a value is "
2191 "InvokeInst which is handled above.");
2192 Store->insertAfter(Inst->getIterator());
2200 assert(PromotableAllocas.
size() == Live.
size() + NumRematerializedValues &&
2201 "we must have the same allocas with lives");
2202 (void) NumRematerializedValues;
2203 if (!PromotableAllocas.
empty()) {
2204 // Apply mem2reg to promote alloca to SSA
2209 for (
auto &
I :
F.getEntryBlock())
2212 assert(InitialAllocaNum == 0 &&
"We must not introduce any extra allocas");
2216/// Insert holders so that each Value is obviously live through the entire
2217/// lifetime of the call.
2221 // No values to hold live, might as well not insert the empty holder
2225 // Use a dummy vararg function to actually hold the values live
2229 // For call safepoints insert dummy calls right after safepoint
2234 // For invoke safepoints insert dummy calls both in normal and
2235 // exceptional destination blocks
2238 Func, Values,
"",
II->getNormalDest()->getFirstInsertionPt()));
2240 Func, Values,
"",
II->getUnwindDest()->getFirstInsertionPt()));
2247 GCPtrLivenessData OriginalLivenessData;
2249 for (
size_t i = 0; i < records.
size(); i++) {
2250 struct PartiallyConstructedSafepointRecord &
info = records[i];
2255// Helper function for the "rematerializeLiveValues". It walks use chain
2256// starting from the "CurrentValue" until it reaches the root of the chain, i.e.
2257// the base or a value it cannot process. Only "simple" values are processed
2258// (currently it is GEP's and casts). The returned root is examined by the
2259// callers of findRematerializableChainToBasePointer. Fills "ChainToBase" array
2260// with all visited values.
2263 Value *CurrentValue) {
2267 GEP->getPointerOperand());
2271 if (!CI->isNoopCast(CI->getDataLayout()))
2279 // We have reached the root of the chain, which is either equal to the base or
2280 // is the first unsupported value along the use chain.
2281 return CurrentValue;
2284// Helper function for the "rematerializeLiveValues". Compute cost of the use
2285// chain we are going to rematerialize.
2293 assert(CI->isNoopCast(CI->getDataLayout()) &&
2294 "non noop cast is found during rematerialization");
2296 Type *SrcTy = CI->getOperand(0)->getType();
2297 Cost +=
TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
2302 // Cost of the address calculation
2303 Cost +=
TTI.getAddressComputationCost(
2304 GEP->getType(),
nullptr,
nullptr,
2307 // And cost of the GEP itself
2308 // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
2309 // allowed for the external usage)
2310 if (!
GEP->hasAllConstantIndices())
2326 // Map of incoming values and their corresponding basic blocks of
2329 for (
unsigned i = 0; i < PhiNum; i++)
2333 // Both current and base PHIs should have same incoming values and
2334 // the same basic blocks corresponding to the incoming values.
2335 for (
unsigned i = 0; i < PhiNum; i++) {
2338 if (CIVI == CurrentIncomingValues.
end())
2340 BasicBlock *CurrentIncomingBB = CIVI->second;
2347// Find derived pointers that can be recomputed cheaply enough and fill
2348// RematerizationCandidates with such candidates.
2351 RematCandTy &RematerizationCandidates,
2353 const unsigned int ChainLengthThreshold = 10;
2355 for (
auto P2B : PointerToBase) {
2356 auto *Derived = P2B.first;
2357 auto *
Base = P2B.second;
2358 // Consider only derived pointers.
2359 if (Derived ==
Base)
2362 // For each live pointer find its defining chain.
2364 Value *RootOfChain =
2367 // Nothing to do, or chain is too long
2368 if ( ChainToBase.
size() == 0 ||
2369 ChainToBase.
size() > ChainLengthThreshold)
2372 // Handle the scenario where the RootOfChain is not equal to the
2373 // Base Value, but they are essentially the same phi values.
2374 if (
Value *BaseVal = PointerToBase[Derived]; RootOfChain != BaseVal) {
2377 if (!OrigRootPhi || !AlternateRootPhi)
2379 // PHI nodes that have the same incoming values, and belonging to the same
2380 // basic blocks are essentially the same SSA value. When the original phi
2381 // has incoming values with different base pointers, the original phi is
2382 // marked as conflict, and an additional `AlternateRootPhi` with the same
2383 // incoming values gets generated by the findBasePointer function. We need
2384 // to identify the newly generated AlternateRootPhi (.base version of phi)
2385 // and RootOfChain (the original phi node itself) are the same, so that we
2386 // can rematerialize the gep and casts. This is a workaround for the
2387 // deficiency in the findBasePointer algorithm.
2391 // Compute cost of this chain.
2393 // TODO: We can also account for cases when we will be able to remove some
2394 // of the rematerialized values by later optimization passes. I.e if
2395 // we rematerialized several intersecting chains. Or if original values
2396 // don't have any uses besides this statepoint.
2398 // Ok, there is a candidate.
2399 RematerizlizationCandidateRecord
Record;
2400 Record.ChainToBase = ChainToBase;
2401 Record.RootOfChain = RootOfChain;
2403 RematerizationCandidates.insert({ Derived,
Record });
2407// Try to rematerialize derived pointers immediately before their uses
2408// (instead of rematerializing after every statepoint it is live through).
2409// This can be beneficial when derived pointer is live across many
2410// statepoints, but uses are rare.
2412 RematCandTy &RematerizationCandidates,
2414 PointerToBaseTy &PointerToBase) {
2421 <<
"Num statepoints: " << Records.
size() <<
'\n');
2423 for (
auto &It : RematerizationCandidates) {
2425 auto &
Record = It.second;
2435 if (U->getParent() == Cand->
getParent())
2438 // Rematerialization before PHI nodes is not implemented.
2440 [](
const auto *U) { return isa<PHINode>(U); }))
2445 // Count of rematerialization instructions we introduce is equal to number
2446 // of candidate uses.
2447 // Count of rematerialization instructions we eliminate is equal to number
2448 // of statepoints it is live through.
2449 // Consider transformation profitable if latter is greater than former
2450 // (in other words, we create less than eliminate).
2452 Records, [Cand](
const auto &R) {
return R.LiveSet.contains(Cand); });
2455 LLVM_DEBUG(
dbgs() <<
"Num uses: " << NumUses <<
" Num live statepoints: "
2456 << NumLiveStatepoints <<
" ");
2458 if (NumLiveStatepoints < NumUses) {
2463 // If rematerialization is 'free', then favor rematerialization at
2464 // uses as it generally shortens live ranges.
2465 // TODO: Short (size ==1) chains only?
2466 if (NumLiveStatepoints == NumUses &&
Record.Cost > 0) {
2473 // ChainToBase may contain another remat candidate (as a sub chain) which
2474 // has been rewritten by now. Need to recollect chain to have up to date
2476 // TODO: sort records in findRematerializationCandidates() in
2477 // decreasing chain size order?
2478 if (
Record.ChainToBase.size() > 1) {
2479 Record.ChainToBase.clear();
2483 // Current rematerialization algorithm is very simple: we rematerialize
2484 // immediately before EVERY use, even if there are several uses in same
2485 // block or if use is local to Cand Def. The reason is that this allows
2486 // us to avoid recomputing liveness without complicated analysis:
2487 // - If we did not eliminate all uses of original Candidate, we do not
2488 // know exactly in what BBs it is still live.
2489 // - If we rematerialize once per BB, we need to find proper insertion
2490 // place (first use in block, but after Def) and analyze if there is
2491 // statepoint between uses in the block.
2496 Record.RootOfChain, PointerToBase[Cand]);
2498 PointerToBase[RematChain] = PointerToBase[Cand];
2504 <<
" derived pointers\n");
2505 for (
auto *Cand : LiveValuesToBeDeleted) {
2506 assert(Cand->use_empty() &&
"Unexpected user remain");
2507 RematerizationCandidates.erase(Cand);
2508 for (
auto &R : Records) {
2509 assert(!R.LiveSet.contains(Cand) ||
2510 R.LiveSet.contains(PointerToBase[Cand]));
2511 R.LiveSet.remove(Cand);
2515 // Recollect not rematerialized chains - we might have rewritten
2516 // their sub-chains.
2517 if (!LiveValuesToBeDeleted.
empty()) {
2518 for (
auto &
P : RematerizationCandidates) {
2520 if (R.ChainToBase.size() > 1) {
2521 R.ChainToBase.clear();
2528// From the statepoint live set pick values that are cheaper to recompute than
2529// to relocate. Remove these values from the live set, rematerialize them after
2530// statepoint and record them in "Info" structure. Note that similar to
2531// relocated values we don't do any user adjustments here.
2533 PartiallyConstructedSafepointRecord &
Info,
2534 PointerToBaseTy &PointerToBase,
2535 RematCandTy &RematerizationCandidates,
2537 // Record values we are going to delete from this statepoint live set.
2538 // We cannot do this in the following loop due to iterator invalidation.
2542 auto It = RematerizationCandidates.find(LiveValue);
2543 if (It == RematerizationCandidates.end())
2546 RematerizlizationCandidateRecord &
Record = It->second;
2549 // For invokes we need to rematerialize each chain twice - for normal and
2550 // for unwind basic blocks. Model this by multiplying cost by two.
2554 // If it's too expensive - skip it.
2558 // Remove value from the live set
2559 LiveValuesToBeDeleted.
push_back(LiveValue);
2561 // Clone instructions and record them inside "Info" structure.
2563 // Different cases for calls and invokes. For invokes we need to clone
2564 // instructions both on normal and unwind path.
2570 Record.RootOfChain, PointerToBase[LiveValue]);
2571 Info.RematerializedValues[RematerializedValue] = LiveValue;
2576 Invoke->getNormalDest()->getFirstInsertionPt();
2578 Invoke->getUnwindDest()->getFirstInsertionPt();
2582 Record.RootOfChain, PointerToBase[LiveValue]);
2585 Record.RootOfChain, PointerToBase[LiveValue]);
2587 Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
2588 Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
2592 // Remove rematerialized values from the live set.
2593 for (
auto *LiveValue: LiveValuesToBeDeleted) {
2594 Info.LiveSet.remove(LiveValue);
2600 DefiningValueMapTy &DVCache,
2601 IsKnownBaseMapTy &KnownBases) {
2602 auto &Context =
F.getContext();
2603 auto &
DL =
F.getDataLayout();
2606 for (
auto *Callsite : Intrinsics)
2607 switch (Callsite->getIntrinsicID()) {
2608 case Intrinsic::experimental_gc_get_pointer_base: {
2612 assert(!DVCache.count(Callsite));
2613 Callsite->replaceAllUsesWith(
Base);
2614 if (!
Base->hasName())
2615 Base->takeName(Callsite);
2616 Callsite->eraseFromParent();
2619 case Intrinsic::experimental_gc_get_pointer_offset: {
2621 Value *Derived = Callsite->getOperand(0);
2623 assert(!DVCache.count(Callsite));
2633 Value *
Offset = Builder.CreateSub(DerivedInt, BaseInt);
2634 Callsite->replaceAllUsesWith(
Offset);
2635 Offset->takeName(Callsite);
2636 Callsite->eraseFromParent();
2649 DefiningValueMapTy &DVCache,
2650 IsKnownBaseMapTy &KnownBases) {
2654 // Validate the input
2655 std::set<CallBase *> Uniqued;
2656 Uniqued.insert(ToUpdate.
begin(), ToUpdate.
end());
2657 assert(Uniqued.size() == ToUpdate.
size() &&
"no duplicates please!");
2663 // When inserting gc.relocates for invokes, we need to be able to insert at
2664 // the top of the successor blocks. See the comment on
2665 // normalForInvokeSafepoint on exactly what is needed. Note that this step
2666 // may restructure the CFG.
2675 // A list of dummy calls added to the IR to keep various values obviously
2676 // live in the IR. We'll remove all of these when done.
2679 // Insert a dummy call with all of the deopt operands we'll need for the
2680 // actual safepoint insertion as arguments. This ensures reference operands
2681 // in the deopt argument list are considered live through the safepoint (and
2682 // thus makes sure they get relocated.)
2688 "support for FCA unimplemented");
2698 // A) Identify all gc pointers which are statically live at the given call
2702 /// Global mapping from live pointers to a base-defining-value.
2703 PointerToBaseTy PointerToBase;
2705 // B) Find the base pointers for each live pointer
2706 for (
size_t i = 0; i < Records.
size(); i++) {
2707 PartiallyConstructedSafepointRecord &
info = Records[i];
2711 errs() <<
"Base Pairs (w/o Relocation):\n";
2712 for (
auto &Pair : PointerToBase) {
2713 errs() <<
" derived ";
2714 Pair.first->printAsOperand(
errs(),
false);
2716 Pair.second->printAsOperand(
errs(),
false);
2722 // The base phi insertion logic (for any safepoint) may have inserted new
2723 // instructions which are now live at some safepoint. The simplest such
2726 // phi a <-- will be a new base_phi here
2727 // safepoint 1 <-- that needs to be live here
2731 // We insert some dummy calls after each safepoint to definitely hold live
2732 // the base pointers which were identified for that safepoint. We'll then
2733 // ask liveness for _every_ base inserted to see what is now live. Then we
2734 // remove the dummy calls.
2736 for (
size_t i = 0; i < Records.
size(); i++) {
2737 PartiallyConstructedSafepointRecord &
Info = Records[i];
2740 for (
auto *Derived :
Info.LiveSet) {
2741 assert(PointerToBase.count(Derived) &&
"Missed base for derived pointer");
2742 Bases.
push_back(PointerToBase[Derived]);
2748 // By selecting base pointers, we've effectively inserted new uses. Thus, we
2749 // need to rerun liveness. We may *also* have inserted new defs, but that's
2750 // not the key issue.
2754 errs() <<
"Base Pairs: (w/Relocation)\n";
2755 for (
auto Pair : PointerToBase) {
2756 errs() <<
" derived ";
2757 Pair.first->printAsOperand(
errs(),
false);
2759 Pair.second->printAsOperand(
errs(),
false);
2764 // It is possible that non-constant live variables have a constant base. For
2765 // example, a GEP with a variable offset from a global. In this case we can
2766 // remove it from the liveset. We already don't add constants to the liveset
2767 // because we assume they won't move at runtime and the GC doesn't need to be
2768 // informed about them. The same reasoning applies if the base is constant.
2769 // Note that the relocation placement code relies on this filtering for
2770 // correctness as it expects the base to be in the liveset, which isn't true
2771 // if the base is constant.
2772 for (
auto &
Info : Records) {
2773 Info.LiveSet.remove_if([&](
Value *LiveV) {
2774 assert(PointerToBase.count(LiveV) &&
"Missed base for derived pointer");
2780 CI->eraseFromParent();
2784 // Compute the cost of possible re-materialization of derived pointers.
2785 RematCandTy RematerizationCandidates;
2788 // In order to reduce live set of statepoint we might choose to rematerialize
2789 // some values instead of relocating them. This is purely an optimization and
2790 // does not influence correctness.
2791 // First try rematerialization at uses, then after statepoints.
2794 for (
size_t i = 0; i < Records.
size(); i++)
2796 RematerizationCandidates,
TTI);
2798 // We need this to safely RAUW and delete call or invoke return values that
2799 // may themselves be live over a statepoint. For details, please see usage in
2800 // makeStatepointExplicitImpl.
2801 std::vector<DeferredReplacement> Replacements;
2803 // Now run through and replace the existing statepoints with new ones with
2804 // the live variables listed. We do not yet update uses of the values being
2805 // relocated. We have references to live variables that need to
2806 // survive to the last iteration of this loop. (By construction, the
2807 // previous statepoint can not be a live variable, thus we can remove
2808 // the old statepoint calls as we go.)
2809 for (
size_t i = 0; i < Records.
size(); i++)
2811 PointerToBase, GC.get());
2813 ToUpdate.
clear();
// prevent accidental use of invalid calls.
2815 for (
auto &PR : Replacements)
2818 Replacements.clear();
2820 for (
auto &
Info : Records) {
2821 // These live sets may contain stale Value pointers, since we replaced calls
2822 // with operand bundles with calls wrapped in gc.statepoint, and some of
2823 // those calls may have been def'ing live gc pointers. Clear these out to
2824 // avoid accidentally using them.
2826 // TODO: We should create a separate data structure that does not contain
2827 // these live sets, and migrate to using that data structure from this point
2829 Info.LiveSet.clear();
2831 PointerToBase.clear();
2833 // Do all the fixups of the original live variables to their relocated selves.
2834 // A SmallSetVector is used to collect live variables while retaining the
2835 // order in which we add them, which is important for reproducible tests.
2837 for (
const PartiallyConstructedSafepointRecord &
Info : Records) {
2838 // We can't simply save the live set from the original insertion. One of
2839 // the live values might be the result of a call which needs a safepoint.
2840 // That Value* no longer exists and we need to use the new gc_result.
2841 // Thankfully, the live set is embedded in the statepoint (and updated), so
2842 // we just grab that.
2845 // Do some basic validation checking on our liveness results before
2846 // performing relocation. Relocation can and will turn mistakes in liveness
2847 // results into nonsensical code which is much harder to debug.
2848 // TODO: It would be nice to test consistency as well
2850 "statepoint must be reachable or liveness is meaningless");
2851 for (
Value *V :
Info.StatepointToken->gc_live()) {
2853 // Non-instruction values trivially dominate all possible uses
2857 "unreachable values should never be live");
2859 "basic SSA liveness expectation violated by liveness analysis");
2866 for (
auto *
Ptr : Live)
2868 "must be a gc pointer type");
2872 return !Records.
empty();
2875// List of all parameter and return attributes which must be stripped when
2876// lowering from the abstract machine model. Note that we list attributes
2877// here which aren't valid as return attributes, that is okay.
2880 R.addAttribute(Attribute::Dereferenceable);
2881 R.addAttribute(Attribute::DereferenceableOrNull);
2882 R.addAttribute(Attribute::ReadNone);
2883 R.addAttribute(Attribute::ReadOnly);
2884 R.addAttribute(Attribute::WriteOnly);
2885 R.addAttribute(Attribute::NoAlias);
2886 R.addAttribute(Attribute::NoFree);
2893 // Intrinsics are very delicate. Lowering sometimes depends on the presence
2894 // of certain attributes for correctness, but we may have also inferred
2895 // additional ones in the abstract machine model which need to be stripped. This
2896 // assumes that the attributes defined in Intrinsic.td are conservatively
2897 // correct for both physical and abstract model.
2906 F.removeParamAttrs(
A.getArgNo(), R);
2909 F.removeRetAttrs(R);
2912 F.removeFnAttr(Attr);
2915/// Certain metadata on instructions are invalid after running RS4GC.
2916/// Optimizations that run after RS4GC can incorrectly use this metadata to
2917/// optimize functions. We drop such metadata on the instruction.
2921 // These are the attributes that are still valid on loads and stores after
2923 // The metadata implying dereferenceability and noalias are (conservatively)
2924 // dropped. This is because semantically, after RewriteStatepointsForGC runs,
2925 // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
2926 // touch the entire heap including noalias objects. Note: The reasoning is
2927 // same as stripping the dereferenceability and noalias attributes that are
2928 // analogous to the metadata counterparts.
2929 // We also drop the invariant.load metadata on the load because that metadata
2930 // implies the address operand to the load points to memory that is never
2931 // changed once it became dereferenceable. This is no longer true after RS4GC.
2932 // Similar reasoning applies to invariant.group metadata, which applies to
2933 // loads within a group.
2934 unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
2935 LLVMContext::MD_range,
2936 LLVMContext::MD_alias_scope,
2937 LLVMContext::MD_nontemporal,
2938 LLVMContext::MD_nonnull,
2939 LLVMContext::MD_align,
2940 LLVMContext::MD_type};
2942 // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
2943 I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
2953 // Set of invariantstart instructions that we need to remove.
2954 // Use this to avoid invalidating the instruction iterator.
2958 // invariant.start on memory location implies that the referenced memory
2959 // location is constant and unchanging. This is no longer true after
2960 // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
2961 // which frees the entire heap and the presence of invariant.start allows
2962 // the optimizer to sink the load of a memory location past a statepoint,
2963 // which is incorrect.
2965 if (
II->getIntrinsicID() == Intrinsic::invariant_start) {
2970 if (
MDNode *Tag =
I.getMetadata(LLVMContext::MD_tbaa)) {
2971 MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag);
2972 I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
2979 for (
int i = 0, e =
Call->arg_size(); i != e; i++)
2981 Call->removeParamAttrs(i, R);
2983 Call->removeRetAttrs(R);
2987 // Delete the invariant.start instructions and RAUW poison.
2988 for (
auto *
II : InvariantStartInstructions) {
2990 II->eraseFromParent();
2994/// Looks up the GC strategy for a given function, returning null if the
2995/// function doesn't have a GC tag. The strategy is stored in the cache.
3003/// Returns true if this function should be rewritten by this pass. The main
3004/// point of this function is as an extension point for custom logic.
3011 assert(Strategy &&
"GC strategy is required by function, but was not found");
3013 return Strategy->useRS4GC();
3031 assert(!
F.isDeclaration() && !
F.empty() &&
3032 "need function body to rewrite statepoints in");
3042 // Normally it's up to the frontend to make sure that non-leaf calls also
3043 // have proper deopt state if it is required. We make an exception for
3044 // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics
3045 // these are non-leaf by default. They might be generated by the optimizer
3046 // which doesn't know how to produce a proper deopt state. So if we see a
3047 // non-leaf memcpy/memmove without deopt state just treat it as a leaf
3048 // copy and don't produce a statepoint.
3052 "Don't expect any other calls here!");
3060 // Delete any unreachable statepoints so that we don't have unrewritten
3061 // statepoints surviving this pass. This makes testing easier and the
3062 // resulting IR less confusing to human readers.
3065 // Flush the Dominator Tree.
3068 // Gather all the statepoints which need to be rewritten. Be careful to only
3069 // consider those in reachable code since we need to ask dominance queries
3070 // when rewriting. We'll delete the unreachable ones in a moment.
3074 // TODO: only the ones with the flag set!
3075 if (NeedsRewrite(
I)) {
3076 // NOTE removeUnreachableBlocks() is stronger than
3077 // DominatorTree::isReachableFromEntry(). In other words
3078 // removeUnreachableBlocks can remove some blocks for which
3079 // isReachableFromEntry() returns true.
3081 "no unreachable blocks expected");
3085 if (CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_base ||
3086 CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_offset)
3090 // Return early if no work to do.
3091 if (ParsePointNeeded.
empty() && Intrinsics.
empty())
3094 // As a prepass, go ahead and aggressively destroy single entry phi nodes.
3095 // These are created by LCSSA. They have the effect of increasing the size
3096 // of liveness sets for no good reason. It may be harder to do this post
3097 // insertion since relocations and base phis can confuse things.
3099 if (BB.getUniquePredecessor())
3102 // Before we start introducing relocations, we want to tweak the IR a bit to
3103 // avoid unfortunate code generation effects. The main example is that we
3104 // want to try to make sure the comparison feeding a branch is after any
3105 // safepoints. Otherwise, we end up with a comparison of pre-relocation
3106 // values feeding a branch after relocation. This is semantically correct,
3107 // but results in extra register pressure since both the pre-relocation and
3108 // post-relocation copies must be available in registers. For code without
3109 // relocations this is handled elsewhere, but teaching the scheduler to
3110 // reverse the transform we're about to do would be slightly complex.
3111 // Note: This may extend the live range of the inputs to the icmp and thus
3112 // increase the liveset of any statepoint we move over. This is profitable
3113 // as long as all statepoints are in rare blocks. If we had in-register
3114 // lowering for live values this would be a much safer transform.
3117 if (BI->isConditional())
3119 // TODO: Extend this to handle switches
3124 if (
auto *
Cond = getConditionInst(TI))
3125 // TODO: Handle more than just ICmps here. We should be able to move
3126 // most instructions without side effects or memory access.
3133 // Nasty workaround - The base computation code in the main algorithm doesn't
3134 // consider the fact that a GEP can be used to convert a scalar to a vector.
3135 // The right fix for this is to integrate GEPs into the base rewriting
3136 // algorithm properly, this is just a short term workaround to prevent
3137 // crashes by canonicalizing such GEPs into fully vector GEPs.
3143 for (
unsigned i = 0; i <
I.getNumOperands(); i++)
3150 // It's the vector to scalar traversal through the pointer operand which
3151 // confuses base pointer rewriting, so limit ourselves to that case.
3152 if (!
I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
3154 auto *
Splat =
B.CreateVectorSplat(VF,
I.getOperand(0));
3160 // Cache the 'defining value' relation used in the computation and
3161 // insertion of base phis and selects. This ensures that we don't insert
3162 // large numbers of duplicate base_phis. Use one cache for both
3163 // inlineGetBaseAndOffset() and insertParsePoints().
3164 DefiningValueMapTy DVCache;
3166 // Mapping between base values and a flag indicating whether it's a known
3168 IsKnownBaseMapTy KnownBases;
3170 if (!Intrinsics.
empty())
3171 // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before finding
3175 if (!ParsePointNeeded.
empty())
3182// liveness computation via standard dataflow
3183// -------------------------------------------------------------------
3185// TODO: Consider using bitvectors for liveness, the set of potentially
3186// interesting values should be small and easy to pre-compute.
3188/// Compute the live-in set for the location rbegin starting from
3189/// the live-out set of the basic block
3194 // KILL/Def - Remove this definition from LiveIn
3197 // Don't consider *uses* in PHI nodes, we handle their contribution to
3198 // predecessor blocks when we seed the LiveOut sets
3202 // USE - Add to the LiveIn set for this instruction
3203 for (
Value *V :
I.operands()) {
3205 "support for FCA unimplemented");
3207 // The choice to exclude all things constant here is slightly subtle.
3208 // There are two independent reasons:
3209 // - We assume that things which are constant (from LLVM's definition)
3210 // do not move at runtime. For example, the address of a global
3211 // variable is fixed, even though its contents may not be.
3212 // - Second, we can't disallow arbitrary inttoptr constants even
3213 // if the language frontend does. Optimization passes are free to
3214 // locally exploit facts without respect to global reachability. This
3215 // can create sections of code which are dynamically unreachable and
3216 // contain just about anything. (see constants.ll in tests)
3226 for (
auto &
I : *Succ) {
3233 "support for FCA unimplemented");
3249/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
3250/// validation check for the liveness computation.
3253 for (
Value *V : Live) {
3255 // The terminator can be a member of the LiveOut set. LLVM's definition
3256 // of instruction dominance states that V does not dominate itself. As
3257 // such, we need to special case this to allow it.
3258 if (TermOkay && TI ==
I)
3261 "basic SSA liveness expectation violated by liveness analysis");
3266/// Check that all the liveness sets used during the computation of liveness
3267/// obey basic SSA properties. This is useful for finding cases where we miss
3281 // Seed the liveness for each individual block
3284 auto &LiveSet =
Data.LiveSet[&BB];
3290 assert(!
Data.LiveSet[&BB].count(Kill) &&
"live set contains kill");
3295 auto &In =
Data.LiveIn[&BB] =
Data.LiveSet[&BB];
3297 In.set_subtract(
Data.KillSet[&BB]);
3302 // Propagate that liveness until stable
3303 while (!Worklist.
empty()) {
3306 // Compute our new liveout set, then exit early if it hasn't changed despite
3307 // the contribution of our successor.
3309 const auto OldLiveOutSize = LiveOut.
size();
3314 // assert OutLiveOut is a subset of LiveOut
3315 if (OldLiveOutSize == LiveOut.
size()) {
3316 // If the sets are the same size, then we didn't actually add anything
3317 // when unioning our successors LiveIn. Thus, the LiveIn of this block
3322 // Apply the effects of this basic block
3329 // assert: LiveIn is a subset of LiveTmp
3330 if (LiveIn.
size() != LiveTmp.
size()) {
3331 LiveIn = std::move(LiveTmp);
3334 }
// while (!Worklist.empty())
3337 // Verify our output against SSA properties. This helps catch any
3338 // missing kills during the above iteration.
3348 // Note: The copy is intentional and required
3352 // We want to handle the statepoint itself oddly. It's
3353 // call result is not live (normal), nor are it's arguments
3354 // (unless they're used again later). This adjustment is
3355 // specifically what we need to relocate
3359 Out.insert_range(LiveOut);
3364 PartiallyConstructedSafepointRecord &
Info,
3365 PointerToBaseTy &PointerToBase,
3367 StatepointLiveSetTy Updated;
3370 // We may have base pointers which are now live that weren't before. We need
3371 // to update the PointerToBase structure to reflect this.
3372 for (
auto *V : Updated)
3373 PointerToBase.insert({ V, V });
3375 Info.LiveSet = Updated;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static void makeStatepointExplicitImpl(CallBase *Call, const SmallVectorImpl< Value * > &BasePtrs, const SmallVectorImpl< Value * > &LiveVariables, PartiallyConstructedSafepointRecord &Result, std::vector< DeferredReplacement > &Replacements, const PointerToBaseTy &PointerToBase, GCStrategy *GC)
static void rematerializeLiveValues(CallBase *Call, PartiallyConstructedSafepointRecord &Info, PointerToBaseTy &PointerToBase, RematCandTy &RematerizationCandidates, TargetTransformInfo &TTI)
static void findRematerializationCandidates(PointerToBaseTy PointerToBase, RematCandTy &RematerizationCandidates, TargetTransformInfo &TTI)
static std::unique_ptr< GCStrategy > findGCStrategy(Function &F)
Looks up the GC strategy for a given function, returning null if the function doesn't have a GC tag.
static void stripNonValidDataFromBody(Function &F)
static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases)
Returns true if V is a known base.
static Value * findBasePointer(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
For a given value or instruction, figure out what base ptr its derived from.
static cl::opt< bool, true > ClobberNonLiveOverride("rs4gc-clobber-non-live", cl::location(ClobberNonLive), cl::Hidden)
static void insertRelocationStores(iterator_range< Value::user_iterator > GCRelocs, DenseMap< Value *, AllocaInst * > &AllocaMap, DenseSet< Value * > &VisitedLiveValues)
static BasicBlock * normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, DominatorTree &DT)
static void analyzeParsePointLiveness(DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call, PartiallyConstructedSafepointRecord &Result, GCStrategy *GC)
static void computeLiveOutSeed(BasicBlock *BB, SetVector< Value * > &LiveTmp, GCStrategy *GC)
static void relocationViaAlloca(Function &F, DominatorTree &DT, ArrayRef< Value * > Live, ArrayRef< PartiallyConstructedSafepointRecord > Records)
Do all the relocation update via allocas and mem2reg.
static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi)
static cl::opt< unsigned > RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden, cl::init(6))
static Value * findBaseOrBDV(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
Return a base pointer for this value if known.
static Value * findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
Returns the base defining value for this value.
static void insertUseHolderAfter(CallBase *Call, const ArrayRef< Value * > Values, SmallVectorImpl< CallInst * > &Holders)
Insert holders so that each Value is obviously live through the entire lifetime of the call.
static AttributeList legalizeCallAttributes(CallBase *Call, bool IsMemIntrinsic, AttributeList StatepointAL)
static void insertRematerializationStores(const RematerializedValueMapTy &RematerializedValues, DenseMap< Value *, AllocaInst * > &AllocaMap, DenseSet< Value * > &VisitedLiveValues)
static bool insertParsePoints(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, SmallVectorImpl< CallBase * > &ToUpdate, DefiningValueMapTy &DVCache, IsKnownBaseMapTy &KnownBases)
static void findBasePointers(const StatepointLiveSetTy &live, PointerToBaseTy &PointerToBase, DominatorTree *DT, DefiningValueMapTy &DVCache, IsKnownBaseMapTy &KnownBases)
static bool shouldRewriteStatepointsIn(Function &F)
Returns true if this function should be rewritten by this pass.
static cl::opt< bool > RematDerivedAtUses("rs4gc-remat-derived-at-uses", cl::Hidden, cl::init(true))
static ArrayRef< Use > GetDeoptBundleOperands(const CallBase *Call)
static Instruction * rematerializeChain(ArrayRef< Instruction * > ChainToBase, BasicBlock::iterator InsertBefore, Value *RootOfChain, Value *AlternateLiveBase)
static void stripNonValidAttributesFromPrototype(Function &F)
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, StatepointLiveSetTy &out, GCStrategy *GC)
Given results from the dataflow liveness computation, find the set of live Values at a particular ins...
static void computeLiveInValues(DominatorTree &DT, Function &F, GCPtrLivenessData &Data, GCStrategy *GC)
Compute the live-in set for every basic block in the function.
static void stripInvalidMetadataFromInstruction(Instruction &I)
Certain metadata on instructions are invalid after running RS4GC.
static constexpr Attribute::AttrKind FnAttrsToStrip[]
static bool areBothVectorOrScalar(Value *First, Value *Second)
static void rematerializeLiveValuesAtUses(RematCandTy &RematerizationCandidates, MutableArrayRef< PartiallyConstructedSafepointRecord > Records, PointerToBaseTy &PointerToBase)
static bool isHandledGCPointerType(Type *T, GCStrategy *GC)
static Value * findRematerializableChainToBasePointer(SmallVectorImpl< Instruction * > &ChainToBase, Value *CurrentValue)
static cl::opt< bool > PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, cl::init(false))
static Value * findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
Return a base defining value for the 'Index' element of the given vector instruction 'I'.
static void stripNonValidData(Module &M)
The IR fed into RewriteStatepointsForGC may have had attributes and metadata implying dereferenceabil...
static InstructionCost chainToBasePointerCost(SmallVectorImpl< Instruction * > &Chain, TargetTransformInfo &TTI)
static bool isUnhandledGCPointerType(Type *Ty, GCStrategy *GC)
static SetVector< Value * > computeKillSet(BasicBlock *BB, GCStrategy *GC)
static bool ClobberNonLive
static cl::opt< bool > PrintBasePointers("spp-print-base-pointers", cl::Hidden, cl::init(false))
static bool isOriginalBaseResult(Value *V)
This value is a base pointer that is not generated by RS4GC, i.e.
static cl::opt< bool > PrintLiveSet("spp-print-liveset", cl::Hidden, cl::init(false))
static void setKnownBase(Value *V, bool IsKnownBase, IsKnownBaseMapTy &KnownBases)
Caches the IsKnownBase flag for a value and asserts that it wasn't present in the cache before.
static cl::opt< bool > AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info", cl::Hidden, cl::init(true))
static void makeStatepointExplicit(DominatorTree &DT, CallBase *Call, PartiallyConstructedSafepointRecord &Result, std::vector< DeferredReplacement > &Replacements, const PointerToBaseTy &PointerToBase, GCStrategy *GC)
static std::string suffixed_name_or(Value *V, StringRef Suffix, StringRef DefaultName)
static void CreateGCRelocates(ArrayRef< Value * > LiveVariables, ArrayRef< Value * > BasePtrs, Instruction *StatepointToken, IRBuilder<> &Builder, GCStrategy *GC)
Helper function to place all gc relocates necessary for the given statepoint.
static void checkBasicSSA(DominatorTree &DT, SetVector< Value * > &Live, Instruction *TI, bool TermOkay=false)
Check that the items in 'Live' dominate 'TI'.
static StringRef getDeoptLowering(CallBase *Call)
static void findLiveReferences(Function &F, DominatorTree &DT, ArrayRef< CallBase * > toUpdate, MutableArrayRef< struct PartiallyConstructedSafepointRecord > records, GCStrategy *GC)
static AttributeMask getParamAndReturnAttributesToRemove()
static bool inlineGetBaseAndOffset(Function &F, SmallVectorImpl< CallInst * > &Intrinsics, DefiningValueMapTy &DVCache, IsKnownBaseMapTy &KnownBases)
static Value * findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache, IsKnownBaseMapTy &KnownBases)
Helper function for findBasePointer - Will return a value which either a) defines the base pointer fo...
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, CallBase *Call, PartiallyConstructedSafepointRecord &result, PointerToBaseTy &PointerToBase, GCStrategy *GC)
Given an updated version of the dataflow liveness results, update the liveset and base pointer maps f...
static unsigned getNumElements(Type *Ty)
verify safepoint Safepoint IR static false bool isGCPointerType(Type *T)
static bool containsGCPtrType(Type *Ty)
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
an instruction to allocate memory on the stack
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Value handle that asserts if the Value is deleted.
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
Functions, function parameters, and return types can have attributes to indicate how they should be t...
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI const LandingPadInst * getLandingPadInst() const
Return the landingpad instruction associated with the landing pad.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
reverse_iterator rbegin()
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a no-op cast from one type to another.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setAttributes(AttributeList A)
Set the attributes for this call.
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Represents calls to the gc.relocate intrinsic.
LLVM_ABI Value * getDerivedPtr() const
Represents a gc.statepoint intrinsic call.
GCStrategy describes a garbage collector algorithm's code generation requirements,...
DomTreeT & getDomTree()
Flush DomTree updates and return DomTree.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
iterator find(const KeyT &Key)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
A Module instance is used to store all the information related to an LLVM module.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
ArrayRef< value_type > getArrayRef() const
bool remove(const value_type &X)
Remove an item from the set vector.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
bool set_union(const STy &S)
Compute This := This u S, return whether 'This' changed.
bool empty() const
Determine if the SetVector is empty or not.
void set_subtract(const STy &S)
Compute This := This - S. TODO: We should be able to use set_subtract from SetOperations....
bool insert(const value_type &X)
Insert a new element into the SetVector.
value_type pop_back_val()
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
Class to represent struct types.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
iterator_range< value_op_iterator > operand_values()
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI User * getUniqueUndroppableUser()
Return the user if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI unsigned getNumUses() const
This method computes the number of uses of this Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
bool operator!=(uint64_t V1, const APInt &V2)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
auto unique(Range &&R, Predicate P)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
StatepointDirectives parseStatepointDirectivesFromAttrs(AttributeList AS)
Parse out statepoint directives from the function attributes present in AS.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
DWARFExpression::Operation Op
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
@ DeoptLiveIn
Mark the deopt arguments associated with the statepoint as only being "live-in".
@ GCTransition
Indicates that this statepoint is a transition from GC-aware code to code that is not GC-aware.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::unique_ptr< GCStrategy > getGCStrategy(const StringRef Name)
Lookup the GCStrategy object associated with the given gc name.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
bool isStatepointDirectiveAttr(Attribute Attr)
Return true if the Attr is an attribute that is a statepoint directive.
LLVM_ABI bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Remove all blocks that cannot be reached from the function's entry.
LLVM_ABI bool callsGCLeafFunction(const CallBase *Call, const TargetLibraryInfo &TLI)
Return true if this call calls a gc leaf function.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
bool runOnFunction(Function &F, DominatorTree &, TargetTransformInfo &, const TargetLibraryInfo &)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Call sites that get wrapped by a gc.statepoint (currently only in RewriteStatepointsForGC and potenti...
std::optional< uint32_t > NumPatchBytes
std::optional< uint64_t > StatepointID
static const uint64_t DefaultStatepointID