//===- FunctionSpecialization.cpp - Function Specialization ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "function-specialization"

STATISTIC(NumSpecsCreated, "Number of specializations created");
36 "Force function specialization for every call site with a constant "
41 "The maximum number of clones allowed for a single function "
47 cl::desc(
"The maximum number of iterations allowed "
48 "when searching for transitive "
53 cl::desc(
"The maximum number of incoming values a PHI node can have to be "
54 "considered during the specialization bonus estimation"));
58 "The maximum number of predecessors a basic block can have to be "
59 "considered during the estimation of dead code"));
63 cl::desc(
"Don't specialize functions that have less than this number of "
68 "Maximum codesize growth allowed per function"));
72 cl::desc(
"Reject specializations whose codesize savings are less than this "
73 "much percent of the original function size"));
77 cl::desc(
"Reject specializations whose latency savings are less than this "
78 "much percent of the original function size"));
82 cl::desc(
"Reject specializations whose inlining bonus is less than this "
83 "much percent of the original function size"));
87 "Enable function specialization on the address of global values"));
92 "Enable specialization of functions that take a literal constant as an "
namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
} // end namespace llvm
bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB,
                                            BasicBlock *Succ) const {
  unsigned I = 0;
  return all_of(predecessors(Succ), [&](BasicBlock *Pred) {
    return I++ < MaxBlockPredecessors &&
           (Pred == BB || Pred == Succ || !isBlockExecutable(Pred));
  });
}
// Estimates the codesize savings due to dead code after constant propagation.
// \p WorkList represents the basic blocks of a specialization which will
// eventually become dead once we replace instructions that are known to be
// constants. The successors of such blocks are added to the list as long as
// the \p Solver found they were executable prior to specialization, and only
// if all their predecessors are dead.
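// For example (an illustrative sketch): if specializing on a flag argument
// folds 'br i1 %flag, label %fast, label %slow' to an unconditional branch,
// %slow lands on the worklist, and each of its successors follows whenever
// %slow is its only live predecessor.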
Cost InstCostVisitor::estimateBasicBlocks(
    SmallVectorImpl<BasicBlock *> &WorkList) {
  Cost CodeSize = 0;
  // Accumulate the codesize savings of each basic block.
  while (!WorkList.empty()) {
    BasicBlock *BB = WorkList.pop_back_val();

    // These blocks are considered dead as far as the InstCostVisitor
    // is concerned. They haven't been proven dead yet by the Solver,
    // but may become dead if we propagate the specialization arguments.
    assert(Solver.isBlockExecutable(BB) && "BB already found dead by IPSCCP!");
    if (!DeadBlocks.insert(BB).second)
      continue;

    for (Instruction &I : *BB) {
      // If it's a known constant we have already accounted for it.
      if (KnownConstants.contains(&I))
        continue;
      Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
      CodeSize += C;
      LLVM_DEBUG(dbgs() << "FnSpecialization:     CodeSize " << C
                        << " for user " << I << "\n");
    }

    // Keep adding dead successors to the list as long as they are
    // executable and only reachable from dead blocks.
    for (BasicBlock *SuccBB : successors(BB))
      if (isBlockExecutable(SuccBB) && canEliminateSuccessor(BB, SuccBB))
        WorkList.push_back(SuccBB);
  }
  return CodeSize;
}
Constant *InstCostVisitor::findConstantFor(Value *V) const {
  if (auto *C = Solver.getConstantOrNull(V))
    return C;
  return KnownConstants.lookup(V);
}
Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() {
  Cost CodeSize = 0;
  while (!PendingPHIs.empty()) {
    Instruction *Phi = PendingPHIs.pop_back_val();
    // The pending PHIs could have been proven dead by now.
    if (isBlockExecutable(Phi->getParent()))
      CodeSize += getCodeSizeSavingsForUser(Phi);
  }
  return CodeSize;
}
/// Compute the codesize savings for replacing argument \p A with constant \p C.
Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C) {
  LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                    << C->getNameOrAsOperand() << "\n");
  Cost CodeSize = 0;
  for (auto *U : A->users())
    if (auto *UI = dyn_cast<Instruction>(U))
      if (isBlockExecutable(UI->getParent()))
        CodeSize += getCodeSizeSavingsForUser(UI, A, C);

  LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
                    << CodeSize << "} for argument " << *A << "\n");
  return CodeSize;
}
/// Compute the latency savings from replacing all arguments with constants for
/// a specialization candidate. As this function computes the latency savings
/// for all Instructions in KnownConstants at once, it should be called only
/// after every instruction has been visited, i.e. after:
///
/// * getCodeSizeSavingsForArg has been run for every constant argument of a
///   specialization candidate
///
/// * getCodeSizeSavingsFromPendingPHIs has been run
///
/// to ensure that the latency savings are calculated for all Instructions we
/// have visited and found to be constant.
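/// For example (illustrative), an instruction made constant inside a loop
/// body whose block frequency is eight times the entry frequency contributes
/// roughly 8 * latency(I) to the total savings.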
Cost InstCostVisitor::getLatencySavingsForKnownConstants() {
  auto &BFI = GetBFI(*F);
  Cost TotalLatency = 0;

  for (auto Pair : KnownConstants) {
    Instruction *I = dyn_cast<Instruction>(Pair.first);
    if (!I)
      continue;

    uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() /
                      BFI.getEntryFreq().getFrequency();

    Cost Latency =
        Weight * TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);

    LLVM_DEBUG(dbgs() << "FnSpecialization:     {Latency = " << Latency
                      << "} for instruction " << *I << "\n");

    TotalLatency += Latency;
  }

  return TotalLatency;
}
Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
                                                Constant *C) {
  // We have already propagated a constant for this user.
  if (KnownConstants.contains(User))
    return 0;

  // Cache the iterator before visiting.
  LastVisited = Use ? KnownConstants.insert({Use, C}).first
                    : KnownConstants.end();
  Cost CodeSize = 0;
  if (auto *SI = dyn_cast<SwitchInst>(User)) {
    CodeSize = estimateSwitchInst(*SI);
  } else if (auto *BI = dyn_cast<BranchInst>(User)) {
    CodeSize = estimateBranchInst(*BI);
  } else {
    C = visit(*User);
    if (!C)
      return 0;
  }

  // Even though it doesn't make sense to bind switch and branch instructions
  // to a constant (unlike any other instruction type), doing so prevents
  // estimating their bonus multiple times.
  KnownConstants.insert({User, C});
  CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);

  LLVM_DEBUG(dbgs() << "FnSpecialization:     {CodeSize = " << CodeSize
                    << "} for user " << *User << "\n");

  for (auto *U : User->users())
    if (auto *UI = dyn_cast<Instruction>(U))
      if (isBlockExecutable(UI->getParent()))
        CodeSize += getCodeSizeSavingsForUser(UI, User, C);

  return CodeSize;
}
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (I.getCondition() != LastVisited->first)
    return 0;

  auto *C = dyn_cast<ConstantInt>(LastVisited->second);
  if (!C)
    return 0;

  BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor();
  // Initialize the worklist with the dead basic blocks. These are the
  // destination labels which are different from the one corresponding
  // to \p C. They should be executable and have a unique predecessor.
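  // For example (illustrative IR), with the condition known to be 1:
  //   switch i32 %cond, label %default [ i32 0, label %bb0
  //                                      i32 1, label %bb1 ]
  // %bb1 remains live, while %default and %bb0 become worklist candidates.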
  SmallVector<BasicBlock *> WorkList;
  for (const auto &Case : I.cases()) {
    BasicBlock *BB = Case.getCaseSuccessor();
    if (BB != Succ && isBlockExecutable(BB) &&
        canEliminateSuccessor(I.getParent(), BB))
      WorkList.push_back(BB);
  }

  return estimateBasicBlocks(WorkList);
}
Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (I.getCondition() != LastVisited->first)
    return 0;

  BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue());
  // Initialize the worklist with the dead successor as long as
  // it is executable and has a unique predecessor.
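  // For example (illustrative), for 'br i1 %c, label %then, label %else' with
  // %c known to be true, isOneValue() selects successor 1, i.e. %else, which
  // is the edge that can no longer be taken.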
  SmallVector<BasicBlock *> WorkList;
  if (isBlockExecutable(Succ) && canEliminateSuccessor(I.getParent(), Succ))
    WorkList.push_back(Succ);

  return estimateBasicBlocks(WorkList);
}
bool InstCostVisitor::discoverTransitivelyIncomingValues(
    Constant *Const, PHINode *Root, DenseSet<PHINode *> &TransitivePHIs) {
  SmallVector<PHINode *, 64> WorkList;
  WorkList.push_back(Root);

  while (!WorkList.empty()) {
    PHINode *PN = WorkList.pop_back_val();

    if (!TransitivePHIs.insert(PN).second)
      continue;

    for (unsigned Idx = 0, E = PN->getNumIncomingValues(); Idx != E; ++Idx) {
      Value *V = PN->getIncomingValue(Idx);

      // Disregard self-references and dead incoming values.
      if (V == PN || !isBlockExecutable(PN->getIncomingBlock(Idx)))
        continue;

      if (Constant *C = findConstantFor(V)) {
        // Not all incoming values are the same constant. Bail immediately.
        if (C != Const)
          return false;
        continue;
      }

      if (auto *Phi = dyn_cast<PHINode>(V)) {
        WorkList.push_back(Phi);
        continue;
      }

      // We can't reason about anything else.
      return false;
    }
  }
  return true;
}
Constant *InstCostVisitor::visitPHINode(PHINode &I) {
  if (I.getNumIncomingValues() > MaxIncomingPhiValues)
    return nullptr;

  bool Inserted = VisitedPHIs.insert(&I).second;
  Constant *Const = nullptr;
  bool HaveSeenIncomingPHI = false;

  for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
    Value *V = I.getIncomingValue(Idx);

    // Disregard self-references and dead incoming values.
    if (V == &I || !isBlockExecutable(I.getIncomingBlock(Idx)))
      continue;

    if (Constant *C = findConstantFor(V)) {
      if (!Const)
        Const = C;
      // Not all incoming values are the same constant. Bail immediately.
      if (C != Const)
        return nullptr;
      continue;
    }

    if (Inserted) {
      // First time we are seeing this phi. We will retry later, after
      // all the constant arguments have been propagated. Bail for now.
      PendingPHIs.push_back(&I);
      return nullptr;
    }

    if (isa<PHINode>(V)) {
      // Perhaps it is a Transitive Phi. We will confirm later.
      HaveSeenIncomingPHI = true;
      continue;
    }

    // We can't reason about anything else.
    return nullptr;
  }

  if (!HaveSeenIncomingPHI)
    return Const;

  DenseSet<PHINode *> TransitivePHIs;
  if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs))
    return nullptr;

  return Const;
}
Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (isGuaranteedNotToBeUndefOrPoison(LastVisited->second))
    return LastVisited->second;
  return nullptr;
}
Constant *InstCostVisitor::visitCallBase(CallBase &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  Function *F = I.getCalledFunction();
  if (!F || !canConstantFoldCallTo(&I, F))
    return nullptr;

  SmallVector<Constant *, 8> Operands;
  Operands.reserve(I.getNumOperands());

  // The last operand is the callee itself; fold only the arguments.
  for (unsigned Idx = 0, E = I.getNumOperands() - 1; Idx != E; ++Idx) {
    Constant *C = findConstantFor(I.getOperand(Idx));
    if (!C)
      return nullptr;
    Operands.push_back(C);
  }

  return ConstantFoldCall(&I, F, Operands);
}
Constant *InstCostVisitor::visitLoadInst(LoadInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL);
}
Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
  SmallVector<Constant *, 8> Operands;
  Operands.reserve(I.getNumOperands());

  for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) {
    Constant *C = findConstantFor(I.getOperand(Idx));
    if (!C)
      return nullptr;
    Operands.push_back(C);
  }

  return ConstantFoldInstOperands(&I, Operands, DL);
}
Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (I.getCondition() == LastVisited->first) {
    Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue()
                                                  : I.getTrueValue();
    return findConstantFor(V);
  }
  if (Constant *Condition = findConstantFor(I.getCondition()))
    if ((I.getTrueValue() == LastVisited->first && Condition->isOneValue()) ||
        (I.getFalseValue() == LastVisited->first && Condition->isZeroValue()))
      return LastVisited->second;
  return nullptr;
}
Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
  Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
  if (Constant *Other = findConstantFor(V)) {
    Constant *LHS = ConstOnRHS ? Other : LastVisited->second;
    Constant *RHS = ConstOnRHS ? LastVisited->second : Other;
    return ConstantFoldCompareInstOperands(I.getPredicate(), LHS, RHS, DL);
  }

  // If we haven't found Other to be a specific constant value, we may still be
  // able to constant fold using information from the lattice value.
  ValueLatticeElement ConstLV = ValueLatticeElement::get(LastVisited->second);
  const ValueLatticeElement &OtherLV = Solver.getLatticeValueFor(V);
  auto &V1State = ConstOnRHS ? OtherLV : ConstLV;
  auto &V2State = ConstOnRHS ? ConstLV : OtherLV;
  return V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL);
}
Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL);
}
Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
  Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
  Constant *Other = findConstantFor(V);
  Value *OtherVal = Other ? Other : V;
  Value *ConstVal = LastVisited->second;

  if (ConstOnRHS)
    std::swap(ConstVal, OtherVal);

  return dyn_cast_or_null<Constant>(
      simplifyBinOp(I.getOpcode(), ConstVal, OtherVal, SimplifyQuery(DL)));
}
Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,
                                                   CallInst *Call) {
  Value *StoreValue = nullptr;
  for (auto *User : Alloca->users()) {
    // We can't use llvm::isAllocaPromotable() as that would fail because of
    // the usage in the CallInst, which is what we check here.
    if (User == Call)
      continue;

    if (auto *Store = dyn_cast<StoreInst>(User)) {
      // This is a duplicate store, bail out.
      if (StoreValue || Store->isVolatile())
        return nullptr;
      StoreValue = Store->getValueOperand();
      continue;
    }

    // Bail if there is any other unknown usage.
    return nullptr;
  }

  if (!StoreValue)
    return nullptr;

  return getCandidateConstant(StoreValue);
}
// A constant stack value is an AllocaInst that has a single constant
// value stored to it. Return this constant if such an alloca stack value
// is a function argument.
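// For example (illustrative IR):
//   %temp = alloca i32
//   store i32 2, ptr %temp
//   call void @f(ptr nonnull %temp)
// Here the alloca %temp, passed as an actual argument, holds the single
// constant i32 2.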
Constant *FunctionSpecializer::getConstantStackValue(CallInst *Call,
                                                     Value *Val) {
  if (!Val)
    return nullptr;
  Val = Val->stripPointerCasts();
  auto *Alloca = dyn_cast<AllocaInst>(Val);
  if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
    return nullptr;
  return getPromotableAlloca(Alloca, Call);
}
// To support specializing recursive functions, it is important to propagate
// constant arguments because after a first iteration of specialisation, a
// reduced example may look like this:
//
//     define internal void @RecursiveFn(i32* arg1) {
//       %temp = alloca i32, align 4
//       store i32 2, i32* %temp, align 4
//       call void @RecursiveFn.1(i32* nonnull %temp)
//       ret void
//     }
//
// Before the next iteration, we need to propagate the constant like so,
// which allows further specialization in subsequent iterations:
//
//     @funcspec.arg = internal constant i32 2
//
//     define internal void @someFunc(i32* arg1) {
//       call void @otherFunc(i32* nonnull @funcspec.arg)
//       ret void
//     }
//
// See if there are any new constant values for the callers of \p F via
// stack variables and promote them to global variables.
void FunctionSpecializer::promoteConstantStackValues(Function *F) {
  for (User *U : F->users()) {
    auto *Call = dyn_cast<CallInst>(U);
    if (!Call || !Solver.isBlockExecutable(Call->getParent()))
      continue;

    for (const Use &ArgUse : Call->args()) {
      unsigned Idx = Call->getArgOperandNo(&ArgUse);
      Value *ArgOp = Call->getArgOperand(Idx);

      auto *ConstVal = getConstantStackValue(Call, ArgOp);
      if (!ConstVal)
        continue;

      Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
                                     GlobalValue::InternalLinkage, ConstVal,
                                     "specialized.arg." + Twine(++NGlobals));
      Call->setArgOperand(Idx, GV);
    }
  }
}
// The SCCP solver inserts bitcasts for PredicateInfo. These interfere with the
// promoteConstantStackValues() optimization.
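// For example (illustrative), a no-op cast such as
//   %x.cast = bitcast i32 %x to i32
// hides the underlying value from the alloca pattern match above, so it is
// simply replaced by its operand and erased.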
static void removeSSACopy(Function &F) {
  for (BasicBlock &BB : F) {
    for (Instruction &Inst : make_early_inc_range(BB)) {
      auto *BC = dyn_cast<BitCastInst>(&Inst);
      if (!BC || BC->getType() != BC->getOperand(0)->getType())
        continue;
      Inst.replaceAllUsesWith(BC->getOperand(0));
      Inst.eraseFromParent();
    }
  }
}
/// Remove any ssa_copy intrinsics that may have been introduced.
void FunctionSpecializer::cleanUpSSA() {
  for (Function *F : Specializations)
    removeSSACopy(*F);
}
FunctionSpecializer::~FunctionSpecializer() {
  LLVM_DEBUG(
      if (NumSpecsCreated > 0)
        dbgs() << "FnSpecialization: Created " << NumSpecsCreated
               << " specializations in module " << M.getName() << "\n");
  // Eliminate dead code.
  removeDeadFunctions();
}
/// Get the unsigned Value of given Cost object. Assumes the Cost is always
/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
/// always valid.
static unsigned getCostValue(const Cost &C) {
  int64_t Value = C.getValue();

  assert(Value >= 0 && "CodeSize and Latency cannot be negative");
  // It is safe to down cast since we know the arguments cannot be negative and
  // Cost is of type int64_t.
  return static_cast<unsigned>(Value);
}
/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
/// \returns true if at least one function is specialized.
bool FunctionSpecializer::run() {
  // Find possible specializations for each function.
  SpecMap SM;
  SmallVector<Spec, 32> AllSpecs;
  unsigned NumCandidates = 0;
  for (Function &F : M) {
    if (!isCandidateFunction(&F))
      continue;

    auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
    CodeMetrics &Metrics = It->second;

    // Analyze the function.
    if (Inserted) {
      SmallPtrSet<const Value *, 32> EphValues;
      CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
      for (BasicBlock &BB : F)
        Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
    }
    // When specializing literal constants is enabled, always require functions
    // to be larger than MinFunctionSize, to prevent excessive specialization.
    const bool RequireMinSize =
        !ForceSpecialization &&
        (SpecializeLiteralConstant || !F.hasFnAttribute(Attribute::NoInline));
    // If the code metrics reveal that we shouldn't duplicate the function,
    // or if the code size implies that this function is easy to get inlined,
    // then we shouldn't specialize it.
    if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
        (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
      continue;

    // When specialization on literal constants is disabled, only consider
    // recursive functions when running multiple times, to avoid wasted
    // analysis, as we will not be able to specialize on any newly found
    // literal constant return values.
    if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
      continue;
    int64_t Sz = Metrics.NumInsts.getValue();
    assert(Sz > 0 && "CodeSize should be positive");
    // It is safe to down cast from int64_t, NumInsts is always positive.
    unsigned FuncSize = static_cast<unsigned>(Sz);
    LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
                      << F.getName() << " is " << FuncSize << "\n");

    if (Inserted && Metrics.isRecursive)
      promoteConstantStackValues(&F);
    if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
      LLVM_DEBUG(
          dbgs() << "FnSpecialization: No possible specializations found for "
                 << F.getName() << "\n");
      continue;
    }

    ++NumCandidates;
  }

  if (!NumCandidates) {
    LLVM_DEBUG(
        dbgs()
        << "FnSpecialization: No possible specializations found in module\n");
    return false;
  }
  // Choose the most profitable specialisations that fit in the module
  // specialization budget, which is derived from the maximum number of
  // specializations allowed per candidate function.
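  // For example, with the default funcspec-max-clones=3 and two candidate
  // functions, at most min(2 * 3, AllSpecs.size()) specializations survive.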
  auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
    if (AllSpecs[I].Score != AllSpecs[J].Score)
      return AllSpecs[I].Score > AllSpecs[J].Score;
    return I > J;
  };
  const unsigned NSpecs =
      std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));
  SmallVector<unsigned> BestSpecs(NSpecs + 1);
  std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
  if (AllSpecs.size() > NSpecs) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceeds "
                      << "the maximum number of clones threshold.\n"
                      << "FnSpecialization: Specializing the " << NSpecs
                      << " most profitable candidates.\n");
    std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareScore);
    for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) {
      BestSpecs[NSpecs] = I;
      std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
      std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
    }
  }
  LLVM_DEBUG(
      dbgs() << "FnSpecialization: List of specializations\n";
      for (unsigned I = 0; I < NSpecs; ++I) {
        const Spec &S = AllSpecs[BestSpecs[I]];
        dbgs() << "FnSpecialization: Function " << S.F->getName()
               << ", score " << S.Score << "\n";
        for (const ArgInfo &Arg : S.Sig.Args)
          dbgs() << "FnSpecialization:   FormalArg = "
                 << Arg.Formal->getNameOrAsOperand()
                 << ", ActualArg = " << Arg.Actual->getNameOrAsOperand()
                 << "\n";
      });
  // Create the chosen specializations.
  SmallPtrSet<Function *, 8> OriginalFuncs;
  SmallVector<Function *> Clones;
  for (unsigned I = 0; I < NSpecs; ++I) {
    Spec &S = AllSpecs[BestSpecs[I]];

    // Accumulate the codesize growth for the function, now we are creating the
    // specialization.
    FunctionGrowth[S.F] += S.CodeSize;

    S.Clone = createSpecialization(S.F, S.Sig);
    Function *Clone = S.Clone;

    // Update the known call sites to call the clone.
    for (CallBase *Call : S.CallSites) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
                        << " to call " << Clone->getName() << "\n");
      Call->setCalledFunction(Clone);
      auto &BFI = GetBFI(*Call->getFunction());
      std::optional<uint64_t> Count =
          BFI.getBlockProfileCount(Call->getParent());
      if (!Count)
        continue;

      // Move the block count of the redirected call site over to the clone's
      // entry count, subtracting it from the original function's entry count.
      std::optional<llvm::Function::ProfileCount> MaybeCloneCount =
          Clone->getEntryCount();
      if (MaybeCloneCount)
        Clone->setEntryCount(MaybeCloneCount->getCount() + *Count);
      if (std::optional<llvm::Function::ProfileCount> MaybeOriginalCount =
              S.F->getEntryCount()) {
        uint64_t OriginalCount = MaybeOriginalCount->getCount();
        if (OriginalCount >= *Count) {
          S.F->setEntryCount(OriginalCount - *Count);
        } else {
          // This should generally not happen as that would mean there are
          // more computed calls to the function than what was recorded.
          S.F->setEntryCount(0);
        }
      }
    }
    Clones.push_back(Clone);
    OriginalFuncs.insert(S.F);
  }
  Solver.solveWhileResolvedUndefsIn(Clones);

  // Update the rest of the call sites - these are the recursive calls, calls
  // to discarded specialisations and calls that may match a specialisation
  // after the solver runs.
  for (Function *F : OriginalFuncs) {
    auto [Begin, End] = SM[F];
    updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
  }
  for (Function *F : Clones) {
    if (F->getReturnType()->isVoidTy())
      continue;
    if (F->getReturnType()->isStructTy()) {
      auto *STy = cast<StructType>(F->getReturnType());
      if (!Solver.isStructLatticeConstant(F, STy))
        continue;
    } else {
      auto It = Solver.getTrackedRetVals().find(F);
      assert(It != Solver.getTrackedRetVals().end() &&
             "Return value ought to be tracked");
      if (SCCPSolver::isOverdefined(It->second))
        continue;
    }
    for (User *U : F->users()) {
      auto *CS = dyn_cast<CallBase>(U);
      if (!CS)
        continue;
      // The user instruction does not call our function.
      if (CS->getCalledFunction() != F)
        continue;
      Solver.resetLatticeValueFor(CS);
    }
  }
  // Rerun the solver to notify the users of the modified callsites.
  Solver.solveWhileResolvedUndefs();

  for (Function *F : OriginalFuncs)
    if (FunctionMetrics[F].isRecursive)
      promoteConstantStackValues(F);

  return true;
}
void FunctionSpecializer::removeDeadFunctions() {
  for (Function *F : DeadFunctions) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function "
                      << F->getName() << "\n");
    if (FAM)
      FAM->clear(*F, F->getName());

    // Remove all the callsites that were proven unreachable once, and replace
    // their uses with poison.
    for (User *U : make_early_inc_range(F->users())) {
      assert(isa<CallBase>(U) &&
             "User of dead function must be call or invoke");
      auto *CS = cast<CallBase>(U);
      CS->replaceAllUsesWith(PoisonValue::get(CS->getType()));
      CS->eraseFromParent();
    }
    F->eraseFromParent();
  }
  DeadFunctions.clear();
}
/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
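/// For example, the first clone of a function @foo is named
/// @foo.specialized.1.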
static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
  ValueToValueMapTy Mappings;
  Function *Clone = CloneFunction(F, Mappings);
  Clone->setName(F->getName() + ".specialized." + Twine(NSpecs));
  removeSSACopy(*Clone);
  return Clone;
}
bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                                              SmallVectorImpl<Spec> &AllSpecs,
                                              SpecMap &SM) {
  // A mapping from a specialisation signature to the index of the respective
  // entry in the all specialisation array. Used to ensure uniqueness of
  // specialisations.
  DenseMap<SpecSig, unsigned> UniqueSpecs;

  // Get a list of interesting arguments.
  SmallVector<Argument *> Args;
  for (Argument &Arg : F->args())
    if (isArgumentInteresting(&Arg))
      Args.push_back(&Arg);

  if (Args.empty())
    return false;
  for (User *U : F->users()) {
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;
    auto &CS = *cast<CallBase>(U);

    // The user instruction does not call our function.
    if (CS.getCalledFunction() != F)
      continue;

    // If the call site has attribute minsize set, that callsite won't be
    // specialized.
    if (CS.hasFnAttr(Attribute::MinSize))
      continue;

    // If the parent of the call site will never be executed, we don't need
    // to worry about the passed value.
    if (!Solver.isBlockExecutable(CS.getParent()))
      continue;

    // Examine arguments and create a specialisation candidate from the
    // constant operands of this call site.
    SpecSig S;
    for (Argument *A : Args) {
      Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
      if (!C)
        continue;
      LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
                        << A->getName() << " : " << C->getNameOrAsOperand()
                        << "\n");
      S.Args.push_back({A, C});
    }

    if (S.Args.empty())
      continue;
    // Check if we have encountered the same specialisation already.
    if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
      // Existing specialisation. Add the call to the list to rewrite, unless
      // it's a recursive call. A specialisation generated because of a
      // recursive call may not end up as the best specialisation for all
      // the cloned instances of this call, which result from specialising
      // functions. Hence we don't rewrite the call directly, but match it with
      // the best specialisation once all specialisations are known.
      if (CS.getFunction() == F)
        continue;
      const unsigned Index = It->second;
      AllSpecs[Index].CallSites.push_back(&CS);
    } else {
      // Calculate the specialisation gain.
      Cost CodeSize;
      unsigned Score = 0;
      InstCostVisitor Visitor = getInstCostVisitorFor(F);
      for (ArgInfo &A : S.Args) {
        CodeSize += Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual);
        Score += getInliningBonus(A.Formal, A.Actual);
      }
      CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();

      unsigned CodeSizeSavings = getCostValue(CodeSize);
      unsigned SpecSize = FuncSize - CodeSizeSavings;

      auto IsProfitable = [&]() -> bool {
        // No check required.
        if (ForceSpecialization)
          return true;

        LLVM_DEBUG(
            dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
                   << Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
        // Minimum inlining bonus.
        if (Score > MinInliningBonus * FuncSize / 100)
          return true;

        LLVM_DEBUG(
            dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
                   << CodeSizeSavings << " ("
                   << (CodeSizeSavings * 100 / FuncSize) << "%)}\n");
        // Minimum codesize savings.
        if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100)
          return false;

        // Lazily compute the Latency, to avoid unnecessarily computing BFI.
        unsigned LatencySavings =
            getCostValue(Visitor.getLatencySavingsForKnownConstants());

        LLVM_DEBUG(
            dbgs() << "FnSpecialization: Specialization bonus {Latency = "
                   << LatencySavings << " ("
                   << (LatencySavings * 100 / FuncSize) << "%)}\n");

        // Minimum latency savings.
        if (LatencySavings < MinLatencySavings * FuncSize / 100)
          return false;
        // Maximum codesize growth.
        if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
          return false;

        Score += std::max(CodeSizeSavings, LatencySavings);
        return true;
      };
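      // For example (illustrative), with FuncSize = 1000 and the default
      // thresholds, a candidate passes with an inlining bonus above 3000
      // (300%), or with codesize and latency savings of at least 200 (20%)
      // each.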
      // Discard unprofitable specialisations.
      if (!IsProfitable())
        continue;

      // Create a new specialisation entry.
      auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
      if (CS.getFunction() != F)
        Spec.CallSites.push_back(&CS);
      const unsigned Index = AllSpecs.size() - 1;
      UniqueSpecs[S] = Index;
      if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
        It->second.second = Index + 1;
    }
  }

  return !UniqueSpecs.empty();
}
bool FunctionSpecializer::isCandidateFunction(Function *F) {
  if (F->isDeclaration() || F->arg_empty())
    return false;

  if (F->hasFnAttribute(Attribute::NoDuplicate))
    return false;

  // Do not specialize the cloned function again.
  if (Specializations.contains(F))
    return false;

  // If we're optimizing the function for size, we shouldn't specialize it.
  if (F->hasOptSize() || shouldOptimizeForSize(F, nullptr, nullptr))
    return false;

  // Exit if the function is not executable. There's no point in specializing
  // a dead function.
  if (!Solver.isBlockExecutable(&F->getEntryBlock()))
    return false;

  // No point in specializing a function that will ultimately get inlined.
  if (F->hasFnAttribute(Attribute::AlwaysInline))
    return false;

  LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
                    << "\n");
  return true;
}
Function *FunctionSpecializer::createSpecialization(Function *F,
                                                    const SpecSig &S) {
  Function *Clone = cloneCandidateFunction(F, Specializations.size() + 1);

  // The original function does not necessarily have internal linkage, but the
  // clone must.
  Clone->setLinkage(GlobalValue::InternalLinkage);

  // Initialize the lattice state of the arguments of the function clone,
  // marking the argument on which we specialized the function constant
  // with the given value.
  Solver.setLatticeValueForSpecializationArguments(Clone, S.Args);
  Solver.markBlockExecutable(&Clone->front());
  Solver.addArgumentTrackedFunction(Clone);
  Solver.addTrackedFunction(Clone);

  // Mark all the specialized functions.
  Specializations.insert(Clone);
  ++NumSpecsCreated;

  return Clone;
}
/// Compute the inlining bonus for replacing argument \p A with constant \p C.
/// The heuristic below is only concerned with exposing inlining
/// opportunities via indirect call promotion. If the argument is not a
/// (potentially casted) function pointer, give up.
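/// For example (illustrative, hypothetical names): if @compute(ptr %fn) is
/// specialized on %fn == @square, an indirect call 'call i32 %fn(i32 %x)'
/// inside the clone becomes a direct call to @square, which the inliner can
/// then consider.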
unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {
  Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
  if (!CalledFunction)
    return 0;

  // Get TTI for the called function (used for the inline cost).
  auto &CalleeTTI = (GetTTI)(*CalledFunction);

  // Look at all the call sites whose called value is the argument.
  // Specializing the function on the argument would allow these indirect
  // calls to be promoted to direct calls. If the indirect call promotion
  // would likely enable the called function to be inlined, specializing is a
  // good idea.
  int InliningBonus = 0;
  for (User *U : A->users()) {
    auto *CS = dyn_cast<CallBase>(U);
    if (!CS)
      continue;
    if (CS->getCalledOperand() != A)
      continue;

    // Get the cost of inlining the called function at this call site. Note
    // that this is only an estimate. The called function may eventually
    // change in a way that leads to it not being inlined here, even though
    // inlining looks profitable now. For example, one of its called
    // functions may be inlined into it, making the called function too large
    // to be inlined into this call site.
    //
    // We apply a boost for performing indirect call promotion by increasing
    // the default threshold by the threshold for indirect calls.
    InlineParams Params = getInlineParams();
    Params.DefaultThreshold += InlineConstants::IndirectCallThreshold;
    InlineCost IC =
        getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);

    // We clamp the bonus for this call to be between zero and the default
    // threshold.
    if (IC.isAlways())
      InliningBonus += Params.DefaultThreshold;
    else if (IC.isVariable() && IC.getCostDelta() > 0)
      InliningBonus += IC.getCostDelta();

    LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << InliningBonus
                      << " for user " << *U << "\n");
  }

  return InliningBonus > 0 ? static_cast<unsigned>(InliningBonus) : 0;
}
/// Determine if it is possible to specialise the function for constant values
/// of the formal parameter \p A.
bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
  // No point in specialization if the argument is unused.
  if (A->user_empty())
    return false;

  Type *Ty = A->getType();
  if (!Ty->isPointerTy() &&
      (!SpecializeLiteralConstant ||
       (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
    return false;

  // The SCCP solver does not record an argument that will be constructed on
  // the stack.
  if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory())
    return false;

  // For non-argument-tracked functions every argument is overdefined.
  if (!Solver.isArgumentTrackedFunction(A->getParent()))
    return true;

  // Check the lattice value and decide if we should attempt to specialize
  // based on this argument. No point in specialization if the lattice value
  // is already a constant.
  bool IsOverdefined =
      Ty->isStructTy()
          ? any_of(Solver.getStructLatticeValueFor(A),
                   SCCPSolver::isOverdefined)
          : SCCPSolver::isOverdefined(Solver.getLatticeValueFor(A));

  LLVM_DEBUG(
      if (IsOverdefined)
        dbgs() << "FnSpecialization: Found interesting parameter "
               << A->getNameOrAsOperand() << "\n";
      else
        dbgs() << "FnSpecialization: Nothing to do, parameter "
               << A->getNameOrAsOperand() << " is already constant\n";);

  return IsOverdefined;
}
/// Check if the value \p V (an actual argument) is a constant or can only
/// have a constant value. Return that constant.
Constant *FunctionSpecializer::getCandidateConstant(Value *V) {
  if (isa<PoisonValue>(V))
    return nullptr;

  // Select for possible specialisation values that are constants or
  // are deduced to be constants or constant ranges with a single element.
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    C = Solver.getConstantOrNull(V);

  // Don't specialize on (anything derived from) the address of a non-constant
  // global variable, unless explicitly enabled.
  if (C && C->getType()->isPointerTy() && !C->isNullValue())
    if (auto *G = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
        G && !(G->isConstant() || SpecializeOnAddress))
      return nullptr;

  return C;
}
void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
                                          const Spec *End) {
  // Collect the call sites that need updating.
  SmallVector<CallBase *> ToUpdate;
  for (User *U : F->users())
    if (auto *CS = dyn_cast<CallBase>(U);
        CS && CS->getCalledFunction() == F &&
        Solver.isBlockExecutable(CS->getParent()))
      ToUpdate.push_back(CS);

  unsigned NCallsLeft = ToUpdate.size();
  for (CallBase *CS : ToUpdate) {
    bool ShouldDecrementCount = CS->getFunction() == F;

    // Find the best matching specialisation.
    const Spec *BestSpec = nullptr;
    for (const Spec &S : make_range(Begin, End)) {
      if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score))
        continue;

      if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) {
            unsigned ArgNo = Arg.Formal->getArgNo();
            return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual;
          }))
        continue;

      BestSpec = &S;
    }

    if (BestSpec) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *CS
                        << " to call " << BestSpec->Clone->getName() << "\n");
      CS->setCalledFunction(BestSpec->Clone);
      ShouldDecrementCount = true;
    }

    if (ShouldDecrementCount)
      --NCallsLeft;
  }

  // If the function has been completely specialized, the original function
  // is no longer needed. Mark it unreachable.
  // NOTE: If the address of a function is taken, we cannot treat it as dead
  // yet.
  if (NCallsLeft == 0 && Solver.isArgumentTrackedFunction(F) &&
      !F->hasAddressTaken()) {
    Solver.markFunctionUnreachable(F);
    DeadFunctions.insert(F);
  }
}