1/*-------------------------------------------------------------------------
4 * Routines to compute clause selectivities
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/optimizer/path/clausesel.c
13 *-------------------------------------------------------------------------
23#include "utils/fmgroids.h"
28 * Data structure for accumulating info about possible range-query
29 * clause pairs in clauselist_selectivity.
34 Node *
var;
/* The common variable of the clauses */
50 bool use_extended_stats);
52/****************************************************************************
53 * ROUTINES TO COMPUTE SELECTIVITIES
54 ****************************************************************************/
57 * clauselist_selectivity -
58 * Compute the selectivity of an implicitly-ANDed list of boolean
59 * expression clauses. The list can be empty, in which case 1.0
60 * must be returned. List elements may be either RestrictInfos
61 * or bare expression clauses --- the former is preferred since
62 * it allows caching of results.
64 * See clause_selectivity() for the meaning of the additional parameters.
66 * The basic approach is to apply extended statistics first, on as many
67 * clauses as possible, in order to capture cross-column dependencies etc.
68 * The remaining clauses are then estimated by taking the product of their
69 * selectivities, but that's only right if they have independent
70 * probabilities, and in reality they are often NOT independent even if they
71 * only refer to a single column. So, we want to be smarter where we can.
73 * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
74 * are recognized as possible range query components if they are restriction
75 * opclauses whose operators have scalarltsel or a related function as their
76 * restriction selectivity estimator. We pair up clauses of this form that
77 * refer to the same variable. An unpairable clause of this kind is simply
78 * multiplied into the selectivity product in the normal way. But when we
79 * find a pair, we know that the selectivities represent the relative
80 * positions of the low and high bounds within the column's range, so instead
81 * of figuring the selectivity as hisel * losel, we can figure it as hisel +
82 * losel - 1. (To visualize this, see that hisel is the fraction of the range
83 * below the high bound, while losel is the fraction above the low bound; so
84 * hisel can be interpreted directly as a 0..1 value but we need to convert
85 * losel to 1-losel before interpreting it as a value. Then the available
86 * range is 1-losel to hisel. However, this calculation double-excludes
87 * nulls, so really we need hisel + losel + null_frac - 1.)
89 * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
90 * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
91 * yields an impossible (negative) result.
93 * A free side-effect is that we can recognize redundant inequalities such
94 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
96 * Of course this is all very dependent on the behavior of the inequality
97 * selectivity functions; perhaps some day we can generalize the approach.
107 jointype, sjinfo,
true);
111 * clauselist_selectivity_ext -
112 * Extended version of clauselist_selectivity(). If "use_extended_stats"
113 * is false, all extended statistics will be ignored, and only per-column
114 * statistics will be used.
122 bool use_extended_stats)
132 * If there's exactly one clause, just go directly to
133 * clause_selectivity_ext(). None of what we might do below is relevant.
137 varRelid, jointype, sjinfo,
141 * Determine if these clauses reference a single relation. If so, and if
142 * it has extended statistics, try to apply those.
148 * Estimate as many clauses as possible using extended statistics.
150 * 'estimatedclauses' is populated with the 0-based list position
151 * index of clauses estimated here, and that should be ignored below.
154 jointype, sjinfo, rel,
155 &estimatedclauses,
false);
159 * Apply normal selectivity estimates for remaining clauses. We'll be
160 * careful to skip any clauses which were already estimated above.
162 * Anything that doesn't look like a potential rangequery clause gets
163 * multiplied into s1 and forgotten. Anything that does gets inserted into
176 * Skip this clause if it's already been estimated by some other
182 /* Compute the selectivity of this clause in isolation */
187 * Check for being passed a RestrictInfo.
189 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
190 * 0.0; just use that rather than looking for range pairs.
195 if (rinfo->pseudoconstant)
206 * See if it looks like a restriction clause with a pseudoconstant on
207 * one side. (Anything more complicated than that might not behave in
208 * the simple way we are expecting.) Most of the tests here can be
209 * done more efficiently with rinfo than without.
214 bool varonleft =
true;
219 ok = (rinfo->num_base_rels == 1) &&
221 rinfo->right_relids) ||
224 rinfo->left_relids)));
237 * If it's not a "<"/"<="/">"/">=" operator, just merge the
238 * selectivity in generically. But if it's the right oprrest,
239 * add the clause to rqlist for later processing.
246 varonleft,
true,
s2);
251 varonleft,
false,
s2);
254 /* Just merge the selectivity in generically */
258 continue;
/* drop to loop bottom */
262 /* Not the right form, so treat it generically. */
267 * Now scan the rangequery pair list.
269 while (rqlist != NULL)
275 /* Successfully matched a pair of range clauses */
279 * Exact equality to the default value probably means the
280 * selectivity function punted. This is not airtight but should
292 /* Adjust for double-exclusion of NULLs */
294 varRelid, jointype, sjinfo);
297 * A zero or slightly negative s2 should be converted into a
298 * small positive value; we probably are dealing with a very
299 * tight range and got a bogus result due to roundoff errors.
300 * However, if s2 is very negative, then we probably have
301 * default selectivity estimates on one or both sides of the
302 * range that we failed to recognize above for some reason.
309 * No data available --- use a default estimate that
310 * is small, but not real small.
317 * It's just roundoff error; use a small positive
324 /* Merge in the selectivity of the pair of clauses */
329 /* Only found one of a pair, merge it in generically */
335 /* release storage and advance */
336 rqnext = rqlist->
next;
345 * clauselist_selectivity_or -
346 * Compute the selectivity of an implicitly-ORed list of boolean
347 * expression clauses. The list can be empty, in which case 0.0
348 * must be returned. List elements may be either RestrictInfos
349 * or bare expression clauses --- the former is preferred since
350 * it allows caching of results.
352 * See clause_selectivity() for the meaning of the additional parameters.
354 * The basic approach is to apply extended statistics first, on as many
355 * clauses as possible, in order to capture cross-column dependencies etc.
356 * The remaining clauses are then estimated as if they were independent.
364 bool use_extended_stats)
373 * Determine if these clauses reference a single relation. If so, and if
374 * it has extended statistics, try to apply those.
380 * Estimate as many clauses as possible using extended statistics.
382 * 'estimatedclauses' is populated with the 0-based list position
383 * index of clauses estimated here, and that should be ignored below.
386 jointype, sjinfo, rel,
387 &estimatedclauses,
true);
391 * Estimate the remaining clauses as if they were independent.
393 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
394 * for the probable overlap of selected tuple sets.
396 * XXX is this too conservative?
406 * Skip this clause if it's already been estimated by some other
413 jointype, sjinfo, use_extended_stats);
422 * addRangeClause --- add a new range clause for clauselist_selectivity
424 * Here is where we try to match up pairs of range-query clauses
437 is_lobound = !isLTsel;
/* x < something is high bound */
442 is_lobound = isLTsel;
/* something < x is low bound */
445 for (rqelem = *rqlist; rqelem; rqelem = rqelem->
next)
448 * We use full equal() here because the "var" might be a function of
449 * one or more attributes of the same relation...
453 /* Found the right group to put this clause in */
465 * We have found two similar clauses, such as
467 * Keep only the more restrictive one.
485 * We have found two similar clauses, such as
487 * Keep only the more restrictive one.
497 /* No matching var found, so make a new clause-pair data structure */
512 rqelem->
next = *rqlist;
517 * find_single_rel_for_clauses
518 * Examine each clause in 'clauses' and determine if all clauses
519 * reference only a single relation. If so return that relation,
520 * otherwise return NULL.
534 * If we have a list of bare clauses rather than RestrictInfos, we
535 * could pull out their relids the hard way with pull_varnos().
536 * However, currently the extended-stats machinery won't do anything
537 * with non-RestrictInfo clauses anyway, so there's no point in
538 * spending extra cycles; just fail if that's what we have.
540 * An exception to that rule is if we have a bare BoolExpr AND clause.
541 * We treat this as a special case because the restrictinfo machinery
542 * doesn't build RestrictInfos on top of AND clauses.
554 lastrelid = rel->
relid;
555 else if (rel->
relid != lastrelid)
565 continue;
/* we can ignore variable-free clauses */
567 return NULL;
/* multiple relations in this clause */
569 lastrelid = relid;
/* first clause referencing a relation */
570 else if (relid != lastrelid)
571 return NULL;
/* relation not same as last one */
577 return NULL;
/* no clauses */
581 * treat_as_join_clause -
582 * Decide whether an operator clause is to be handled by the
583 * restriction or join estimator. Subroutine for clause_selectivity().
592 * Caller is forcing restriction mode (eg, because we are examining an
593 * inner indexscan qual).
597 else if (sjinfo == NULL)
600 * It must be a restriction clause, since it's being evaluated at a
608 * Otherwise, it's a join if there's more than one base relation used.
609 * We can optimize this calculation if an rinfo was passed.
611 * XXX Since we know the clause is being evaluated at a join, the
612 * only way it could be single-relation is if it was delayed by outer
613 * joins. We intentionally count only baserels here, not OJs that
614 * might be present in rinfo->clause_relids, so that we direct such
615 * cases to the restriction qual estimators not join estimators.
616 * Eventually some notice should be taken of the possibility of
617 * injected nulls, but we'll likely want to do that in the restriction
618 * estimators rather than starting to treat such cases as join quals.
621 return (rinfo->num_base_rels > 1);
629 * clause_selectivity -
630 * Compute the selectivity of a general boolean expression clause.
632 * The clause can be either a RestrictInfo or a plain expression. If it's
633 * a RestrictInfo, we try to cache the selectivity for possible re-use,
634 * so passing RestrictInfos is preferred.
636 * varRelid is either 0 or a rangetable index.
638 * When varRelid is not 0, only variables belonging to that relation are
639 * considered in computing selectivity; other vars are treated as constants
640 * of unknown values. This is appropriate for estimating the selectivity of
641 * a join clause that is being used as a restriction clause in a scan of a
642 * nestloop join's inner relation --- varRelid should then be the ID of the
645 * When varRelid is 0, all variables are treated as variables. This
646 * is appropriate for ordinary join clauses and restriction clauses.
648 * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
649 * if the clause isn't a join clause.
651 * sjinfo is NULL for a non-join clause, otherwise it provides additional
652 * context information about the join being performed. There are some
654 * 1. For a special (not INNER) join, sjinfo is always a member of
655 * root->join_info_list.
656 * 2. For an INNER join, sjinfo is just a transient struct, and only the
657 * relids and jointype fields in it can be trusted.
658 * It is possible for jointype to be different from sjinfo->jointype.
659 * This indicates we are considering a variant join: either with
660 * the LHS and RHS switched, or with one input unique-ified.
662 * Note: when passing nonzero varRelid, it's normally appropriate to set
663 * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
664 * join clause; because we aren't treating it as a join clause.
674 jointype, sjinfo,
true);
678 * clause_selectivity_ext -
679 * Extended version of clause_selectivity(). If "use_extended_stats" is
680 * false, all extended statistics will be ignored, and only per-column
681 * statistics will be used.
689 bool use_extended_stats)
691 Selectivity s1 = 0.5;
/* default for any unhandled clause type */
693 bool cacheable =
false;
695 if (clause == NULL)
/* can this still happen? */
703 * If the clause is marked pseudoconstant, then it will be used as a
704 * gating qual and should not affect selectivity estimates; hence
705 * return 1.0. The only exception is that a constant FALSE may be
706 * taken as having selectivity 0.0, since it will surely mean no rows
707 * out of the plan. This case is simple enough that we need not
708 * bother caching the result.
710 if (rinfo->pseudoconstant)
717 * If possible, cache the result of the selectivity calculation for
718 * the clause. We can cache if varRelid is zero or the clause
719 * contains only vars of that relid --- otherwise varRelid will affect
720 * the result, so mustn't cache. Outer join quals might be examined
721 * with either their join's actual jointype or JOIN_INNER, so we need
722 * two cache variables to remember both cases. Note: we assume the
723 * result won't change if we are switching the input relations or
724 * considering a unique-ified case, so we only need one cache variable
725 * for all non-JOIN_INNER cases.
728 rinfo->num_base_rels == 0 ||
729 (rinfo->num_base_rels == 1 &&
732 /* Cacheable --- do we already have the result? */
735 if (rinfo->norm_selec >= 0)
736 return rinfo->norm_selec;
740 if (rinfo->outer_selec >= 0)
741 return rinfo->outer_selec;
747 * Proceed with examination of contained clause. If the clause is an
748 * OR-clause, we want to look at the variant with sub-RestrictInfos,
749 * so that per-subclause selectivities can be cached.
752 clause = (
Node *) rinfo->orclause;
762 * We probably shouldn't ever see an uplevel Var here, but if we do,
763 * return the default selectivity...
765 if (
var->varlevelsup == 0 &&
766 (varRelid == 0 || varRelid == (
int)
var->varno))
768 /* Use the restriction selectivity function for a bool Var */
774 /* bool constant is pretty easy... */
777 s1 = con->constisnull ? 0.0 :
782 /* see if we can replace the Param */
787 /* bool constant is pretty easy... */
790 s1 = con->constisnull ? 0.0 :
795 /* XXX any way to do better than default? */
800 /* inverse of the selectivity of the underlying clause */
810 /* share code with clauselist_selectivity() */
821 * Almost the same thing as clauselist_selectivity, but with the
822 * clauses connected by OR.
838 /* Estimate selectivity for a join clause. */
841 opclause->inputcollid,
847 /* Estimate selectivity for a restriction clause. */
850 opclause->inputcollid,
855 * DistinctExpr has the same representation as OpExpr, but the
856 * contained operator is "=" not "<>", so we must negate the result.
857 * This estimation method doesn't give the right behavior for nulls,
858 * but it's better than doing nothing.
867 /* Try to get an estimate from the support function, if any */
871 funcclause->inputcollid,
878 /* If no support, fall back on boolvarsel */
884 /* Use node specific selectivity calculation function */
895 /* Use node specific selectivity calculation function */
904 /* Use node specific selectivity calculation function */
906 ((
NullTest *) clause)->nulltesttype,
914 /* Use node specific selectivity calculation function */
924 /* CURRENT OF selects at most one row of its table */
933 /* Not sure this case is needed, but it can't hurt */
943 /* Not sure this case is needed, but it can't hurt */
954 * For anything else, see if we can consider it as a boolean variable.
955 * This only works if it's an immutable expression in Vars of a single
956 * relation; but there's no point in us checking that here because
957 * boolvarsel() will do it internally, and return a suitable default
958 * selectivity if not.
963 /* Cache the result if possible */
967 rinfo->norm_selec =
s1;
969 rinfo->outer_selec =
s1;
972#ifdef SELECTIVITY_DEBUG
974#endif /* SELECTIVITY_DEBUG */
bool bms_is_member(int x, const Bitmapset *a)
bool bms_get_singleton_member(const Bitmapset *a, int *member)
int NumRelids(PlannerInfo *root, Node *clause)
bool is_pseudo_constant_clause(Node *clause)
Node * estimate_expression_value(PlannerInfo *root, Node *node)
bool is_pseudo_constant_clause_relids(Node *clause, Relids relids)
static void addRangeClause(RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2)
static RelOptInfo * find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
Selectivity clause_selectivity_ext(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
struct RangeQueryClause RangeQueryClause
static bool treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo, int varRelid, SpecialJoinInfo *sjinfo)
static Selectivity clauselist_selectivity_or(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
bool equal(const void *a, const void *b)
Selectivity statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses, bool is_or)
RegProcedure get_oprrest(Oid opno)
void pfree(void *pointer)
static bool is_andclause(const void *clause)
static bool is_orclause(const void *clause)
static Node * get_rightop(const void *clause)
static bool is_opclause(const void *clause)
static bool is_funcclause(const void *clause)
static bool is_notclause(const void *clause)
static Expr * get_notclausearg(const void *notclause)
static Node * get_leftop(const void *clause)
#define IsA(nodeptr, _type_)
static int list_length(const List *l)
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Selectivity function_selectivity(PlannerInfo *root, Oid funcid, List *args, Oid inputcollid, bool is_join, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
static bool DatumGetBool(Datum X)
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
Selectivity scalararraysel(PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Selectivity rowcomparesel(PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
#define DEFAULT_RANGE_INEQ_SEL
struct RangeQueryClause * next