From 97930cf578e28c01f67fe4006ffcdbb5aedf18c2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: 2011年8月31日 16:04:48 -0400 Subject: [PATCH] Improve eqjoinsel's ndistinct clamping to work for multiple levels of join. This patch fixes an oversight in my commit 7f3eba30c9d622d1981b1368f2d79ba0999cdff2 of 2008年10月23日. That patch accounted for baserel restriction clauses that reduced the number of rows coming out of a table (and hence the number of possibly-distinct values of a join variable), but not for join restriction clauses that might have been applied at a lower level of join. To account for the latter, look up the sizes of the min_lefthand and min_righthand inputs of the current join, and clamp with those in the same way as for the base relations. Noted while investigating a complaint from Ben Chobot, although this in itself doesn't seem to explain his report. Back-patch to 8.4; previous versions used different estimation methods for which this heuristic isn't relevant. --- src/backend/utils/adt/selfuncs.c | 81 ++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index e0658265ecb..18bbfe9b8d7 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -142,9 +142,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root, FmgrInfo *opproc, bool isgt, Datum constval, Oid consttype); static double eqjoinsel_inner(Oid operator, - VariableStatData *vardata1, VariableStatData *vardata2); + VariableStatData *vardata1, VariableStatData *vardata2, + RelOptInfo *rel1, RelOptInfo *rel2); static double eqjoinsel_semi(Oid operator, - VariableStatData *vardata1, VariableStatData *vardata2); + VariableStatData *vardata1, VariableStatData *vardata2, + RelOptInfo *rel1, RelOptInfo *rel2); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -173,6 +175,7 @@ static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max); +static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid vartype, Oid opfamily, Const *prefixcon); @@ -2008,24 +2011,47 @@ eqjoinsel(PG_FUNCTION_ARGS) VariableStatData vardata1; VariableStatData vardata2; bool join_is_reversed; + RelOptInfo *rel1; + RelOptInfo *rel2; get_join_variables(root, args, sjinfo, &vardata1, &vardata2, &join_is_reversed); + /* + * Identify the join's direct input relations. We use the min lefthand + * and min righthand as the inputs, even though the join might actually + * get done with larger input relations. The min inputs are guaranteed to + * have been formed by now, though, and always using them ensures + * consistency of estimates. + */ + if (!join_is_reversed) + { + rel1 = find_join_input_rel(root, sjinfo->min_lefthand); + rel2 = find_join_input_rel(root, sjinfo->min_righthand); + } + else + { + rel1 = find_join_input_rel(root, sjinfo->min_righthand); + rel2 = find_join_input_rel(root, sjinfo->min_lefthand); + } + switch (sjinfo->jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_FULL: - selec = eqjoinsel_inner(operator, &vardata1, &vardata2); + selec = eqjoinsel_inner(operator, &vardata1, &vardata2, + rel1, rel2); break; case JOIN_SEMI: case JOIN_ANTI: if (!join_is_reversed) - selec = eqjoinsel_semi(operator, &vardata1, &vardata2); + selec = eqjoinsel_semi(operator, &vardata1, &vardata2, + rel1, rel2); else selec = eqjoinsel_semi(get_commutator(operator), - &vardata2, &vardata1); + &vardata2, &vardata1, + rel2, rel1); break; default: /* other values not expected here */ @@ -2051,7 +2077,8 @@ eqjoinsel(PG_FUNCTION_ARGS) */ static double eqjoinsel_inner(Oid operator, - VariableStatData *vardata1, VariableStatData *vardata2) + VariableStatData *vardata1, VariableStatData *vardata2, + RelOptInfo *rel1, RelOptInfo *rel2) { double selec; double nd1; @@ -2252,15 +2279,19 @@ eqjoinsel_inner(Oid operator, * be, providing a crude correction for the selectivity of restriction * clauses on those relations. (We don't do that in the other path * since there we are comparing the nd values to stats for the whole - * relations.) + * relations.) We can apply this clamp both with respect to the base + * relations from which the join variables come, and to the immediate + * input relations of the current join. */ double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0; double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0; if (vardata1->rel) nd1 = Min(nd1, vardata1->rel->rows); + nd1 = Min(nd1, rel1->rows); if (vardata2->rel) nd2 = Min(nd2, vardata2->rel->rows); + nd2 = Min(nd2, rel2->rows); selec = (1.0 - nullfrac1) * (1.0 - nullfrac2); if (nd1> nd2) @@ -2287,7 +2318,8 @@ eqjoinsel_inner(Oid operator, */ static double eqjoinsel_semi(Oid operator, - VariableStatData *vardata1, VariableStatData *vardata2) + VariableStatData *vardata1, VariableStatData *vardata2, + RelOptInfo *rel1, RelOptInfo *rel2) { double selec; double nd1; @@ -2435,8 +2467,10 @@ eqjoinsel_semi(Oid operator, { if (vardata1->rel) nd1 = Min(nd1, vardata1->rel->rows); + nd1 = Min(nd1, rel1->rows); if (vardata2->rel) nd2 = Min(nd2, vardata2->rel->rows); + nd2 = Min(nd2, rel2->rows); if (nd1 <= nd2 || nd2 <= 0) selec = 1.0 - nullfrac1; @@ -4759,6 +4793,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, return have_data; } +/* + * find_join_input_rel + * Look up the input relation for a join. + * + * We assume that the input relation's RelOptInfo must have been constructed + * already. + */ +static RelOptInfo * +find_join_input_rel(PlannerInfo *root, Relids relids) +{ + RelOptInfo *rel = NULL; + + switch (bms_membership(relids)) + { + case BMS_EMPTY_SET: + /* should not happen */ + break; + case BMS_SINGLETON: + rel = find_base_rel(root, bms_singleton_member(relids)); + break; + case BMS_MULTIPLE: + rel = find_join_rel(root, relids); + break; + } + + if (rel == NULL) + elog(ERROR, "could not find RelOptInfo for given relids"); + + return rel; +} + /*------------------------------------------------------------------------- * -- 2.39.5

AltStyle によって変換されたページ (->オリジナル) /