git.postgresql.org Git - postgresql.git/commitdiff

git projects / postgresql.git / commitdiff
? search:
summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 0e754ab)
Be more wary of missing statistics in eqjoinsel_semi().
2011年4月12日 05:59:42 +0000 (01:59 -0400)
2011年4月12日 05:59:42 +0000 (01:59 -0400)
In particular, if we don't have real ndistinct estimates for both sides,
fall back to assuming that half of the left-hand rows have join partners.
This is what was done in 8.2 and 8.3 (cf. nulltestsel() in those versions).
It's pretty stupid, but it won't lead us to think that an antijoin produces
no rows out, as seen in a recent example from Uwe Schroeder.


diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 5c7e8325a198e3b9b638edf66962c84ad0efce4c..bbc344f16bd80f0812966c606771d5bb60039892 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2329,7 +2329,9 @@ eqjoinsel_semi(Oid operator,
bool *hasmatch1;
bool *hasmatch2;
double nullfrac1 = stats1->stanullfrac;
- double matchfreq1;
+ double matchfreq1,
+ uncertainfrac,
+ uncertain;
int i,
nmatches;
@@ -2382,18 +2384,26 @@ eqjoinsel_semi(Oid operator,
* the uncertain rows that a fraction nd2/nd1 have join partners. We
* can discount the known-matched MCVs from the distinct-values counts
* before doing the division.
+ *
+ * Crude as the above is, it's completely useless if we don't have
+ * reliable ndistinct values for both sides. Hence, if either nd1
+ * or nd2 is default, punt and assume half of the uncertain rows
+ * have join partners.
*/
- nd1 -= nmatches;
- nd2 -= nmatches;
- if (nd1 <= nd2 || nd2 <= 0)
- selec = Max(matchfreq1, 1.0 - nullfrac1);
- else
+ if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
{
- double uncertain = 1.0 - matchfreq1 - nullfrac1;
-
- CLAMP_PROBABILITY(uncertain);
- selec = matchfreq1 + (nd2 / nd1) * uncertain;
+ nd1 -= nmatches;
+ nd2 -= nmatches;
+ if (nd1 <= nd2 || nd2 <= 0)
+ uncertainfrac = 1.0;
+ else
+ uncertainfrac = nd2 / nd1;
}
+ else
+ uncertainfrac = 0.5;
+ uncertain = 1.0 - matchfreq1 - nullfrac1;
+ CLAMP_PROBABILITY(uncertain);
+ selec = matchfreq1 + uncertainfrac * uncertain;
}
else
{
@@ -2403,15 +2413,20 @@ eqjoinsel_semi(Oid operator,
*/
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
- if (vardata1->rel)
- nd1 = Min(nd1, vardata1->rel->rows);
- if (vardata2->rel)
- nd2 = Min(nd2, vardata2->rel->rows);
+ if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+ {
+ if (vardata1->rel)
+ nd1 = Min(nd1, vardata1->rel->rows);
+ if (vardata2->rel)
+ nd2 = Min(nd2, vardata2->rel->rows);
- if (nd1 <= nd2 || nd2 <= 0)
- selec = 1.0 - nullfrac1;
+ if (nd1 <= nd2 || nd2 <= 0)
+ selec = 1.0 - nullfrac1;
+ else
+ selec = (nd2 / nd1) * (1.0 - nullfrac1);
+ }
else
- selec = (nd2 / nd1) * (1.0 - nullfrac1);
+ selec = 0.5 * (1.0 - nullfrac1);
}
if (have_mcvs1)
This is the main PostgreSQL git repository.
RSS Atom

AltStyle によって変換されたページ (->オリジナル) /