1/*-------------------------------------------------------------------------
4 * Planner support functions for LIKE, regex, and related operators.
6 * These routines handle special optimization of operators that can be
7 * used with index scans even though they are not known to the executor's
8 * indexscan machinery. The key idea is that these operators allow us
9 * to derive approximate indexscan qual clauses, such that any tuples
10 * that pass the operator clause itself must also satisfy the simpler
11 * indexscan condition(s). Then we can use the indexscan machinery
12 * to avoid scanning as much of the table as we'd otherwise have to,
13 * while applying the original operator as a qpqual condition to ensure
14 * we deliver only the tuples we want. (In essence, we're using a regular
15 * index as if it were a lossy index.)
17 * An example of what we're doing is
18 * textfield LIKE 'abc%def'
19 * from which we can generate the indexscanable conditions
20 * textfield >= 'abc' AND textfield < 'abd'
21 * which allow efficient scanning of an index on textfield.
22 * (In reality, character set and collation issues make the transformation
23 * from LIKE to indexscan limits rather harder than one might think ...
24 * but that's the basic idea.)
26 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
27 * Portions Copyright (c) 1994, Regents of the University of California
31 * src/backend/utils/adt/like_support.c
33 *-------------------------------------------------------------------------
98 bool case_insensitive);
100 bool case_insensitive,
101 int fixed_prefix_len);
112 * Planner support functions for LIKE, regex, and related operators
154/* Common code for the above */
163 * Make a selectivity estimate for a function call, just as we'd do if
164 * the call was via the corresponding operator.
172 * For the moment we just punt. If patternjoinsel is ever
173 * improved to do better, this should be made to call it.
179 /* Share code with operator restriction selectivity functions */
194 /* Try to convert operator/function call to index conditions */
198 * Currently we have no "reverse" match operators with the pattern on
199 * the left, so we only need consider cases with the indexkey on the left.
237 * match_pattern_prefix
238 * Try to generate an indexqual for a LIKE or regex operator.
258 bool collation_aware;
264 * Can't do anything with a non-constant or NULL pattern argument.
266 * Note that since we restrict ourselves to cases with a hard constant on
267 * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
268 * about verifying that.
271 ((
Const *) rightop)->constisnull)
273 patt = (
Const *) rightop;
276 * Try to extract a fixed prefix from the pattern.
281 /* fail if no fixed prefix */
286 * Identify the operators we want to use, based on the type of the
287 * left-hand argument. Usually these are just the type's regular
288 * comparison operators, but if we are considering one of the semi-legacy
289 * "pattern" opclasses, use the "pattern" operators instead. Those are
290 * not collation-sensitive but always use C collation, as we want. The
291 * selected operators also determine the needed type of the prefix constant.
298 if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
300 eqopr = TextEqualOperator;
301 ltopr = TextPatternLessOperator;
302 geopr = TextPatternGreaterEqualOperator;
303 collation_aware =
false;
305 else if (opfamily == TEXT_SPGIST_FAM_OID)
307 eqopr = TextEqualOperator;
308 ltopr = TextPatternLessOperator;
309 geopr = TextPatternGreaterEqualOperator;
310 /* This opfamily has direct support for prefixing */
311 preopr = TextPrefixOperator;
312 collation_aware =
false;
316 eqopr = TextEqualOperator;
317 ltopr = TextLessOperator;
318 geopr = TextGreaterEqualOperator;
319 collation_aware =
true;
326 * Note that here, we need the RHS type to be text, so that the
327 * comparison value isn't improperly truncated to NAMEDATALEN.
329 eqopr = NameEqualTextOperator;
330 ltopr = NameLessTextOperator;
331 geopr = NameGreaterEqualTextOperator;
332 collation_aware =
true;
336 if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
338 eqopr = BpcharEqualOperator;
339 ltopr = BpcharPatternLessOperator;
340 geopr = BpcharPatternGreaterEqualOperator;
341 collation_aware =
false;
345 eqopr = BpcharEqualOperator;
346 ltopr = BpcharLessOperator;
347 geopr = BpcharGreaterEqualOperator;
348 collation_aware =
true;
350 rdatatype = BPCHAROID;
353 eqopr = ByteaEqualOperator;
354 ltopr = ByteaLessOperator;
355 geopr = ByteaGreaterEqualOperator;
356 collation_aware =
false;
357 rdatatype = BYTEAOID;
360 /* Can't get here unless we're attached to the wrong operator */
365 * If necessary, coerce the prefix constant to the right type. The given
366 * prefix constant is either text or bytea type, therefore the only case
367 * where we need to do anything is when converting text to bpchar. Those
368 * two types are binary-compatible, so relabeling the Const node is sufficient.
374 rdatatype == BPCHAROID);
379 * If we found an exact-match pattern, generate an "=" indexqual.
381 * Here and below, check to see whether the desired operator is actually
382 * supported by the index opclass, and fail quietly if not. This allows
383 * us to not be concerned with specific opclasses (except for the legacy
384 * "pattern" cases); any index that correctly implements the operators will work.
391 if (indexcollation != expr_coll)
401 * Anything other than Pattern_Prefix_Exact is not supported if the
402 * expression collation is nondeterministic. The optimized equality or
403 * prefix tests use bytewise comparisons, which is not consistent with
404 * nondeterministic collations.
406 * expr_coll is not set for a non-collation-aware data type such as bytea.
412 * Otherwise, we have a nonempty required prefix of the values. Some
413 * opclasses support prefix checks directly, otherwise we'll try to
414 * generate a range constraint.
426 * Since we need a range constraint, it's only going to work reliably if
427 * the index is collation-insensitive or has "C" collation. Note that
428 * here we are looking at the index's collation, not the expression's
429 * collation -- this test is *not* dependent on the LIKE/regex operator's collation.
432 if (collation_aware &&
437 * We can always say "x >= prefix".
447 * If we can create a string larger than the prefix, we can say
448 * "x < greaterstr". NB: we rely on make_greater_string() to generate
449 * a guaranteed-greater string, not just a probably-greater string.
450 * In general this is only guaranteed in C locale, so we'd better be
451 * using a C-locale index collation.
461 (
Expr *) leftop, (
Expr *) greaterstr,
463 result =
lappend(result, expr);
471 * patternsel_common - generic code for pattern-match restriction selectivity.
473 * To support using this from either the operator or function paths, caller
474 * may pass either operator OID or underlying function OID; we look up the
475 * latter from the former if needed. (We could just have patternsel() call
476 * get_opcode(), but the work would be wasted if we don't have a need to
477 * compare a fixed prefix to the pg_statistic data.)
479 * Note that oprid and/or opfuncid should be for the positive-match operator
480 * even when negate is true.
504 Const *prefix = NULL;
506 double nullfrac = 0.0;
510 * Initialize result to the appropriate default estimate depending on
511 * whether it's a match or not-match operator.
519 * If expression is not variable op constant, then punt and return the default estimate.
523 &vardata, &other, &varonleft))
525 if (!varonleft || !
IsA(other,
Const))
532 * If the constant is NULL, assume operator is strict and return zero, ie,
533 * operator will never return TRUE. (It's zero even for a negator op.)
535 if (((
Const *) other)->constisnull)
540 constval = ((
Const *) other)->constvalue;
541 consttype = ((
Const *) other)->consttype;
544 * The right-hand const is type text or bytea for all supported operators.
545 * We do not expect to see binary-compatible types here, since
546 * const-folding should have relabeled the const to exactly match the
547 * operator's declared type.
549 if (consttype != TEXTOID && consttype != BYTEAOID)
556 * Similarly, the exposed type of the left-hand side should be one of
557 * those we know. (Do not look at vardata.atttype, which might be
558 * something binary-compatible but different.) We can use it to identify
559 * the comparison operators and the required type of the comparison
560 * constant, much as in match_pattern_prefix().
567 eqopr = TextEqualOperator;
568 ltopr = TextLessOperator;
569 geopr = TextGreaterEqualOperator;
575 * Note that here, we need the RHS type to be text, so that the
576 * comparison value isn't improperly truncated to NAMEDATALEN.
578 eqopr = NameEqualTextOperator;
579 ltopr = NameLessTextOperator;
580 geopr = NameGreaterEqualTextOperator;
584 eqopr = BpcharEqualOperator;
585 ltopr = BpcharLessOperator;
586 geopr = BpcharGreaterEqualOperator;
587 rdatatype = BPCHAROID;
590 eqopr = ByteaEqualOperator;
591 ltopr = ByteaLessOperator;
592 geopr = ByteaGreaterEqualOperator;
593 rdatatype = BYTEAOID;
596 /* Can't get here unless we're attached to the wrong operator */
602 * Grab the nullfrac for use below.
609 nullfrac = stats->stanullfrac;
613 * Pull out any fixed prefix implied by the pattern, and estimate the
614 * fractional selectivity of the remainder of the pattern. Unlike many
615 * other selectivity estimators, we use the pattern operator's actual
616 * collation for this step. This is not because we expect the collation
617 * to make a big difference in the selectivity estimate (it seldom would),
618 * but because we want to be sure we cache compiled regexps under the
619 * right cache key, so that they can be re-used at runtime.
621 patt = (
Const *) other;
623 &prefix, &rest_selec);
626 * If necessary, coerce the prefix constant to the right type. The only
627 * case where we need to do anything is when converting text to bpchar.
628 * Those two types are binary-compatible, so relabeling the Const node is sufficient.
631 if (prefix && prefix->
consttype != rdatatype)
634 rdatatype == BPCHAROID);
641 * Pattern specifies an exact match, so estimate as for '='
643 result =
var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
649 * Not exact-match pattern. If we have a sufficiently large
650 * histogram, estimate selectivity for the histogram part of the
651 * population by counting matches in the histogram. If not, estimate
652 * selectivity of the fixed prefix and remainder of pattern
653 * separately, then combine the two to get an estimate of the
654 * selectivity for the part of the column population represented by
655 * the histogram. (For small histograms, we combine these approaches.)
658 * We then add up data for any most-common-values values; these are
659 * not in the histogram population, and we can get exact answers for
660 * them by applying the pattern operator, so there's no reason to
661 * approximate. (If the MCVs cover a significant part of the total
662 * population, this gives us a big leg up in accuracy.)
670 /* Try to use the histogram entries to get selectivity */
679 /* If not at least 100 entries, use the heuristic method */
694 if (selec < 0)
/* fewer than 10 histogram entries? */
699 * For histogram sizes from 10 to 100, we combine the
700 * histogram and heuristic selectivities, putting increasingly
701 * more trust in the histogram for larger sizes.
703 double hist_weight = hist_size / 100.0;
705 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
709 /* In any case, don't believe extremely small or large estimates. */
712 else if (selec > 0.9999)
716 * If we have most-common-values info, add up the fractions of the MCV
717 * entries that satisfy MCV OP PATTERN. These fractions contribute
718 * directly to the result selectivity. Also add up the total fraction
719 * represented by MCV entries.
726 * Now merge the results from the MCV and histogram calculations,
727 * realizing that the histogram covers only the non-null values that
728 * are not listed in MCV.
730 selec *= 1.0 - nullfrac - sumcommon;
735 /* now adjust if we wanted not-match rather than match */
737 result = 1.0 - result - nullfrac;
739 /* result should be in range, but make sure... */
754 * Fix impedance mismatch between SQL-callable functions and patternsel_common
766 * If this is for a NOT LIKE or similar operator, get the corresponding
767 * positive-match operator and work with that.
773 elog(
ERROR,
"patternsel called for operator without a negator");
787 * regexeqsel - Selectivity of regular-expression pattern match.
796 * icregexeqsel - Selectivity of case-insensitive regex match.
805 * likesel - Selectivity of LIKE pattern match.
814 * prefixsel - selectivity of prefix operator
824 * iclikesel - Selectivity of ILIKE pattern match.
833 * regexnesel - Selectivity of regular-expression pattern non-match.
842 * icregexnesel - Selectivity of case-insensitive regex non-match.
851 * nlikesel - Selectivity of LIKE pattern non-match.
860 * icnlikesel - Selectivity of ILIKE pattern non-match.
869 * patternjoinsel - Generic code for pattern-match join selectivity.
874 /* For the moment we just punt. */
879 * regexeqjoinsel - Join selectivity of regular-expression pattern match.
888 * icregexeqjoinsel - Join selectivity of case-insensitive regex match.
897 * likejoinsel - Join selectivity of LIKE pattern match.
906 * prefixjoinsel - Join selectivity of prefix operator
915 * iclikejoinsel - Join selectivity of ILIKE pattern match.
924 * regexnejoinsel - Join selectivity of regex non-match.
933 * icregexnejoinsel - Join selectivity of case-insensitive regex non-match.
942 * nlikejoinsel - Join selectivity of LIKE pattern non-match.
951 * icnlikejoinsel - Join selectivity of ILIKE pattern non-match.
960/*-------------------------------------------------------------------------
962 * Pattern analysis functions
964 * These routines support analysis of LIKE and regular-expression patterns
965 * by the planner/optimizer. It's important that they agree with the
966 * regular-expression code in backend/regex/ and the LIKE code in
967 * backend/utils/adt/like.c. Also, the computation of the fixed prefix
968 * must be conservative: if we report a string longer than the true fixed
969 * prefix, the query may produce actually wrong answers, rather than just
970 * getting a bad selectivity estimate!
972 *-------------------------------------------------------------------------
976 * Extract the fixed prefix, if any, for a pattern.
978 * *prefix is set to a palloc'd prefix string (in the form of a Const node),
979 * or to NULL if no fixed prefix exists for the pattern.
980 * If rest_selec is not NULL, *rest_selec is set to an estimate of the
981 * selectivity of the remainder of the pattern (without any fixed prefix).
982 * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
984 * The return value distinguishes no fixed prefix, a partial prefix,
985 * or an exact-match-only pattern.
1001 /* the right-hand const is type text or bytea */
1002 Assert(
typeid == BYTEAOID ||
typeid == TEXTOID);
1004 if (case_insensitive)
1006 if (
typeid == BYTEAOID)
1008 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1009 errmsg(
"case insensitive matching not supported on type bytea")));
1014 * This typically means that the parser could not resolve a
1015 * conflict of implicit collations, so report it that way.
1018 (
errcode(ERRCODE_INDETERMINATE_COLLATION),
1019 errmsg(
"could not determine which collation to use for ILIKE"),
1020 errhint(
"Use the COLLATE clause to set the collation explicitly.")));
1026 if (
typeid != BYTEAOID)
1029 pattlen = strlen(patt);
1036 patt = (
char *)
palloc(pattlen);
1041 match =
palloc(pattlen + 1);
1043 for (pos = 0; pos < pattlen; pos++)
1045 /* % and _ are wildcard characters in LIKE */
1046 if (patt[pos] ==
'%' ||
1050 /* Backslash escapes the next character */
1051 if (patt[pos] ==
'\\')
1058 /* Stop if case-varying character (it's sort of a wildcard) */
1059 if (case_insensitive &&
1063 match[match_pos++] = patt[pos];
1066 match[match_pos] =
'\0';
1068 if (
typeid != BYTEAOID)
1073 if (rest_selec != NULL)
1080 /* in LIKE, an empty pattern is an exact match! */
1099 * Should be unnecessary, there are no bytea regex operators defined. As
1100 * such, it should be noted that the rest of this function has *not* been
1101 * made safe for binary (possibly NULL containing) strings.
1103 if (
typeid == BYTEAOID)
1105 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1106 errmsg(
"regular-expression matching not supported on type bytea")));
1108 /* Use the regexp machinery to extract the prefix, if any */
1110 case_insensitive, collation,
1115 *prefix_const = NULL;
1117 if (rest_selec != NULL)
1132 if (rest_selec != NULL)
1136 /* Exact match, so there's no additional selectivity */
1168 prefix, rest_selec);
1172 prefix, rest_selec);
1176 prefix, rest_selec);
1180 prefix, rest_selec);
1183 /* Prefix type work is trivial. */
1194 if (rest_selec != NULL)
1195 *rest_selec = 1.0;
/* all */
1198 elog(
ERROR,
"unrecognized ptype: %d", (
int) ptype);
1206 * Estimate the selectivity of a fixed prefix for a pattern match.
1208 * A fixed prefix "foo" is estimated as the selectivity of the expression
1209 * "variable >= 'foo' AND variable < 'fop'".
1211 * The selectivity estimate is with respect to the portion of the column
1212 * population represented by the histogram --- the caller must fold this
1213 * together with info about MCVs and NULLs.
1215 * We use the given comparison operators and collation to do the estimation.
1216 * The given variable and Const must be of the associated datatype(s).
1218 * XXX Note: we make use of the upper bound to estimate operator selectivity
1219 * even if the locale is such that we cannot rely on the upper-bound string.
1220 * The selectivity only needs to be approximately right anyway, so it seems
1221 * more useful to use the upper-bound code than not.
1231 Const *greaterstrcon;
1234 /* Estimate the selectivity of "x >= prefix" */
1238 geopr, &opproc,
true,
true,
1240 prefixcon->constvalue,
1245 /* No histogram is present ... return a suitable default estimate */
1250 * If we can create a string larger than the prefix, say "x < greaterstr".
1259 ltopr, &opproc,
false,
false,
1261 greaterstrcon->constvalue,
1264 /* ineq_histogram_selectivity worked before, it shouldn't fail now */
1268 * Merge the two selectivities in the same way as for a range query
1269 * (see clauselist_selectivity()). Note that we don't need to worry
1270 * about double-exclusion of nulls, since ineq_histogram_selectivity
1271 * doesn't count those anyway.
1277 * If the prefix is long then the two bounding values might be too close
1278 * together for the histogram to distinguish them usefully, resulting in a
1279 * zero estimate (plus or minus roundoff error). To avoid returning a
1280 * ridiculously small estimate, compute the estimated selectivity for
1281 * "variable = 'foo'", and clamp to that. (Obviously, the resultant
1282 * estimate should be at least that.)
1284 * We apply this even if we couldn't make a greater string. That case
1285 * suggests that the prefix is near the maximum possible, and thus
1286 * probably off the end of the histogram, and thus we probably got a very
1287 * small estimate from the >= condition; so we still need to clamp.
1289 eq_sel =
var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
1290 false,
true,
false);
1299 * Estimate the selectivity of a pattern of the specified type.
1300 * Note that any fixed prefix of the pattern will have been removed already,
1301 * so actually we may be looking at just a fragment of the pattern.
1303 * For now, we use a very simplistic approach: fixed characters reduce the
1304 * selectivity a good deal, character ranges reduce it a little,
1305 * wildcards (such as % for LIKE or .* for regex) increase it.
1308 #define FIXED_CHAR_SEL 0.20 /* about 1/5 */
1309 #define CHAR_RANGE_SEL 0.25
1310 #define ANY_CHAR_SEL 0.9 /* not 1, since it won't match end-of-string */
1311 #define FULL_WILDCARD_SEL 5.0
1312 #define PARTIAL_WILDCARD_SEL 2.0
1320 /* Skip any leading wildcard; it's already factored into initial sel */
1321 for (pos = 0; pos < pattlen; pos++)
1323 if (patt[pos] !=
'%' && patt[pos] !=
'_')
1327 for (; pos < pattlen; pos++)
1329 /* % and _ are wildcard characters in LIKE */
1330 if (patt[pos] ==
'%')
1332 else if (patt[pos] ==
'_')
1334 else if (patt[pos] ==
'\\')
1336 /* Backslash quotes the next character */
1345 /* Could get sel > 1 if multiple wildcards */
1355 int paren_depth = 0;
1356 int paren_pos = 0;
/* dummy init to keep compiler quiet */
1359 /* since this function recurses, it could be driven to stack overflow */
1362 for (pos = 0; pos < pattlen; pos++)
1364 if (patt[pos] ==
'(')
1366 if (paren_depth == 0)
1367 paren_pos = pos;
/* remember start of parenthesized item */
1370 else if (patt[pos] ==
')' && paren_depth > 0)
1373 if (paren_depth == 0)
1375 pos - (paren_pos + 1),
1378 else if (patt[pos] ==
'|' && paren_depth == 0)
1381 * If unquoted | is present at paren level 0 in pattern, we have
1382 * multiple alternatives; sum their probabilities.
1385 pattlen - (pos + 1),
1387 break;
/* rest of pattern is now processed */
1389 else if (patt[pos] ==
'[')
1391 bool negclass =
false;
1393 if (patt[++pos] ==
'^')
1398 if (patt[pos] ==
']')
/* ']' at start of class is not special */
1400 while (pos < pattlen && patt[pos] !=
']')
1402 if (paren_depth == 0)
1405 else if (patt[pos] ==
'.')
1407 if (paren_depth == 0)
1410 else if (patt[pos] ==
'*' ||
1414 /* Ought to be smarter about quantifiers... */
1415 if (paren_depth == 0)
1418 else if (patt[pos] ==
'{')
1420 while (pos < pattlen && patt[pos] !=
'}')
1422 if (paren_depth == 0)
1425 else if (patt[pos] ==
'\\')
1427 /* backslash quotes the next character */
1431 if (paren_depth == 0)
1436 if (paren_depth == 0)
1440 /* Could get sel > 1 if multiple wildcards */
1448 int fixed_prefix_len)
1452 /* If patt doesn't end with $, consider it to have a trailing wildcard */
1453 if (pattlen > 0 && patt[pattlen - 1] ==
'$' &&
1454 (pattlen == 1 || patt[pattlen - 2] !=
'\\'))
1456 /* has trailing $ */
1467 * If there's a fixed prefix, discount its selectivity. We have to be
1468 * careful here since a very long prefix could result in pow's result
1469 * underflowing to zero (in which case "sel" probably has as well).
1471 if (fixed_prefix_len > 0)
1479 /* Make sure result stays in range */
1485 * Check whether char is a letter (and, hence, subject to case-folding)
1487 * In multibyte character sets or with ICU, we can't use isalpha, and it does
1488 * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
1489 * Instead, just assume any non-ASCII char is potentially case-varying, and
1490 * hard-wire knowledge of which ASCII chars are letters.
1497 return (
c >=
'A' &&
c <=
'Z') || (
c >=
'a' &&
c <=
'z');
1504 * For bytea, the increment function need only increment the current byte
1505 * (there are no multibyte characters to worry about).
1517 * Try to generate a string greater than the given string or any
1518 * string it is a prefix of. If successful, return a palloc'd string
1519 * in the form of a Const node; else return NULL.
1521 * The caller must provide the appropriate "less than" comparison function
1522 * for testing the strings, along with the collation to use.
1524 * The key requirement here is that given a prefix string, say "foo",
1525 * we must be able to generate another string "fop" that is greater than
1526 * all strings "foobar" starting with "foo". We can test that we have
1527 * generated a string greater than the prefix string, but in non-C collations
1528 * that is not a bulletproof guarantee that an extension of the string might
1529 * not sort after it; an example is that "foo " is less than "foo!", but it
1530 * is not clear that a "dictionary" sort ordering will consider "foo!" less
1531 * than "foo bar". CAUTION: Therefore, this function should be used only for
1532 * estimation purposes when working in a non-C collation.
1534 * To try to catch most cases where an extended string might otherwise sort
1535 * before the result value, we determine which of the strings "Z", "z", "y",
1536 * and "9" is seen as largest by the collation, and append that to the given
1537 * prefix before trying to find a string that compares as larger.
1539 * To search for a greater string, we repeatedly "increment" the rightmost
1540 * character, using an encoding-specific character incrementer function.
1541 * When it's no longer possible to increment the last character, we truncate
1542 * off that character and start incrementing the next-to-rightmost.
1543 * For example, if "z" were the last character in the sort order, then we
1544 * could produce "foo" as a string greater than "fonz".
1546 * This could be rather slow in the worst case, but in most cases we
1547 * won't have to try more than one or two strings before succeeding.
1549 * Note that it's important for the character incrementer not to be too anal
1550 * about producing every possible character code, since in some cases the only
1551 * way to get a larger string is to increment a previous character position.
1552 * So we don't want to spend too much time trying every possible character
1553 * code at the last position. A good rule of thumb is to be sure that we
1554 * don't try more than 256*K values for a K-byte character (and definitely
1555 * not 256^K, which is what an exhaustive search would approach).
1564 char *cmptxt = NULL;
1568 * Get a modifiable copy of the prefix string in C-string format, and set
1569 * up the string we will compare to as a Datum. In C locale this can just
1570 * be the given prefix string, otherwise we need to add a suffix. Type
1571 * BYTEA sorts bytewise so it never needs a suffix either.
1573 if (datatype == BYTEAOID)
1581 cmpstr = str_const->constvalue;
1585 if (datatype == NAMEOID)
1587 str_const->constvalue));
1590 len = strlen(workstr);
1592 cmpstr = str_const->constvalue;
1595 /* If first time through, determine the suffix to use */
1596 static char suffixchar = 0;
1597 static Oid suffixcollation = 0;
1599 if (!suffixchar || suffixcollation != collation)
1604 if (
varstr_cmp(best, 1,
"z", 1, collation) < 0)
1606 if (
varstr_cmp(best, 1,
"y", 1, collation) < 0)
1608 if (
varstr_cmp(best, 1,
"9", 1, collation) < 0)
1611 suffixcollation = collation;
1614 /* And build the string to compare to */
1615 if (datatype == NAMEOID)
1618 memcpy(cmptxt, workstr,
len);
1619 cmptxt[
len] = suffixchar;
1620 cmptxt[
len + 1] =
'\0';
1634 /* Select appropriate character-incrementer function */
1635 if (datatype == BYTEAOID)
1640 /* And search ... */
1644 unsigned char *lastchar;
1646 /* Identify the last character --- for bytea, just the last byte */
1647 if (datatype == BYTEAOID)
1651 lastchar = (
unsigned char *) (workstr +
len - charlen);
1654 * Try to generate a larger string by incrementing the last character
1655 * (for BYTEA, we treat each byte as a character).
1657 * Note: the incrementer function is expected to return true if it's
1658 * generated a valid-per-the-encoding new character, otherwise false.
1659 * The contents of the character on false return are unspecified.
1661 while (charinc(lastchar, charlen))
1663 Const *workstr_const;
1665 if (datatype == BYTEAOID)
1673 workstr_const->constvalue)))
1675 /* Successfully made a string larger than cmpstr */
1679 return workstr_const;
1682 /* No good, release unusable value and try again */
1684 pfree(workstr_const);
1688 * No luck here, so truncate off the last character and try to
1689 * increment the next one.
1692 workstr[
len] =
'\0';
1704 * Generate a Datum of the appropriate type from a C string.
1705 * Note that all of the supported types are pass-by-ref, so the
1706 * returned value should be pfree'd if no longer needed.
1714 * We cheat a little by assuming that CStringGetTextDatum() will do for
1715 * bpchar and varchar constants too...
1717 if (datatype == NAMEOID)
1719 else if (datatype == BYTEAOID)
1726 * Generate a Const node of the appropriate type from a C string.
1736 * We only need to support a few datatypes here, so hard-wire properties
1737 * instead of incurring the expense of catalog lookups.
1744 collation = DEFAULT_COLLATION_OID;
1749 collation = C_COLLATION_OID;
1759 elog(
ERROR,
"unexpected datatype in string_to_const: %u",
1764 return makeConst(datatype, -1, collation, constlen,
1765 conval,
false,
false);
1769 * Generate a Const node of bytea type from a binary C string and a length.
#define CStringGetTextDatum(s)
#define TextDatumGetCString(d)
Datum byteain(PG_FUNCTION_ARGS)
#define OidIsValid(objectId)
Datum datumCopy(Datum value, bool typByVal, int typLen)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
void fmgr_info(Oid functionId, FmgrInfo *finfo)
#define DatumGetByteaPP(X)
#define PG_RETURN_FLOAT8(x)
#define DatumGetTextPP(X)
#define PG_GETARG_POINTER(n)
#define DirectFunctionCall1(func, arg1)
#define PG_GETARG_INT32(n)
#define PG_RETURN_POINTER(x)
#define PG_GET_COLLATION()
Assert(PointerIsAligned(start, uint64))
#define HeapTupleIsValid(tuple)
static void * GETSTRUCT(const HeapTupleData *tuple)
Datum icregexnesel(PG_FUNCTION_ARGS)
Datum regexnesel(PG_FUNCTION_ARGS)
static Node * like_regex_support(Node *rawreq, Pattern_Type ptype)
Datum iclikesel(PG_FUNCTION_ARGS)
Datum texticregexeq_support(PG_FUNCTION_ARGS)
static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid eqopr, Oid ltopr, Oid geopr, Oid collation, Const *prefixcon)
#define FULL_WILDCARD_SEL
Datum iclikejoinsel(PG_FUNCTION_ARGS)
Datum prefixjoinsel(PG_FUNCTION_ARGS)
static double patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Datum regexeqsel(PG_FUNCTION_ARGS)
static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
Datum likejoinsel(PG_FUNCTION_ARGS)
static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive)
Datum icregexnejoinsel(PG_FUNCTION_ARGS)
static Pattern_Prefix_Status like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
static List * match_pattern_prefix(Node *leftop, Node *rightop, Pattern_Type ptype, Oid expr_coll, Oid opfamily, Oid indexcollation)
Datum nlikejoinsel(PG_FUNCTION_ARGS)
static Datum string_to_datum(const char *str, Oid datatype)
Datum icnlikejoinsel(PG_FUNCTION_ARGS)
static Selectivity regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
Datum texticlike_support(PG_FUNCTION_ARGS)
Datum nlikesel(PG_FUNCTION_ARGS)
static int pattern_char_isalpha(char c, bool is_multibyte, pg_locale_t locale)
static Const * string_to_const(const char *str, Oid datatype)
#define PARTIAL_WILDCARD_SEL
Datum text_starts_with_support(PG_FUNCTION_ARGS)
static Const * string_to_bytea_const(const char *str, size_t str_len)
static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
static Const * make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
Datum icregexeqsel(PG_FUNCTION_ARGS)
Datum textlike_support(PG_FUNCTION_ARGS)
Datum regexnejoinsel(PG_FUNCTION_ARGS)
static bool byte_increment(unsigned char *ptr, int len)
static double patternsel_common(PlannerInfo *root, Oid oprid, Oid opfuncid, List *args, int varRelid, Oid collation, Pattern_Type ptype, bool negate)
static Selectivity regex_selectivity(const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
Datum icnlikesel(PG_FUNCTION_ARGS)
Datum textregexeq_support(PG_FUNCTION_ARGS)
Datum prefixsel(PG_FUNCTION_ARGS)
static double patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Datum likesel(PG_FUNCTION_ARGS)
Datum regexeqjoinsel(PG_FUNCTION_ARGS)
Datum icregexeqjoinsel(PG_FUNCTION_ARGS)
List * lappend(List *list, void *datum)
RegProcedure get_opcode(Oid opno)
bool get_collation_isdeterministic(Oid colloid)
bool op_in_opfamily(Oid opno, Oid opfamily)
Oid get_negator(Oid opno)
Expr * make_opclause(Oid opno, Oid opresulttype, bool opretset, Expr *leftop, Expr *rightop, Oid opcollid, Oid inputcollid)
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
mbcharacter_incrementer pg_database_encoding_character_incrementer(void)
int pg_mbcliplen(const char *mbstr, int len, int limit)
int pg_database_encoding_max_length(void)
void pfree(void *pointer)
Datum nameout(PG_FUNCTION_ARGS)
Datum namein(PG_FUNCTION_ARGS)
Oid exprType(const Node *expr)
static bool is_opclause(const void *clause)
static bool is_funcclause(const void *clause)
#define IsA(nodeptr, _type_)
static int list_length(const List *l)
pg_locale_t pg_newlocale_from_collation(Oid collid)
bool char_is_cased(char ch, pg_locale_t locale)
FormData_pg_statistic * Form_pg_statistic
bool(* mbcharacter_incrementer)(unsigned char *mbstr, int len)
static bool DatumGetBool(Datum X)
static Datum PointerGetDatum(const void *X)
static char * DatumGetCString(Datum X)
static Pointer DatumGetPointer(Datum X)
static Datum CStringGetDatum(const char *X)
char * regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, bool *exact)
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
double var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
#define ReleaseVariableStats(vardata)
#define CLAMP_PROBABILITY(p)
#define DEFAULT_MATCH_SEL
void check_stack_depth(void)
static Size VARSIZE_ANY_EXHDR(const void *PTR)
static char * VARDATA(const void *PTR)
static char * VARDATA_ANY(const void *PTR)
static void SET_VARSIZE(void *PTR, Size len)
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)