1/*-------------------------------------------------------------------------
4 * ctype functions adapted to work on pg_wchar (a/k/a chr),
5 * and functions to cache the results of wholesale ctype probing.
7 * This file is #included by regcomp.c; it's not meant to compile standalone.
9 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
13 * src/backend/regex/regc_pg_locale.c
15 *-------------------------------------------------------------------------
31 * Hard-wired character properties for C locale
33 #define PG_ISDIGIT 0x01
34 #define PG_ISALPHA 0x02
35 #define PG_ISALNUM (PG_ISDIGIT | PG_ISALPHA)
36 #define PG_ISUPPER 0x04
37 #define PG_ISLOWER 0x08
38 #define PG_ISGRAPH 0x10
39 #define PG_ISPRINT 0x20
40 #define PG_ISPUNCT 0x40
41 #define PG_ISSPACE 0x80
176 * pg_set_regex_collation: set collation for these functions to obey
178 * This is called when beginning compilation or execution of a regexp.
179 * Since there's no need for reentrancy of regexp operations, it's okay
180 * to store the results in static variables.
190 * This typically means that the parser could not resolve a conflict
191 * of implicit collations, so report it that way.
194 (
errcode(ERRCODE_INDETERMINATE_COLLATION),
195 errmsg(
"could not determine which collation to use for regular expression"),
196 errhint(
"Use the COLLATE clause to set the collation explicitly.")));
199 if (collation == C_COLLATION_OID)
202 * Some callers expect regexes to work for C_COLLATION_OID before
203 * catalog access is available, so we can't call
204 * pg_newlocale_from_collation().
212 if (!
locale->deterministic)
214 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
215 errmsg(
"nondeterministic collations are not supported for regular expressions")));
220 * C/POSIX collations use this path regardless of database encoding.
263 /* We define word characters as alnum class plus underscore */
357 * These functions cache the results of probing libc's ctype behavior for
358 * all character codes of interest in a given encoding/collation. The
359 * result is provided as a "struct cvec", but notice that the representation
360 * is a touch different from a cvec created by regc_cvec.c: we allocate the
361 * chrs[] and ranges[] arrays separately from the struct so that we can
362 * realloc them larger at need. This is okay since the cvecs made here
363 * should never be freed by freecvec().
365 * We use malloc not palloc since we mustn't lose control on out-of-memory;
366 * the main regex code expects us to return a failure indication instead.
375 struct cvec cv;
/* cache entry contents */
382 * Add a chr or range to pcc->cv; return false if we run out of memory
422 * Given a probe function (e.g., pg_wc_isalpha) get a struct cvec for all
423 * chrs satisfying the probe function. The active collation is the one
424 * previously set by pg_set_regex_collation. Return NULL if out of memory.
426 * Note that the result must not be freed or modified by caller.
438 * Do we already have the answer cached?
448 * Nope, so initialize some workspace ...
466 * Decide how many character codes we ought to look through. In general
467 * we don't go past MAX_SIMPLE_CHR; chr codes above that are handled at
468 * runtime using the "high colormap" mechanism. However, in C locale
469 * there's no need to go further than 127, and if we only have a 1-byte
470 * <ctype.h> API there's no need to go further than that can handle.
472 * If it's not MAX_SIMPLE_CHR that's constraining the search, mark the
473 * output cvec as not having any locale-dependent behavior, since there
474 * will be no need to do any run-time locale checks. (The #if's here
475 * would always be true for production values of MAX_SIMPLE_CHR, but it's
476 * useful to allow it to be small for testing purposes.)
480#if MAX_SIMPLE_CHR >= 127
502 nmatches = 0;
/* number of consecutive matches */
504 for (cur_chr = 0; cur_chr <= max_chr; cur_chr++)
506 if ((*probefunc) (cur_chr))
508 else if (nmatches > 0)
510 if (!
store_match(pcc, cur_chr - nmatches, nmatches))
517 if (!
store_match(pcc, cur_chr - nmatches, nmatches))
521 * We might have allocated more memory than needed, if so free it
555 * Success, link it into cache chain
#define OidIsValid(objectId)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
pg_locale_t pg_newlocale_from_collation(Oid collid)
unsigned char pg_ascii_tolower(unsigned char ch)
unsigned char pg_ascii_toupper(unsigned char ch)
static int pg_wc_islower(pg_wchar c)
static int pg_wc_isword(pg_wchar c)
static int pg_wc_isspace(pg_wchar c)
static pg_wchar pg_wc_tolower(pg_wchar c)
static int pg_wc_ispunct(pg_wchar c)
static pg_ctype_cache * pg_ctype_cache_list
static int pg_wc_isgraph(pg_wchar c)
static pg_wchar pg_wc_toupper(pg_wchar c)
static bool store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
static int pg_wc_isprint(pg_wchar c)
static int pg_wc_isalnum(pg_wchar c)
int(* pg_wc_probefunc)(pg_wchar c)
static int pg_wc_isdigit(pg_wchar c)
static struct pg_locale_struct dummy_c_locale
void pg_set_regex_collation(Oid collation)
static struct cvec * pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
static pg_locale_t pg_regex_locale
static int pg_wc_isupper(pg_wchar c)
static int pg_wc_isalpha(pg_wchar c)
static const unsigned char pg_char_properties[128]
struct pg_ctype_cache pg_ctype_cache
pg_wchar(* wc_toupper)(pg_wchar wc, pg_locale_t locale)
bool(* wc_ispunct)(pg_wchar wc, pg_locale_t locale)
bool(* wc_isprint)(pg_wchar wc, pg_locale_t locale)
bool(* wc_isalpha)(pg_wchar wc, pg_locale_t locale)
pg_wchar(* wc_tolower)(pg_wchar wc, pg_locale_t locale)
bool(* wc_isupper)(pg_wchar wc, pg_locale_t locale)
bool(* wc_isspace)(pg_wchar wc, pg_locale_t locale)
bool(* wc_isgraph)(pg_wchar wc, pg_locale_t locale)
bool(* wc_islower)(pg_wchar wc, pg_locale_t locale)
bool(* wc_isalnum)(pg_wchar wc, pg_locale_t locale)
bool(* wc_isdigit)(pg_wchar wc, pg_locale_t locale)
pg_wc_probefunc probefunc
struct pg_ctype_cache * next
const struct ctype_methods * ctype