1/*-------------------------------------------------------------------------
4 * Functions for exporting info about a regex's NFA
6 * In this implementation, the NFA defines a necessary but not sufficient
7 * condition for a string to match the regex: that is, there can be strings
8 * that match the NFA but don't match the full regex, but not vice versa.
9 * Thus, for example, it is okay for the functions below to treat lookaround
10 * constraints as no-ops, since they merely constrain the string some more.
12 * Notice that these functions return info into caller-provided arrays
13 * rather than doing their own malloc's. This simplifies the APIs by
14 * eliminating a class of error conditions, and in the case of colors
15 * allows the caller to decide how big is too big to bother with.
18 * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group
19 * Portions Copyright (c) 1998, 1999 Henry Spencer
22 * src/backend/regex/regexport.c
24 *-------------------------------------------------------------------------
33 * Get total number of NFA states.
41 cnfa = &((
struct guts *) regex->re_guts)->search;
47 * Get initial state of NFA.
55 cnfa = &((
struct guts *) regex->re_guts)->search;
61 * Get final state of NFA.
69 cnfa = &((
struct guts *) regex->re_guts)->search;
75 * pg_reg_getnumoutarcs() and pg_reg_getoutarcs() mask the existence of LACON
76 * arcs from the caller, treating any LACON as being automatically satisfied.
77 * Since the output representation does not support arcs that consume no
78 * character when traversed, we have to recursively traverse LACON arcs here,
79 * and report whatever normal arcs are reachable by traversing LACON arcs.
80 * Note that this wouldn't work if it were possible to reach the final state
81 * via LACON traversal, but the regex library never builds NFAs that have
82 * LACON arcs leading directly to the final state. (This is because the
83 * regex executor is designed to consume one character beyond the nominal
84 * match end --- possibly an EOS indicator --- so there is always a set of
85 * ordinary arcs leading to the final state.)
87 * traverse_lacons is a recursive subroutine used by both exported functions
88 * to count and then emit the reachable regular arcs. *arcs_count is
89 * incremented by the number of reachable arcs, and as many as will fit in
90 * arcs_len (possibly 0) are emitted into arcs[].
100 * Since this function recurses, it could theoretically be driven to stack
101 * overflow. In practice, this is mostly useful to backstop against a
102 * failure of the regex compiler to remove a loop of LACON arcs.
110 /* Ordinary arc, so count and possibly emit it */
111 int ndx = (*arcs_count)++;
115 arcs[ndx].
co = ca->
co;
116 arcs[ndx].
to = ca->
to;
121 /* LACON arc --- assume it's satisfied and recurse... */
122 /* ... but first, assert it doesn't lead directly to post state */
131 * Get number of outgoing NFA arcs of state number "st".
140 cnfa = &((
struct guts *) regex->re_guts)->search;
150 * Write array of outgoing NFA arcs of state number "st" into arcs[],
151 * whose length arcs_len must be at least as long as indicated by
152 * pg_reg_getnumoutarcs(), else not all arcs will be returned.
162 cnfa = &((
struct guts *) regex->re_guts)->search;
164 if (st < 0 || st >=
cnfa->
nstates || arcs_len <= 0)
171 * Get total number of colors.
179 cm = &((
struct guts *) regex->re_guts)->cmap;
185 * Check if color is beginning of line/string.
187 * (We might at some point need to offer more refined handling of pseudocolors,
188 * but this will do for now.)
196 cnfa = &((
struct guts *) regex->re_guts)->search;
205 * Check if color is end of line/string.
213 cnfa = &((
struct guts *) regex->re_guts)->search;
222 * Get number of member chrs of color number "co".
224 * Note: we return -1 if the color number is invalid, or if it is a special
225 * color (WHITE, RAINBOW, or a pseudocolor), or if the number of members is
227 * Callers should not try to extract the members if -1 is returned.
235 cm = &((
struct guts *) regex->re_guts)->cmap;
237 if (co <= 0 || co > cm->
max)
/* <= 0 rejects WHITE and RAINBOW */
239 if (cm->
cd[co].
flags &
PSEUDO)
/* also pseudocolors (BOS etc) */
243 * If the color appears anywhere in the high colormap, treat its number of
244 * members as uncertain. In principle we could determine all the specific
245 * chrs corresponding to each such entry, but it would be expensive
246 * (particularly if character class tests are required) and it doesn't
252 /* OK, return the known number of member chrs */
257 * Write array of member chrs of color number "co" into chars[],
258 * whose length chars_len must be at least as long as indicated by
259 * pg_reg_getnumcharacters(), else not all chars will be returned.
261 * Fetching the members of WHITE, RAINBOW, or a pseudocolor is not supported.
263 * Caution: this is a relatively expensive operation.
273 cm = &((
struct guts *) regex->re_guts)->cmap;
275 if (co <= 0 || co > cm->
max || chars_len <= 0)
281 * We need only examine the low character map; there should not be any
282 * matching entries in the high map.
289 if (--chars_len == 0)
Assert(PointerIsAligned(start, uint64))
int pg_reg_getnumoutarcs(const regex_t *regex, int st)
int pg_reg_getnumcolors(const regex_t *regex)
int pg_reg_getnumstates(const regex_t *regex)
int pg_reg_getfinalstate(const regex_t *regex)
int pg_reg_getinitialstate(const regex_t *regex)
void pg_reg_getoutarcs(const regex_t *regex, int st, regex_arc_t *arcs, int arcs_len)
int pg_reg_colorisend(const regex_t *regex, int co)
static void traverse_lacons(struct cnfa *cnfa, int st, int *arcs_count, regex_arc_t *arcs, int arcs_len)
void pg_reg_getcharacters(const regex_t *regex, int co, pg_wchar *chars, int chars_len)
int pg_reg_getnumcharacters(const regex_t *regex, int co)
int pg_reg_colorisbegin(const regex_t *regex, int co)
void check_stack_depth(void)
static char chars[TZ_MAX_CHARS]