2/*-------------------------------------------------------------------------
5 * lexical scanner for SQL commands
7 * This lexer used to be part of psql, and that heritage is reflected in
8 * the file name as well as function and typedef names, though it can now
9 * be used by other frontend programs as well. It's also possible to extend
10 * this lexer with a compatible add-on lexer to handle program-specific backslash commands.
13 * This code is mainly concerned with determining where the end of a SQL
14 * statement is: we are looking for semicolons that are not within quotes,
15 * comments, or parentheses. The most reliable way to handle this is to
16 * borrow the backend's flex lexer rules, lock, stock, and barrel. The rules
17 * below are (except for a few) the same as the backend's, but their actions
18 * are just ECHO whereas the backend's actions generally do other things.
20 * XXX The rules in this file must be kept in sync with the backend lexer!!!
22 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
24 * See psqlscan_int.h for additional commentary.
27 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
28 * Portions Copyright (c) 1994, Regents of the University of California
31 * src/fe_utils/psqlscan.l
33 *-------------------------------------------------------------------------
50 * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
51 * doesn't presently make use of that argument, so just declare it as int.
56/* Return values from yylex() */
57 #define LEXRES_EOL 0 /* end of input */
58 #define LEXRES_SEMI 1 /* command-terminating semicolon found */
59 #define LEXRES_BACKSLASH 2 /* backslash command start */
62 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
69%option never-interactive
75%option prefix="psql_yy"
78 * Set the type of yyextra; we use it as a pointer back to the containing PsqlScanState.
81%option extra-type="PsqlScanState"
84 * All of the following definitions and rules should exactly match
85 * src/backend/parser/scan.l so far as the flex patterns are concerned.
86 * The rule bodies are just ECHO as opposed to what the backend does,
87 * however. (But be sure to duplicate code that affects the lexing process,
88 * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
89 * whereas scan.l has a separate one for each exclusive state.
93 * OK, here is a short description of lex/flex rules behavior.
94 * The longest pattern which matches an input string is always chosen.
95 * For equal-length patterns, the first occurring in the rules list is chosen.
96 * INITIAL is the starting state, to which all non-conditional rules apply.
97 * Exclusive states change parsing rules while the state is active. When in
98 * an exclusive state, only those rules defined for that state apply.
100 * We use exclusive states for quoted strings, extended comments,
101 * and to eliminate parsing troubles for numeric strings.
103 * <xb> bit string literal
104 * <xc> extended C-style comments
105 * <xd> delimited identifiers (double-quoted identifiers)
106 * <xh> hexadecimal byte string
107 * <xq> standard quoted strings
108 * <xqs> quote stop (detect continued strings)
109 * <xe> extended quoted strings (support backslash escape sequences)
110 * <xdolq> $foo$ quoted strings
111 * <xui> quoted identifier with Unicode escapes
112 * <xus> quoted string with Unicode escapes
114 * Note: we intentionally don't mimic the backend's <xeu> state; we have
115 * no need to distinguish it from <xe> state, and no good way to get out
116 * of it in error cases. The backend just throws yyerror() in those
117 * cases, but that's not an option here.
132 * In order to make the world safe for Windows and Mac clients as well as
133 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
134 * sequence will be seen as two successive newlines, but that doesn't cause
135 * any problems. Comments that start with -- and extend to the next
136 * newline are treated as equivalent to a single whitespace character.
138 * NOTE a fine point: if there is no newline following --, we will absorb
139 * everything to the end of the input as a comment. This is correct. Older
140 * versions of Postgres failed to recognize -- as a comment if the input
141 * did not end with a newline.
143 * non_newline_space tracks all space characters except newlines.
145 * XXX if you change the set of whitespace characters, fix scanner_isspace() to agree.
150non_newline_space [ \t\f\v]
154comment ("--"{non_newline}*)
156whitespace ({space}+|{comment})
159 * SQL requires at least one newline in the whitespace separating
160 * string literals that are to be concatenated. Silly, but who are we
161 * to argue? Note that {whitespace_with_newline} should not have * after
162 * it, whereas {whitespace} should generally have a * after it...
165special_whitespace ({space}+|{comment}{newline})
166non_newline_whitespace ({non_newline_space}|{comment})
167whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
170/* If we see {quote} then {quotecontinue}, the quoted string continues */
171quotecontinue {whitespace_with_newline}{quote}
174 * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
175 * {quotecontinue}. It might seem that this could just be {whitespace}*,
176 * but if there's a dash after {whitespace_with_newline}, it must be consumed
177 * to see if there's another dash --- which would start a {comment} and thus
178 * allow continuation of the {quotecontinue} token.
180quotecontinuefail {whitespace}*"-"?
183 * It is tempting to scan the string for only those characters
184 * which are allowed. However, this leads to silently swallowed
185 * characters if illegal characters are included in the string.
186 * For example, if xbinside is [01] then B'ABCD' is interpreted
187 * as a zero-length string, and the ABCD' is lost!
188 * Better to pass the string forward and let the input routines
189 * validate the contents.
194/* Hexadecimal byte string */
198/* National character */
201/* Quoted string that allows backslash escapes */
205xeoctesc [\\][0-7]{1,3}
206xehexesc [\\]x[0-9A-Fa-f]{1,2}
207xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
208xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
211 * xqdouble implements embedded quote, ''''
214xqdouble {quote}{quote}
217/* $foo$ style quotes ("dollar quoting")
218 * The quoted string starts with $foo$ where "foo" is an optional string
219 * in the form of an identifier, except that it may not contain "$",
220 * and extends to the first occurrence of an identical string.
221 * There is *no* processing of the quoted text.
223 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
224 * fails to match its trailing "$".
226dolq_start [A-Za-z\200-\377_]
227dolq_cont [A-Za-z\200-\377_0-9]
228dolqdelim \$({dolq_start}{dolq_cont}*)?\$
229dolqfailed \${dolq_start}{dolq_cont}*
233 * Allows embedded spaces and other special characters into identifiers.
238xddouble {dquote}{dquote}
241/* Quoted identifier with Unicode escapes */
242xuistart [uU]&{dquote}
244/* Quoted string with Unicode escapes */
247/* error rule to avoid backup */
253 * The "extended comment" syntax closely resembles allowable operator syntax.
254 * The tricky part here is to get lex to recognize a string starting with
255 * slash-star as a comment, when interpreting it as an operator would produce
256 * a longer match --- remember lex will prefer a longer match! Also, if we
257 * have something like plus-slash-star, lex will think this is a 3-character
258 * operator whereas we want to see it as a + operator and a comment start.
259 * The solution is two-fold:
260 * 1. append {op_chars}* to xcstart so that it matches as much text as
261 * {operator} would. Then the tie-breaker (first matching rule of same
262 * length) ensures xcstart wins. We put back the extra stuff with yyless()
263 * in case it contains a star-slash that should terminate the comment.
264 * 2. In the operator rule, check for slash-star within the operator, and
265 * if found throw it back with yyless(). This handles the plus-slash-star problem.
267 * Dash-dash comments have similar interactions with the operator rule.
269xcstart \/\*{op_chars}*
273ident_start [A-Za-z\200-\377_]
274ident_cont [A-Za-z\200-\377_0-9\$]
276identifier {ident_start}{ident_cont}*
278/* Assorted special-case operators and operator-like tokens */
284 * These operator-like tokens (unlike the above ones) also match the {operator}
285 * rule, which means that they might be overridden by a longer match if they
286 * are followed by a comment start or a + or - character. Accordingly, if you
287 * add to this list, you must also add corresponding code to the {operator}
288 * block to return the correct token in such cases. (This is not needed in
289 * psqlscan.l since the token value is ignored there.)
298 * "self" is the set of chars that should be returned as single-character
299 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
300 * which can be one or more characters long (but if a single-char token
301 * appears in the "self" set, it is not to be returned as an Op). Note
302 * that the sets overlap, but each has some chars that are not in the other.
304 * If you change either set, adjust the character lists appearing in the
305 * rule for "operator"!
307self [,()\[\].;\:\+\-\*\/\%\^<>\=]
308op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%<>\=]
314 * Unary minus is not part of a number here. Instead we pass it separately to
315 * the parser, and there it gets coerced via doNegate().
317 * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
319 * {realfail} is added to prevent the need for scanner
320 * backup when the {real} rule fails to match completely.
327decinteger {decdigit}(_?{decdigit})*
328hexinteger 0[xX](_?{hexdigit})+
329octinteger 0[oO](_?{octdigit})+
330bininteger 0[bB](_?{bindigit})+
336numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
337numericfail {decinteger}\.\.
339real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
340realfail ({decinteger}|{numeric})[Ee][-+]
342/* Positional parameters don't accept underscores. */
346 * An identifier immediately following an integer literal is disallowed because
347 * in some cases it's ambiguous what is meant: for example, 0x1234 could be
348 * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
349 * detect such problems by seeing if integer_junk matches a longer substring
350 * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
351 * bininteger). One "junk" pattern is sufficient because
352 * {decinteger}{identifier} will match all the same strings we'd match with
353 * {hexinteger}{identifier} etc.
355 * Note that the rule for integer_junk must appear after the ones for
356 * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
357 * and integer_junk, and we need hexinteger to be chosen in that case.
359 * Also disallow strings matched by numeric_junk, real_junk and param_junk for consistency.
362integer_junk {decinteger}{identifier}
363numeric_junk {numeric}{identifier}
364real_junk {real}{identifier}
365param_junk \${decdigit}+{identifier}
367/* psql-specific: characters allowed in variable names */
368variable_char [A-Za-z\200-\377_0-9]
373 * Dollar quoted strings are totally opaque, and no escaping is done on them.
374 * Other quoted strings must allow some special characters such as single-quote and newline.
376 * Embedded single-quotes are implemented both in the SQL standard
377 * style of two adjacent single quotes "''" and in the Postgres/Java style
378 * of escaped-quote "\'".
379 * Other embedded escaped characters are matched explicitly and the leading
380 * backslash is dropped from the string.
381 * Note that xcstart must appear before operator, as explained above!
382 * Also whitespace (comment) must appear before operator.
388 /* Declare some local variables inside yylex(), for convenience */
389 PsqlScanState cur_state =
yyextra;
390 PQExpBuffer output_buf = cur_state->output_buf;
393 * Force flex into the state indicated by start_state. This has a
394 * couple of purposes: it lets some of the functions below set a new
395 * starting state without ugly direct access to flex variables, and it
396 * allows us to transition from one flex lexer to another so that we
397 * can lex different parts of the source string using separate lexers.
399 BEGIN(cur_state->start_state);
404 * Note that the whitespace rule includes both true
405 * whitespace and single-line ("--" style) comments.
406 * We suppress whitespace until we have collected some
407 * non-whitespace data. (This interacts with some
408 * decisions in MainLoop(); see there for details.)
410 if (output_buf->len > 0)
415 cur_state->xcdepth = 0;
417 /* Put back any characters past slash-star; see above */
424 cur_state->xcdepth++;
425 /* Put back any characters past slash-star; see above */
431 if (cur_state->xcdepth <= 0)
434 cur_state->xcdepth--;
461 /* Hexadecimal bit type.
462 * At some point we should simply pass the string
463 * forward to the parser and label it there.
464 * In the meantime, place a leading "x" on the string
465 * to mark it for the input routine as a hex string.
472 yyless(1);
/* eat only 'n' this time */
477 if (cur_state->std_strings)
492<xb,xh,xq,xe,xus>{quote} {
494 * When we are scanning a quoted string and see an end
495 * quote, we must look ahead for a possible continuation.
496 * If we don't see one, we know the end quote was in fact
497 * the end of the string. To reduce the lexer table size,
498 * we use a single "xqs" state to do the lookahead for all types of strings.
501 cur_state->state_before_str_stop = YYSTATE;
505<xqs>{quotecontinue} {
507 * Found a quote continuation, so return to the in-quote
508 * state and continue scanning the literal. Nothing is
509 * added to the literal's contents.
511 BEGIN(cur_state->state_before_str_stop);
514<xqs>{quotecontinuefail} |
517 * Failed to see a quote continuation. Throw back
518 * everything after the end quote, and handle the string
519 * according to the state we were in previously.
523 /* There's nothing to echo ... */
526<xq,xe,xus>{xqdouble} {
551 /* This is only needed for \ just before EOF */
556 cur_state->dolqstart =
pg_strdup(yytext);
char * pg_strdup(const char *in)
561 /* throw back all but the initial "$" */
566 if (strcmp(yytext, cur_state->dolqstart) == 0)
568 free(cur_state->dolqstart);
569 cur_state->dolqstart = NULL;
575 * When we fail to match $...$ to dolqstart, transfer
576 * the $... part to the output, but put back the final
577 * $ for rescanning. Consider $delim$...$junk$delim$
590 /* This is only needed for $ inside the quoted text */
618 /* throw back all but the initial u/U */
656 * These rules are specific to psql --- they implement parenthesis
657 * counting and detection of command-ending semicolon. These must
658 * appear before the {self} rule so that they take precedence over it.
662 cur_state->paren_depth++;
667 if (cur_state->paren_depth > 0)
668 cur_state->paren_depth--;
674 if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
676 /* Terminate lexing temporarily */
677 cur_state->start_state = YY_START;
678 cur_state->identifier_count = 0;
684 * psql-specific rules to handle backslash commands and variable
685 * substitution. We want these before {self}, also.
689 /* Force a semi-colon or colon into the query buffer */
691 if (yytext[1] ==
';')
692 cur_state->identifier_count = 0;
void psqlscan_emit(PsqlScanState state, const char *txt, int len)
696 /* Terminate lexing temporarily */
697 cur_state->start_state = YY_START;
702 /* Possible psql variable substitution */
709 if (cur_state->callbacks->get_variable)
710 value = cur_state->callbacks->get_variable(varname,
712 cur_state->cb_passthrough);
718 /* It is a variable, check for recursion */
721 /* Recursive expansion --- don't go there */
724 /* Instead copy the string as is */
729 /* OK, perform substitution */
731 /* yy_scan_string already made buffer active */
738 * if the variable doesn't exist we'll copy the string as is
#define pg_log_warning(...)
char * psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
void psqlscan_push_new_buffer(PsqlScanState state, const char *newstr, const char *varname)
bool psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
void psqlscan_escape_variable(PsqlScanState state, const char *txt, int len, PsqlScanQuoteType quote)
752:\"{variable_char}+\" {
757:\{\?{variable_char}+\} {
void psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
762 * These rules just avoid the need for scanner backup if one of the
763 * three rules above fails to match completely.
767 /* Throw back everything but the colon */
773 /* Throw back everything but the colon */
778:\{\?{variable_char}* {
779 /* Throw back everything but the colon */
784 /* Throw back everything but the colon */
790 * Back to backend-compatible rules.
799 * Check for embedded slash-star or dash-dash; those
800 * are comment starts, so operator must stop there.
801 * Note that slash-star or dash-dash at the first
802 * character will match a prior rule, not this one.
805 char *slashstar = strstr(yytext,
"/*");
806 char *dashdash = strstr(yytext,
"--");
808 if (slashstar && dashdash)
810 /* if both appear, take the first one */
811 if (slashstar > dashdash)
812 slashstar = dashdash;
815 slashstar = dashdash;
817 nchars = slashstar - yytext;
820 * For SQL compatibility, '+' and '-' cannot be the
821 * last char of a multi-char operator unless the operator
822 * contains chars that are not in SQL operators.
823 * The idea is to lex '=-' as two operators, but not
824 * to forbid operator names like '?-' that could not be
825 * sequences of SQL operators.
828 (yytext[nchars - 1] ==
'+' ||
829 yytext[nchars - 1] ==
'-'))
833 for (ic = nchars - 2; ic >= 0; ic--)
836 if (
c ==
'~' ||
c ==
'!' ||
c ==
'@' ||
837 c ==
'#' ||
c ==
'^' ||
c ==
'&' ||
838 c ==
'|' ||
c ==
'`' ||
c ==
'?' ||
845 * didn't find a qualifying character, so remove one trailing [+-]
850 }
while (nchars > 1 &&
851 (yytext[nchars - 1] ==
'+' ||
852 yytext[nchars - 1] ==
'-'));
858 /* Strip the unwanted chars from the token */
896 /* throw back the .., and treat as integer */
919 * We need to track if we are inside a BEGIN .. END block
920 * in a function definition, so that semicolons contained
921 * therein don't terminate the whole statement. Short of
922 * writing a full parser here, the following heuristic
923 * should work. First, we track whether the beginning of
924 * the statement matches CREATE [OR REPLACE]
925 * {FUNCTION|PROCEDURE}
928 if (cur_state->identifier_count == 0)
929 memset(cur_state->identifiers, 0,
sizeof(cur_state->identifiers));
937 if (cur_state->identifier_count <
sizeof(cur_state->identifiers))
938 cur_state->identifiers[cur_state->identifier_count] =
pg_tolower((
unsigned char) yytext[0]);
941 cur_state->identifier_count++;
943 if (cur_state->identifiers[0] ==
'c' &&
944 (cur_state->identifiers[1] ==
'f' || cur_state->identifiers[1] ==
'p' ||
945 (cur_state->identifiers[1] ==
'o' && cur_state->identifiers[2] ==
'r' &&
946 (cur_state->identifiers[3] ==
'f' || cur_state->identifiers[3] ==
'p'))) &&
947 cur_state->paren_depth == 0)
950 cur_state->begin_depth++;
954 * CASE also ends with END. We only need to track
955 * this if we are already inside a BEGIN.
957 if (cur_state->begin_depth >= 1)
958 cur_state->begin_depth++;
962 if (cur_state->begin_depth > 0)
963 cur_state->begin_depth--;
int pg_strcasecmp(const char *s1, const char *s2)
unsigned char pg_tolower(unsigned char ch)
975 if (cur_state->buffer_stack == NULL)
977 cur_state->start_state = YY_START;
982 * We were expanding a variable, so pop the inclusion
983 * stack and keep lexing
void psqlscan_select_top_buffer(PsqlScanState state)
void psqlscan_pop_buffer_stack(PsqlScanState state)
994 * Create a lexer working state struct.
996 * callbacks is a struct of function pointers that encapsulate some
997 * behavior we need from the surrounding program. This struct must
998 * remain valid for the lifespan of the PsqlScanState.
1007 state->callbacks = callbacks;
1009 yylex_init(&
state->scanner);
1019 * Destroy a lexer working state struct, releasing all resources.
1028 yylex_destroy(
state->scanner);
1034 * Set the callback passthrough pointer for the lexer.
1036 * This could have been integrated into psql_scan_create, but keeping it
1037 * separate allows the application to change the pointer later, which might be useful.
1043 state->cb_passthrough = passthrough;
1047 * Set up to perform lexing of the given input line.
1049 * The text at *line, extending for line_len bytes, will be scanned by
1050 * subsequent calls to the psql_scan routines. psql_scan_finish should
1051 * be called when scanning is complete. Note that the lexer retains
1052 * a pointer to the storage at *line --- this string must not be altered
1053 * or freed until after psql_scan_finish is called.
1055 * encoding is the libpq identifier for the character encoding in use,
1056 * and std_strings says whether standard_conforming_strings is on.
1060 const char *line,
int line_len,
1063 /* Mustn't be scanning already */
1067 /* Do we need to hack the character set encoding? */
1071 /* Save standard-strings flag as well */
1072 state->std_strings = std_strings;
1074 /* Set up flex input buffer with appropriate translation and padding */
1077 state->scanline = line;
1079 /* Set lookaside data in case we have to map unsafe encoding */
1083 /* Initialize state for psql_scan_get_location() */
1084 state->cur_line_no = 0;
/* yylex not called yet */
1089 * Do lexical analysis of SQL command text.
1091 * The text previously passed to psql_scan_setup is scanned, and appended
1092 * (possibly with transformation) to query_buf.
1094 * The return value indicates the condition that stopped scanning:
1096 * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1097 * transferred to query_buf.) The command accumulated in query_buf should
1098 * be executed, then clear query_buf and call again to scan the remainder of the line.
1101 * PSCAN_BACKSLASH: found a backslash that starts a special command.
1102 * Any previous data on the line has been transferred to query_buf.
1103 * The caller will typically next apply a separate flex lexer to scan
1104 * the special command.
1106 * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1107 * incomplete SQL command. *prompt is set to the appropriate prompt type.
1109 * PSCAN_EOL: the end of the line was reached, and there is no lexical
1110 * reason to consider the command incomplete. The caller may or may not
1111 * choose to send it. *prompt is set to the appropriate prompt type if
1112 * the caller chooses to collect more input.
1114 * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1115 * be called next, then the cycle may be repeated with a fresh input line.
1117 * In all cases, *prompt is set to an appropriate prompt type code for the
1118 * next line-input operation.
1128 /* Must be scanning already */
1131 /* Set current output target */
1132 state->output_buf = query_buf;
1134 /* Set input source */
1135 if (
state->buffer_stack != NULL)
1136 yy_switch_to_buffer(
state->buffer_stack->buf,
state->scanner);
1138 yy_switch_to_buffer(
state->scanbufhandle,
state->scanner);
1143 /* Notify psql_scan_get_location() that a yylex call has been made. */
1144 if (
state->cur_line_no == 0)
1145 state->cur_line_no = 1;
1148 * Check termination state and return appropriate result info.
1153 switch (
state->start_state)
1156 case xqs:
/* we treat this like INITIAL */
1157 if (
state->paren_depth > 0)
1162 else if (
state->begin_depth > 0)
1167 else if (query_buf->
len > 0)
1174 /* never bother to send an empty buffer */
1216 /* can't get here */
1217 fprintf(stderr,
"invalid YY_START\n");
1230 /* can't get here */
1231 fprintf(stderr,
"invalid yylex result\n");
1239 * Clean up after scanning a string. This flushes any unread input and
1240 * releases resources (but not the PsqlScanState itself). Note however
1241 * that this does not reset the lexer scan state; that can be done by
1242 * psql_scan_reset(), which is an orthogonal operation.
1244 * It is legal to call this when not scanning anything (makes it easier
1245 * to deal with error recovery).
1250 /* Drop any incomplete variable expansions. */
1251 while (
state->buffer_stack != NULL)
1254 /* Done with the outer scan buffer, too */
1255 if (
state->scanbufhandle)
1256 yy_delete_buffer(
state->scanbufhandle,
state->scanner);
1257 state->scanbufhandle = NULL;
1260 state->scanbuf = NULL;
1264 * Reset lexer scanning state to start conditions. This is appropriate
1265 * for executing \r psql commands (or any other time that we discard the
1266 * prior contents of query_buf). It is not, however, necessary to do this
1267 * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1268 * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1269 * conditions are returned.
1271 * Note that this is unrelated to flushing unread input; that task is
1272 * done by psql_scan_finish().
1277 state->start_state = INITIAL;
1278 state->paren_depth = 0;
1279 state->xcdepth = 0;
/* not really necessary */
1280 if (
state->dolqstart)
1282 state->dolqstart = NULL;
1283 state->identifier_count = 0;
1284 state->begin_depth = 0;
1288 * Reselect this lexer (psqlscan.l) after using another one.
1290 * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1291 * state, because we'd never switch to another lexer in a different state.
1292 * However, we don't want to reset e.g. paren_depth, so this can't be
1293 * the same as psql_scan_reset().
1295 * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1296 * must be a superset of this.
1298 * Note: it seems likely that other lexers could just assign INITIAL for
1299 * themselves, since that probably has the value zero in every flex-generated
1300 * lexer. But let's not assume that.
1305 state->start_state = INITIAL;
1309 * Return true if lexer is currently in an "inside quotes" state.
1311 * This is pretty grotty but is needed to preserve the old behavior
1312 * that mainloop.c drops blank lines not inside quotes without even echoing them.
1318 return state->start_state != INITIAL &&
1319 state->start_state != xqs;
1323 * Return the current scanning location (end+1 of last scanned token),
1324 * as a line number counted from 1 and an offset from string start.
1326 * This considers only the outermost input string, and therefore is of
1327 * limited use for programs that use psqlscan_push_new_buffer().
1329 * It would be a bit easier probably to use "%option yylineno" to count
1330 * lines, but the flex manual says that has a performance cost, and only
1331 * a minority of programs using psqlscan have need for this functionality.
1332 * So we implement it ourselves without adding overhead to the lexer itself.
1336 int *lineno,
int *offset)
1338 const char *line_end;
1341 * We rely on flex's having stored a NUL after the current token in
1342 * scanbuf. Therefore we must specially handle the state before yylex()
1343 * has been called, when obviously that won't have happened yet.
1345 if (
state->cur_line_no == 0)
1353 * Advance cur_line_no/cur_line_ptr past whatever has been lexed so far.
1354 * Doing this prevents repeated calls from being O(N^2) for long inputs.
1356 while ((line_end = strchr(
state->cur_line_ptr,
'\n')) != NULL)
1358 state->cur_line_no++;
1359 state->cur_line_ptr = line_end + 1;
1361 state->cur_line_ptr += strlen(
state->cur_line_ptr);
1363 /* Report current location. */
1364 *lineno =
state->cur_line_no;
1365 *offset =
state->cur_line_ptr -
state->scanbuf;
1369 * Push the given string onto the stack of stuff to scan.
1371 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1375 const char *varname)
1382 * In current usage, the passed varname points at the current flex input
1383 * buffer; we must copy it before calling psqlscan_prepare_buffer()
1384 * because that will change the buffer state.
1391 if (
state->safe_encoding)
1402 state->buffer_stack = stackelem;
1406 * Pop the topmost buffer stack item (there must be one!)
1408 * NB: after this, the flex input state is unspecified; caller must
1409 * switch to an appropriate buffer to continue lexing.
1410 * See psqlscan_select_top_buffer().
1418 yy_delete_buffer(stackelem->
buf,
state->scanner);
1428 * Select the topmost surviving buffer as the active input.
1435 if (stackelem != NULL)
1437 yy_switch_to_buffer(stackelem->
buf,
state->scanner);
1443 yy_switch_to_buffer(
state->scanbufhandle,
state->scanner);
1450 * Check if specified variable name is the source for any string
1451 * currently being scanned
1458 for (stackelem =
state->buffer_stack;
1460 stackelem = stackelem->
next)
1462 if (stackelem->
varname && strcmp(stackelem->
varname, varname) == 0)
1469 * Set up a flex input buffer to scan the given data. We always make a
1470 * copy of the data. If working in an unsafe encoding, the copy has
1471 * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1473 * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1481 /* Flex wants two \0 characters after the actual data */
1484 newtxt[
len] = newtxt[
len + 1] = YY_END_OF_BUFFER_CHAR;
1486 if (
state->safe_encoding)
1487 memcpy(newtxt, txt,
len);
1490 /* Gotta do it the hard way */
1497 /* first byte should always be okay... */
1500 while (--thislen > 0 &&
i <
len)
1501 newtxt[
i++] = (char) 0xFF;
1505 return yy_scan_buffer(newtxt,
len + 2,
state->scanner);
1509 * psqlscan_emit() --- body for ECHO macro
1511 * NB: this must be used for ALL and ONLY the text copied from the flex
1512 * input data. If you pass it something that is not part of the yytext
1513 * string, you are making a mistake. Internally generated text can be
1514 * appended directly to state->output_buf.
1521 if (
state->safe_encoding)
1525 /* Gotta do it the hard way */
1526 const char *reference =
state->refline;
1529 reference += (txt -
state->curline);
1531 for (
i = 0;
i <
len;
i++)
1535 if (ch == (
char) 0xFF)
1543 * psqlscan_extract_substring --- fetch value of (part of) the current token
1545 * This is like psqlscan_emit(), except that the data is returned as a
1546 * malloc'd string rather than being pushed directly to state->output_buf.
1553 if (
state->safe_encoding)
1554 memcpy(result, txt,
len);
1557 /* Gotta do it the hard way */
1558 const char *reference =
state->refline;
1561 reference += (txt -
state->curline);
1563 for (
i = 0;
i <
len;
i++)
1567 if (ch == (
char) 0xFF)
1577 * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1579 * If the variable name is found, escape its value using the appropriate
1580 * quoting method and emit the value to output_buf. (Since the result is
1581 * surely quoted, there is never any reason to rescan it.) If we don't
1582 * find the variable or escaping fails, emit the token as-is.
1591 /* Variable lookup. */
1593 if (
state->callbacks->get_variable)
1594 value =
state->callbacks->get_variable(varname, quote,
1595 state->cb_passthrough);
1602 /* Emit the suitably-escaped value */
1608 /* Emit original token as-is */
1620 if (
state->callbacks->get_variable)
1622 state->cb_passthrough);
#define fprintf(file, fmt, msg)
int PQmblen(const char *s, int encoding)
void * pg_malloc(size_t size)
void * pg_malloc0(size_t size)
Assert(PointerIsAligned(start, uint64))
#define pg_valid_server_encoding_id
void appendBinaryPQExpBuffer(PQExpBuffer str, const char *data, size_t datalen)
void appendPQExpBufferChar(PQExpBuffer str, char ch)
void appendPQExpBufferStr(PQExpBuffer str, const char *data)
enum _promptStatus promptStatus_t
YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len, char **txtcopy)
void psql_scan_get_location(PsqlScanState state, int *lineno, int *offset)
void psql_scan_reset(PsqlScanState state)
void psql_scan_destroy(PsqlScanState state)
PsqlScanResult psql_scan(PsqlScanState state, PQExpBuffer query_buf, promptStatus_t *prompt)
PsqlScanState psql_scan_create(const PsqlScanCallbacks *callbacks)
void psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
void psql_scan_setup(PsqlScanState state, const char *line, int line_len, int encoding, bool std_strings)
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner)
void psql_scan_reselect_sql_lexer(PsqlScanState state)
bool psql_scan_in_quote(PsqlScanState state)
void psql_scan_finish(PsqlScanState state)
struct yy_buffer_state * YY_BUFFER_STATE