2/*-------------------------------------------------------------------------
5 * lexical scanner for psql backslash commands
7 * XXX Avoid creating backtracking cases --- see the backend lexer for info.
9 * See fe_utils/psqlscan_int.h for additional commentary.
11 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
12 * Portions Copyright (c) 1994, Regents of the University of California
15 * src/bin/psql/psqlscanslash.l
17 *-------------------------------------------------------------------------
36 * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
37 * doesn't presently make use of that argument, so just declare it as int.
42 * These variables do not need to be saved across calls. Yeah, it's a bit
43 * of a hack, but putting them into PsqlScanStateData would be klugy too.
51/* Return values from yylex() */
52 #define LEXRES_EOL 0 /* end of input */
53 #define LEXRES_OK 1 /* OK completion of backslash argument */
58 #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
static int backtick_start_offset
static int unquoted_option_chars
static char * option_quote
static enum slash_option_type option_type
static void evaluate_backtick(PsqlScanState state)
64/* Except for the prefix, these options should match psqlscan.l */
68%option never-interactive
74%option prefix="slash_yy"
77 * Set the type of yyextra; we use it as a pointer back to the containing
80%option extra-type="PsqlScanState"
83 * OK, here is a short description of lex/flex rules behavior.
84 * The longest pattern which matches an input string is always chosen.
85 * For equal-length patterns, the first occurring in the rules list is chosen.
86 * INITIAL is the starting state, to which all non-conditional rules apply.
87 * Exclusive states change parsing rules while the state is active. When in
88 * an exclusive state, only those rules defined for that state apply.
91/* Exclusive states for lexing backslash commands */
102 * Assorted character class definitions that should match psqlscan.l.
106xeoctesc [\\][0-7]{1,3}
107xehexesc [\\]x[0-9A-Fa-f]{1,2}
108xqdouble {quote}{quote}
110variable_char [A-Za-z\200-\377_0-9]
117 /* Declare some local variables inside yylex(), for convenience */
118 PsqlScanState cur_state =
yyextra;
119 PQExpBuffer output_buf = cur_state->output_buf;
122 * Force flex into the state indicated by start_state. This has a
123 * couple of purposes: it lets some of the functions below set a new
124 * starting state without ugly direct access to flex variables, and it
125 * allows us to transition from one flex lexer to another so that we
126 * can lex different parts of the source string using separate lexers.
128 BEGIN(cur_state->start_state);
132 * We don't really expect to be invoked in the INITIAL state in this
133 * lexer; but if we are, just spit data to the output_buf until EOF.
139 * Exclusive lexer states to handle backslash command lexing
143 /* command name ends at whitespace or backslash; eat all else */
147 cur_state->start_state = YY_START;
157 * Discard any whitespace before argument, then go to xslasharg state.
158 * An exception is that "|" is only special at start of argument, so we
167 /* treat like whole-string case */
169 BEGIN(xslashwholeline);
173 /* vertical bar is not special otherwise */
188 * Default processing of text in a slash command's argument.
190 * Note: unquoted_option_chars counts the number of characters at the
191 * end of the argument that were not subject to any form of quoting.
192 * psql_scan_slash_option needs this to strip trailing semicolons safely.
197 * Unquoted space is end of arg; do not eat. Likewise
198 * backslash is end of command or next command, do not eat
200 * XXX this means we can't conveniently accept options
201 * that include unquoted backslashes; therefore, option
202 * processing that encourages use of backslashes is rather
206 cur_state->start_state = YY_START;
220 BEGIN(xslashbackquote);
231 /* Possible psql variable substitution */
232 if (cur_state->callbacks->get_variable == NULL)
242 value = cur_state->callbacks->get_variable(varname,
244 cur_state->cb_passthrough);
248 * The variable value is just emitted without any
249 * further examination. This is consistent with the
250 * pre-8.0 code behavior, if not with the way that
251 * variables are handled outside backslash commands.
252 * Note that we needn't guard against recursion here.
void appendPQExpBufferStr(PQExpBuffer str, const char *data)
char * psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
void psqlscan_escape_variable(PsqlScanState state, const char *txt, int len, PsqlScanQuoteType quote)
275:\"{variable_char}+\" {
282:\{\?{variable_char}+\} {
void psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
287 /* Throw back everything but the colon */
294 /* Throw back everything but the colon */
300:\{\?{variable_char}* {
301 /* Throw back everything but the colon */
308 /* Throw back everything but the colon */
323 * single-quoted text: copy literally except for '' and backslash
327{quote} { BEGIN(xslasharg); }
void appendPQExpBufferChar(PQExpBuffer str, char ch)
340 (
char) strtol(yytext + 1, NULL, 8));
346 (
char) strtol(yytext + 2, NULL, 16));
void psqlscan_emit(PsqlScanState state, const char *txt, int len)
357 * backticked text: copy everything until next backquote (expanding
358 * variable references, but doing nought else), then evaluate.
362 /* In an inactive \if branch, don't evaluate the command */
363 if (cur_state->cb_passthrough == NULL ||
bool conditional_active(ConditionalStack cstack)
370 /* Possible psql variable substitution */
371 if (cur_state->callbacks->get_variable == NULL)
381 value = cur_state->callbacks->get_variable(varname,
383 cur_state->cb_passthrough);
402 /* Throw back everything but the colon */
412 /* double-quoted text: copy verbatim, including the double quotes */
424 /* copy everything until end of input line */
425 /* but suppress leading whitespace */
428 if (output_buf->len > 0)
437 /* at end of command, eat a double backslash, but not anything else */
440 cur_state->start_state = YY_START;
446 cur_state->start_state = YY_START;
453 if (cur_state->buffer_stack == NULL)
455 cur_state->start_state = YY_START;
460 * We were expanding a variable, so pop the inclusion
461 * stack and keep lexing
void psqlscan_select_top_buffer(PsqlScanState state)
void psqlscan_pop_buffer_stack(PsqlScanState state)
472 * Scan the command name of a psql backslash command. This should be called
473 * after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input
474 * has been consumed through the leading backslash.
476 * The return value is a malloc'd copy of the command name, as parsed off
484 /* Must be scanning already */
487 /* Build a local buffer that we'll return the data of */
490 /* Set current output target */
491 state->output_buf = &mybuf;
493 /* Set input source */
494 if (
state->buffer_stack != NULL)
495 yy_switch_to_buffer(
state->buffer_stack->buf,
state->scanner);
497 yy_switch_to_buffer(
state->scanbufhandle,
state->scanner);
500 * Set lexer start state. Note that this is sufficient to switch
501 * state->scanner over to using the tables in this lexer file.
503 state->start_state = xslashcmd;
508 /* There are no possible errors in this lex state... */
511 * In case the caller returns to using the regular SQL lexer, reselect the
512 * appropriate initial state.
520 * Parse off the next argument for a backslash command, and return it as a
521 * malloc'd string. If there are no more arguments, returns NULL.
523 * type tells what processing, if any, to perform on the option string;
524 * for example, if it's a SQL identifier, we want to downcase any unquoted
527 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
528 * the last quote symbol used in the argument.
530 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
531 * be taken as part of the option string will be stripped.
533 * NOTE: the only possible syntax errors for backslash options are unmatched
534 * quotes, which are detected when we run out of input. Therefore, on a
535 * syntax error we just throw away the string and return NULL; there is no
536 * need to worry about flushing remaining input.
549 /* Must be scanning already */
553 quote = &local_quote;
556 /* Build a local buffer that we'll return the data of */
559 /* Set up static variables that will be used by yylex */
564 /* Set current output target */
565 state->output_buf = &mybuf;
567 /* Set input source */
568 if (
state->buffer_stack != NULL)
569 yy_switch_to_buffer(
state->buffer_stack->buf,
state->scanner);
571 yy_switch_to_buffer(
state->scanbufhandle,
state->scanner);
573 /* Set lexer start state */
575 state->start_state = xslashwholeline;
577 state->start_state = xslashargstart;
582 /* Save final state for a moment... */
583 final_state =
state->start_state;
586 * In case the caller returns to using the regular SQL lexer, reselect the
587 * appropriate initial state.
592 * Check the lex result: we should have gotten back either LEXRES_OK or
593 * LEXRES_EOL (the latter indicating end of string). If we were inside a
594 * quoted string, as indicated by final_state, EOL is an error.
604 /* Strip any unquoted trailing semicolons if requested */
609 mybuf.
data[mybuf.
len - 1] ==
';')
611 mybuf.
data[--mybuf.
len] =
'\0';
616 * If SQL identifier processing was requested, then we strip out
617 * excess double quotes and optionally downcase unquoted letters.
624 /* update mybuf.len for possible shortening */
625 mybuf.
len = strlen(mybuf.
data);
629 case xslashbackquote:
631 /* must have hit EOL inside quotes */
635 case xslashwholeline:
638 * In whole-line mode, we interpret semicolon = true as stripping
639 * trailing whitespace as well as semicolons; this gives the
640 * nearest equivalent to what semicolon = true does in normal
641 * mode. Note there's no concept of quoting in this mode.
645 while (mybuf.
len > 0 &&
646 (mybuf.
data[mybuf.
len - 1] ==
';' ||
647 (isascii((
unsigned char) mybuf.
data[mybuf.
len - 1]) &&
648 isspace((
unsigned char) mybuf.
data[mybuf.
len - 1]))))
650 mybuf.
data[--mybuf.
len] =
'\0';
656 fprintf(stderr,
"invalid YY_START\n");
661 * An unquoted empty argument isn't possible unless we are at end of
662 * command. Return NULL instead.
664 if (mybuf.
len == 0 && *quote == 0)
670 /* Else return the completed string. */
675 * Eat up any unused \\ to complete a backslash command.
680 /* Must be scanning already */
683 /* Set current output target */
684 state->output_buf = NULL;
/* we won't output anything */
686 /* Set input source */
687 if (
state->buffer_stack != NULL)
688 yy_switch_to_buffer(
state->buffer_stack->buf,
state->scanner);
690 yy_switch_to_buffer(
state->scanbufhandle,
state->scanner);
692 /* Set lexer start state */
693 state->start_state = xslashend;
698 /* There are no possible errors in this lex state... */
701 * We expect the caller to return to using the regular SQL lexer, so
702 * reselect the appropriate initial state.
708 * Fetch current paren nesting depth
713 return state->paren_depth;
717 * Set paren nesting depth
723 state->paren_depth = depth;
727 * De-quote and optionally downcase a SQL identifier.
729 * The string at *str is modified in-place; it can become shorter,
732 * If downcase is true then non-quoted letters are folded to lower case.
733 * Ideally this behavior will match the backend's downcase_identifier();
734 * but note that it could differ if LC_CTYPE is different in the frontend.
736 * Note that a string like FOO"BAR"BAZ will be converted to fooBARbaz;
737 * this is somewhat inconsistent with the SQL spec, which would have us
738 * parse it as several identifiers. But for psql's purposes, we want a
739 * string like "foo"."bar" to be treated as one option, so there's little
740 * choice; this routine doesn't get to change the token boundaries.
745 bool inquotes =
false;
752 if (inquotes && cp[1] ==
'"')
754 /* Keep the first quote, remove the second */
758 inquotes = !inquotes;
759 /* Collapse out quote at *cp */
760 memmove(cp, cp + 1, strlen(cp));
761 /* do not advance cp */
765 if (downcase && !inquotes)
773 * Evaluate a backticked substring of a slash command's argument.
775 * The portion of output_buf starting at backtick_start_offset is evaluated
776 * as a shell command and then replaced by the command's output.
793 fd = popen(cmd,
"r");
805 result = fread(
buf, 1,
sizeof(
buf),
fd);
819 * Although pclose's result always sets the shell result variables, we
820 * historically have abandoned the backtick substitution only if it
823 exit_code = pclose(
fd);
837 /* Now done with cmd, delete it from output_buf */
839 output_buf->
data[output_buf->
len] =
'\0';
841 /* If no error, transfer result to output_buf */
844 /* strip any trailing newline (but only one) */
845 if (cmd_output.
len > 0 &&
846 cmd_output.
data[cmd_output.
len - 1] ==
'\n')
851 /* And finally, set the shell result variables */
void SetShellResultVariables(int wait_result)
#define PG_USED_FOR_ASSERTS_ONLY
#define fprintf(file, fmt, msg)
int PQmblenBounded(const char *s, int encoding)
Assert(PointerIsAligned(start, uint64))
#define pg_log_error(...)
unsigned char pg_tolower(unsigned char ch)
void initPQExpBuffer(PQExpBuffer str)
void appendBinaryPQExpBuffer(PQExpBuffer str, const char *data, size_t datalen)
void termPQExpBuffer(PQExpBuffer str)
#define PQExpBufferDataBroken(buf)
static int fd(const char *x, int i)
void psql_scan_reselect_sql_lexer(PsqlScanState state)
void psql_scan_slash_command_end(PsqlScanState state)
void psql_scan_set_paren_depth(PsqlScanState state, int depth)
void dequote_downcase_identifier(char *str, bool downcase, int encoding)
int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner)
char * psql_scan_slash_option(PsqlScanState state, enum slash_option_type type, char *quote, bool semicolon)
int psql_scan_get_paren_depth(PsqlScanState state)
char * psql_scan_slash_command(PsqlScanState state)