2 * test_escape.c Test escape functions
4 * Copyright (c) 2022-2025, PostgreSQL Global Development Group
7 * src/test/modules/test_escape/test_escape.c
35 #define NEVER_ACCESS_STR "\xff never-to-be-touched"
39 * An escape function to be tested by this test.
46 * Can the escape method report errors? If so, we validate that it does in
47 * case of various invalid inputs.
52 * Is the escape method known to not handle invalidly encoded input? If
53 * so, we don't run the test unless --force-unsupported is used.
58 * Is the escape method known to only handle encodings where no byte in a
59 * multi-byte character is valid ascii.
64 * Does the escape function have a length input?
69 const char *unescaped,
size_t unescaped_len,
74 * A single test input for this test.
85 * Callback functions from flex lexer. Not currently used by the test.
93 * Print the string into buf, making characters outside of plain ascii
94 * somewhat easier to recognize.
96 * The output format could stand to be improved significantly, it's not at all
102 for (
size_t i = 0;
i <
len;
i++)
121 const char *testname,
124 const char *resultdesc)
127 bool print_details =
true;
128 bool print_result =
true;
133 print_details =
false;
135 print_result =
false;
144 printf(
"%s %d - %s: %s: %s\n",
152 * Return true for encodings in which bytes in a multi-byte character look
153 * like valid ascii characters.
159 * We don't store this property directly anywhere, but whether an encoding
160 * is a client-only encoding is a good proxy.
169 * Confirm escaping doesn't read past the end of an allocation. Consider the
170 * result of malloc(4096), in the absence of freelist entries satisfying the
171 * allocation. On OpenBSD, reading one byte past the end of that object
174 * Run this test before the program's other tests, so freelists are minimal.
175 * len=4096 didn't SIGSEGV, likely due to free() calls in libpq. len=8192
176 * did. Use 128 KiB, to somewhat insulate the outcome from distant new free()
177 * calls and libc changes.
183 size_t input_len = 0x20000;
188 memset(
input,
'-', input_len - 1);
189 input[input_len - 1] = 0xfe;
191 /* name to describe the test */
201 "input validity vs escape success",
"ok");
208 * Confirm json parsing doesn't read past the end of an allocation. This
209 * exercises wchar.c infrastructure like the true "escape" tests do, but this
210 * isn't an "escape" test.
217 const char input[] =
"{\"\\u\xFE";
218 size_t input_len =
sizeof(
input) - 1;
223 /* prepare input like test_one_vector_escape() does */
228 raw_buf->
len - input_len);
230 /* name to describe the test */
252 const char *unescaped,
size_t unescaped_len,
261 escape_err->
data[escape_err->
len - 1] = 0;
275 const char *unescaped,
size_t unescaped_len,
284 escape_err->
data[escape_err->
len - 1] = 0;
298 const char *unescaped,
size_t unescaped_len,
307 unescaped, unescaped_len,
316 escape_err->
data[escape_err->
len - 1] = 0;
328 const char *unescaped,
size_t unescaped_len,
336 unescaped, unescaped_len);
345 * Escape via s/'/''/. Non-core drivers invariably wrap libpq or use this
346 * method. It suffices iff the input passes encoding validation, so it's
347 * marked as supports_only_valid.
351 const char *unescaped,
size_t unescaped_len,
354 const char *s = unescaped;
358 for (
int i = 0;
i < unescaped_len;
i++)
377 const char *unescaped,
size_t unescaped_len,
387 const char *unescaped,
size_t unescaped_len,
399 .
name =
"PQescapeLiteral",
400 .reports_errors =
true,
401 .supports_input_length =
true,
405 .name =
"PQescapeIdentifier",
406 .reports_errors =
true,
407 .supports_input_length =
true,
411 .name =
"PQescapeStringConn",
412 .reports_errors =
true,
413 .supports_input_length =
true,
417 .name =
"PQescapeString",
418 .reports_errors =
false,
419 .supports_input_length =
true,
424 .reports_errors =
false,
425 .supports_only_valid =
true,
426 .supports_only_ascii_overlap =
true,
427 .supports_input_length =
true,
431 .name =
"appendStringLiteral",
432 .reports_errors =
false,
437 .reports_errors =
false,
/*
 * Build a test vector initializer for client encoding `enc` whose input is
 * `string`. The length is computed as sizeof(string) - 1, so `string` must be
 * a string literal (an array, not a pointer); bytes after an embedded NUL are
 * still counted, only the literal's implicit terminator is excluded.
 */
443 #define TV(enc, string) {.client_encoding = (enc), .escape=string, .escape_len=sizeof(string) - 1, }
/*
 * Build a test vector initializer for client encoding `enc` whose input is
 * `string`, with the length given explicitly as `len` instead of being derived
 * from the size of the literal.
 */
444 #define TV_LEN(enc, string, len) {.client_encoding = (enc), .escape=string, .escape_len=len, }
447 /* expected to work sanity checks */
460 /* trailing multi-byte character, paddable in available space */
461 TV(
"UTF-8",
"1\xC0"),
462 TV(
"UTF-8",
"1\xE0 "),
463 TV(
"UTF-8",
"1\xF0 "),
464 TV(
"UTF-8",
"1\xF0 "),
465 TV(
"UTF-8",
"1\xF0 "),
467 /* trailing multi-byte character, not enough space to pad */
468 TV(
"UTF-8",
"1\xE0"),
469 TV(
"UTF-8",
"1\xF0"),
472 /* try to smuggle in something in invalid characters */
473 TV(
"UTF-8",
"1\xE0'"),
474 TV(
"UTF-8",
"1\xE0\""),
475 TV(
"UTF-8",
"1\xF0'"),
476 TV(
"UTF-8",
"1\xF0\""),
477 TV(
"UTF-8",
"1\xF0'; "),
478 TV(
"UTF-8",
"1\xF0\"; "),
479 TV(
"UTF-8",
"1\xF0';;;;"),
480 TV(
"UTF-8",
"1\xF0 ';;;;"),
481 TV(
"UTF-8",
"1\xF0 \";;;;"),
482 TV(
"UTF-8",
"1\xE0'; \\l ; "),
483 TV(
"UTF-8",
"1\xE0\"; \\l ; "),
485 /* null byte handling */
486 TV(
"UTF-8",
"some0円thing"),
487 TV(
"UTF-8",
"some0円"),
488 TV(
"UTF-8",
"some\xF0'0円"),
489 TV(
"UTF-8",
"some\xF0'0円'"),
490 TV(
"UTF-8",
"some\xF0" "ab0円'"),
492 /* GB18030's 4 byte encoding requires a 2nd byte limited to certain values */
493 TV(
"GB18030",
"\x90\x31"),
494 TV(
"GB18030",
"\\\x81\x5c'"),
495 TV(
"GB18030",
"\\\x81\x5c\""),
496 TV(
"GB18030",
"\\\x81\x5c0円'"),
499 * \x81 indicates a 2 byte char. ' and " are not a valid second byte, but
500 * that requires encoding verification to know. E.g. replace_string()
503 TV(
"GB18030",
"\\\x81';"),
504 TV(
"GB18030",
"\\\x81\";"),
507 * \x81 indicates a 2 byte char. \ is a valid second character.
509 TV(
"GB18030",
"\\\x81\\';"),
510 TV(
"GB18030",
"\\\x81\\\";"),
511 TV(
"GB18030",
"\\\x810円;"),
512 TV(
"GB18030",
"\\\x810円'"),
513 TV(
"GB18030",
"\\\x81'0円"),
515 TV(
"SJIS",
"\xF0\x40;"),
517 TV(
"SJIS",
"\xF0';"),
518 TV(
"SJIS",
"\xF0\";"),
519 TV(
"SJIS",
"\xF00円'"),
520 TV(
"SJIS",
"\\\xF0\\';"),
521 TV(
"SJIS",
"\\\xF0\\\";"),
529 TV(
"mule_internal",
"\\\x9c';0円;"),
531 TV(
"sql_ascii",
"1\xC0'"),
534 * Testcases that are not null terminated for the specified input length.
535 * That's interesting to verify that escape functions don't read beyond
536 * the intended input length.
538 * One interesting special case is GB18030, which has the odd behaviour of
539 * needing to read beyond the first byte to determine the length of a
540 * multi-byte character.
543 TV_LEN(
"GB18030",
"\x80", 1),
544 TV_LEN(
"GB18030",
"\x800円", 2),
545 TV_LEN(
"GB18030",
"\x80\x30", 2),
546 TV_LEN(
"GB18030",
"\x80\x300円", 3),
547 TV_LEN(
"GB18030",
"\x80\x30\x30", 3),
548 TV_LEN(
"GB18030",
"\x80\x30\x300円", 4),
549 TV_LEN(
"UTF-8",
"\xC3\xb6 ", 1),
550 TV_LEN(
"UTF-8",
"\xC3\xb6 ", 2),
/*
 * Expand to a switch case label for `sym` that returns the symbol's own name
 * as a string literal (via the preprocessor's # stringizing operator).
 */
557#define TOSTR_CASE(sym) case sym: return #sym
568 return "";
/* silence compiler */
572 * Verify that psql parses the input as a single statement. If this property
573 * is violated, the escape function does not effectively protect against
574 * smuggling in a second statement.
593 * TODO: This hardcodes standard conforming strings, it would be useful to
594 * test without as well.
603 scan_result =
psql_scan(scan_state, query_buf,
607 "#\t\t %d: scan_result: %s prompt: %u, query_buf: ",
608 matches,
scan_res_s(scan_result), prompt_status);
619 test_fails = matches > 1 || scan_result !=
PSCAN_EOL;
622 resdesc =
"more than one match";
624 resdesc =
"unexpected end state";
641 size_t input_encoding_validlen;
642 bool input_encoding_valid;
643 size_t input_encoding0_validlen;
644 bool input_encoding0_valid;
646 size_t escape_encoding_length;
647 bool escape_encoding_valid;
661 /* name to describe the test */
667 /* details to describe the test, to allow for debugging */
676 /* check encoding of input, to compare with after the test */
680 input_encoding_valid = input_encoding_validlen == tv->
escape_len;
682 input_encoding_valid);
689 input_encoding0_valid);
700 * Put the to-be-escaped data into a buffer, so that we
702 * a) can mark memory beyond end of the string as inaccessible when using
705 * b) can append extra data beyond the length passed to the escape
706 * function, to verify that that data is not processed.
708 * TODO: Should we instead/additionally escape twice, once with unmodified
709 * and once with appended input? That way we could compare the two.
716 * Append likely invalid string that does *not* contain a null byte
717 * (which'd prevent some invalid accesses to later memory).
726 /* append invalid string, after \0 */
734 /* call the to-be-tested escape function */
735 escape_success = ef->
escape(tc->
conn, escape_buf,
744 if (escape_buf->
len > 0)
755 escape_encoding_valid = escape_encoding_length == escape_buf->
len;
758 escape_encoding_valid);
761 * Verify that no data beyond the end of the input is included in the
762 * escaped string. It'd be better to use something like memmem()
763 * here, but that's not available everywhere.
767 "escaped data beyond end of input",
768 contains_never ?
"no" :
"all secrets revealed");
772 escape_encoding_length = 0;
773 escape_encoding_valid = 1;
777 * If the test reports errors, and the input was invalidly encoded,
778 * escaping should fail. One edge-case that we accept for now is that the
779 * input could have an embedded null byte, which the escape functions will
780 * just treat as a shorter string. If the encoding error is after the zero
781 * byte, the output thus won't contain it.
786 const char *resdesc =
"ok";
790 if (!input_encoding0_valid)
793 resdesc =
"invalid input escaped successfully";
795 else if (!input_encoding_valid)
796 resdesc =
"invalid input escaped successfully, due to zero byte";
800 if (input_encoding0_valid)
803 resdesc =
"valid input failed to escape";
805 else if (input_encoding_valid)
806 resdesc =
"valid input failed to escape, due to zero byte";
810 "input validity vs escape success",
815 * If the input is invalidly encoded, the output should also be invalidly
816 * encoded. We accept the same zero-byte edge case as above.
820 const char *resdesc =
"ok";
822 if (input_encoding0_valid && !input_encoding_valid && escape_encoding_valid)
824 resdesc =
"invalid input produced valid output, due to zero byte";
826 else if (input_encoding0_valid && !escape_encoding_valid)
829 resdesc =
"valid input produced invalid output";
831 else if (!input_encoding0_valid &&
833 escape_encoding_valid)
836 resdesc =
"invalid input produced valid output";
840 "input and escaped encoding validity",
845 * Test psql parsing whenever we get any string back, even if the escape
846 * function returned a failure.
848 if (escape_buf->
len > 0)
851 escape_buf, details);
867 fprintf(stderr,
"failed to set encoding to %s:\n%s\n",
884 fprintf(stderr,
"Error: %s\n\n", hint);
886 printf(
"PostgreSQL escape function test\n"
889 " test_escape --conninfo=CONNINFO [OPTIONS]\n"
892 " -h, --help show this help\n"
893 " -c, --conninfo=CONNINFO connection information to use\n"
894 " -v, --verbose show test details even for successes\n"
895 " -q, --quiet only show failures\n"
896 " -f, --force-unsupported test invalid input even if unsupported\n"
910 static const struct option long_options[] = {
919 while ((
c =
getopt_long(argc, argv,
"c:fhqv", long_options, &option_index)) != -1)
943 usage(
"unused option(s) specified");
946 usage(
"--conninfo needs to be specified");
952 fprintf(stderr,
"could not connect: %s\n",
#define fprintf(file, fmt, msg)
PGconn * PQconnectdb(const char *conninfo)
ConnStatusType PQstatus(const PGconn *conn)
int PQclientEncoding(const PGconn *conn)
void PQfinish(PGconn *conn)
char * PQerrorMessage(const PGconn *conn)
int PQsetClientEncoding(PGconn *conn, const char *encoding)
void PQfreemem(void *ptr)
size_t PQescapeStringConn(PGconn *conn, char *to, const char *from, size_t length, int *error)
size_t PQescapeString(char *to, const char *from, size_t length)
char * PQescapeLiteral(PGconn *conn, const char *str, size_t len)
char * PQescapeIdentifier(PGconn *conn, const char *str, size_t len)
void * pg_malloc(size_t size)
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
#define required_argument
JsonParseErrorType pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
JsonLexContext * makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json, size_t len, int encoding, bool need_escapes)
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
void freeJsonLexContext(JsonLexContext *lex)
@ JSON_UNICODE_ESCAPE_FORMAT
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
PGDLLIMPORT char * optarg
#define PG_ENCODING_BE_LAST
size_t strnlen(const char *str, size_t maxlen)
PQExpBuffer createPQExpBuffer(void)
int enlargePQExpBuffer(PQExpBuffer str, size_t needed)
void resetPQExpBuffer(PQExpBuffer str)
void appendPQExpBuffer(PQExpBuffer str, const char *fmt,...)
void appendBinaryPQExpBuffer(PQExpBuffer str, const char *data, size_t datalen)
void destroyPQExpBuffer(PQExpBuffer str)
void appendPQExpBufferChar(PQExpBuffer str, char ch)
void appendPQExpBufferStr(PQExpBuffer str, const char *data)
enum _promptStatus promptStatus_t
void psql_scan_destroy(PsqlScanState state)
PsqlScanResult psql_scan(PsqlScanState state, PQExpBuffer query_buf, promptStatus_t *prompt)
PsqlScanState psql_scan_create(const PsqlScanCallbacks *callbacks)
void psql_scan_setup(PsqlScanState state, const char *line, int line_len, int encoding, bool std_strings)
const char * fmtId(const char *rawid)
void setFmtEncoding(int encoding)
void appendStringLiteral(PQExpBuffer buf, const char *str, int encoding, bool std_strings)
bool supports_only_ascii_overlap
bool supports_input_length
bool(* escape)(PGconn *conn, PQExpBuffer target, const char *unescaped, size_t unescaped_len, PQExpBuffer escape_err)
const char * client_encoding
static void test_gb18030_json(pe_test_config *tc)
int main(int argc, char *argv[])
static bool escape_replace(PGconn *conn, PQExpBuffer target, const char *unescaped, size_t unescaped_len, PQExpBuffer escape_err)
static void escapify(PQExpBuffer buf, const char *str, size_t len)
static bool escape_string(PGconn *conn, PQExpBuffer target, const char *unescaped, size_t unescaped_len, PQExpBuffer escape_err)
static bool escape_literal(PGconn *conn, PQExpBuffer target, const char *unescaped, size_t unescaped_len, PQExpBuffer escape_err)
#define TV_LEN(enc, string, len)
static bool escape_append_literal(PGconn *conn, PQExpBuffer target, const char *unescaped, size_t unescaped_len, PQExpBuffer escape_err)
struct pe_test_config pe_test_config
static void test_one_vector(pe_test_config *tc, const pe_test_vector *tv)
static pe_test_escape_func pe_test_escape_funcs[]
static const char * scan_res_s(PsqlScanResult res)
static pe_test_vector pe_test_vectors[]
static void report_result(pe_test_config *tc, bool success, const char *testname, const char *details, const char *subname, const char *resultdesc)
static void test_psql_parse(pe_test_config *tc, PQExpBuffer testname, PQExpBuffer input_buf, PQExpBuffer details)
struct pe_test_vector pe_test_vector
static bool escape_string_conn(PGconn *conn, PQExpBuffer target, const char *unescaped, size_t unescaped_len, PQExpBuffer escape_err)
static bool escape_identifier(PGconn *conn, PQExpBuffer target, const char *unescaped, size_t unescaped_len, PQExpBuffer escape_err)
static void usage(const char *hint)
static void test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_test_escape_func *ef)
struct pe_test_escape_func pe_test_escape_func
static bool encoding_conflicts_ascii(int encoding)
static const PsqlScanCallbacks test_scan_callbacks
static bool escape_fmt_id(PGconn *conn, PQExpBuffer target, const char *unescaped, size_t unescaped_len, PQExpBuffer escape_err)
static void test_gb18030_page_multiple(pe_test_config *tc)
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)