PostgreSQL Source Code: src/test/modules/test_json_parser/test_json_parser_incremental.c Source File

PostgreSQL Source Code git master
test_json_parser_incremental.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * test_json_parser_incremental.c
4 * Test program for incremental JSON parser
5 *
6 * Copyright (c) 2024-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/test/modules/test_json_parser/test_json_parser_incremental.c
10 *
11 * This program tests incremental parsing of json. The input is fed into
12 * the parser in very small chunks. In practice you would normally use
13 * much larger chunks, but doing this makes it more likely that the
14 * full range of increment handling, especially in the lexer, is exercised.
15 *
16 * If the "-c SIZE" option is provided, that chunk size is used instead
17 * of the default of 60.
18 *
19 * If the "-r SIZE" option is provided, every chunk size from SIZE down to
20 * 1 is run in sequence. A null byte is printed to both stdout and stderr
21 * after each iteration.
22 *
23 * If the -s flag is given, the program does semantic processing. This should
24 * just mirror back the json, albeit with white space changes.
25 *
26 * If the -o flag is given, the JSONLEX_CTX_OWNS_TOKENS flag is set. (This can
27 * be used in combination with a leak sanitizer; without the option, the parser
28 * may leak memory with invalid JSON.)
29 *
30 * The argument specifies the file containing the JSON input.
31 *
32 *-------------------------------------------------------------------------
33 */
34
35#include "postgres_fe.h"
36
37#include <stdio.h>
38#include <sys/types.h>
39#include <sys/stat.h>
40#include <unistd.h>
41
42#include "common/jsonapi.h"
43#include "common/logging.h"
44#include "lib/stringinfo.h"
45#include "mb/pg_wchar.h"
46#include "pg_getopt.h"
47
/* Size of the read buffer in main(); also the largest chunk size accepted. */
48 #define BUFSIZE 6000
/* Chunk size used when neither -c nor -r is given. */
49 #define DEFAULT_CHUNK_SIZE 60
50
/*
 * State passed to the semantic action callbacks (via sem.semstate) when the
 * -s option is in effect.
 */
51 typedef struct DoState
52{
53 JsonLexContext *lex; /* lexer context, stored by main() */
54 bool elem_is_first; /* true until the first field/element of the
   * current object/array has been printed; decides
   * whether a separating comma is needed */
55 StringInfo buf; /* scratch buffer for escaped strings */
56 } DoState;
57
/* forward declarations for helpers defined after main() */
58static void usage(const char *progname);
59static void escape_json(StringInfo buf, const char *str);
60
61/* semantic action functions for parser */
/* each takes the DoState pointer as "state" and returns JSON_SUCCESS */
62static JsonParseErrorType do_object_start(void *state);
63static JsonParseErrorType do_object_end(void *state);
64static JsonParseErrorType do_object_field_start(void *state, char *fname, bool isnull);
65static JsonParseErrorType do_object_field_end(void *state, char *fname, bool isnull);
66static JsonParseErrorType do_array_start(void *state);
67static JsonParseErrorType do_array_end(void *state);
68static JsonParseErrorType do_array_element_start(void *state, bool isnull);
69static JsonParseErrorType do_array_element_end(void *state, bool isnull);
70static JsonParseErrorType do_scalar(void *state, char *token, JsonTokenType tokentype);
71
/*
 * Semantic action table used when -s is given.  It is not const because
 * main() fills in sem.semstate at run time.
 */
72 static JsonSemAction sem = {
73 .object_start = do_object_start,
74 .object_end = do_object_end,
75 .object_field_start = do_object_field_start,
76 .object_field_end = do_object_field_end,
77 .array_start = do_array_start,
78 .array_end = do_array_end,
79 .array_element_start = do_array_element_start,
80 .array_element_end = do_array_element_end,
81 .scalar = do_scalar
82};
83
/*
 * Set by the -o option and passed to setJsonLexContextOwnsTokens().  When
 * false, the callbacks free the field names and scalar tokens they receive.
 */
84 static bool lex_owns_tokens = false;
85
86int
87 main(int argc, char **argv)
88{
89 char buff[BUFSIZE];
90 FILE *json_file;
91 JsonParseErrorType result;
92 JsonLexContext *lex;
93 StringInfoData json;
94 int n_read;
95 size_t chunk_size = DEFAULT_CHUNK_SIZE;
96 bool run_chunk_ranges = false;
97 struct stat statbuf;
98 const JsonSemAction *testsem = &nullSemAction;
99 char *testfile;
100 int c;
101 bool need_strings = false;
102 int ret = 0;
103
104 pg_logging_init(argv[0]);
105
106 lex = calloc(1, sizeof(JsonLexContext));
107 if (!lex)
108 pg_fatal("out of memory");
109
110 while ((c = getopt(argc, argv, "r:c:os")) != -1)
111 {
112 switch (c)
113 {
114 case 'r': /* chunk range */
115 run_chunk_ranges = true;
116 /* fall through */
117 case 'c': /* chunk size */
118 chunk_size = strtou64(optarg, NULL, 10);
119 if (chunk_size > BUFSIZE)
120 pg_fatal("chunk size cannot exceed %d", BUFSIZE);
121 break;
122 case 'o': /* switch token ownership */
123 lex_owns_tokens = true;
124 break;
125 case 's': /* do semantic processing */
126 testsem = &sem;
127 sem.semstate = palloc(sizeof(struct DoState));
128 ((struct DoState *) sem.semstate)->lex = lex;
129 ((struct DoState *) sem.semstate)->buf = makeStringInfo();
130 need_strings = true;
131 break;
132 }
133 }
134
135 if (optind < argc)
136 {
137 testfile = argv[optind];
138 optind++;
139 }
140 else
141 {
142 usage(argv[0]);
143 exit(1);
144 }
145
146 initStringInfo(&json);
147
148 if ((json_file = fopen(testfile, PG_BINARY_R)) == NULL)
149 pg_fatal("error opening input: %m");
150
151 if (fstat(fileno(json_file), &statbuf) != 0)
152 pg_fatal("error statting input: %m");
153
154 do
155 {
156 /*
157 * This outer loop only repeats in -r mode. Reset the parse state and
158 * our position in the input file for the inner loop, which performs
159 * the incremental parsing.
160 */
161 off_t bytes_left = statbuf.st_size;
162 size_t to_read = chunk_size;
163
164 makeJsonLexContextIncremental(lex, PG_UTF8, need_strings);
165 setJsonLexContextOwnsTokens(lex, lex_owns_tokens);
166
167 rewind(json_file);
168 resetStringInfo(&json);
169
170 for (;;)
171 {
172 /* We will break when there's nothing left to read */
173
174 if (bytes_left < to_read)
175 to_read = bytes_left;
176
177 n_read = fread(buff, 1, to_read, json_file);
178 if (n_read < to_read)
179 pg_fatal("error reading input file: %d", ferror(json_file));
180
181 appendBinaryStringInfo(&json, buff, n_read);
182
183 /*
184 * Append some trailing junk to the buffer passed to the parser.
185 * This helps us ensure that the parser does the right thing even
186 * if the chunk isn't terminated with a '0円'.
187 */
188 appendStringInfoString(&json, "1+23 trailing junk");
189 bytes_left -= n_read;
190 if (bytes_left > 0)
191 {
192 result = pg_parse_json_incremental(lex, testsem,
193 json.data, n_read,
194 false);
195 if (result != JSON_INCOMPLETE)
196 {
197 fprintf(stderr, "%s\n", json_errdetail(result, lex));
198 ret = 1;
199 goto cleanup;
200 }
201 resetStringInfo(&json);
202 }
203 else
204 {
205 result = pg_parse_json_incremental(lex, testsem,
206 json.data, n_read,
207 true);
208 if (result != JSON_SUCCESS)
209 {
210 fprintf(stderr, "%s\n", json_errdetail(result, lex));
211 ret = 1;
212 goto cleanup;
213 }
214 if (!need_strings)
215 printf("SUCCESS!\n");
216 break;
217 }
218 }
219
220cleanup:
221 freeJsonLexContext(lex);
222
223 /*
224 * In -r mode, separate output with nulls so that the calling test can
225 * split it up, decrement the chunk size, and loop back to the top.
226 * All other modes immediately fall out of the loop and exit.
227 */
228 if (run_chunk_ranges)
229 {
230 fputc('0円', stdout);
231 fputc('0円', stderr);
232 }
233 } while (run_chunk_ranges && (--chunk_size > 0));
234
235 fclose(json_file);
236 free(json.data);
237 free(lex);
238
239 return ret;
240}
241
242/*
243 * The semantic routines here essentially just output the same json, except
244 * for white space. We could pretty print it but there's no need for our
245 * purposes. The result should be able to be fed to any JSON processor
246 * such as jq for validation.
247 */
248
249static JsonParseErrorType
250 do_object_start(void *state)
251{
252 DoState *_state = (DoState *) state;
253
254 printf("{\n");
255 _state->elem_is_first = true;
256
257 return JSON_SUCCESS;
258}
259
260static JsonParseErrorType
261 do_object_end(void *state)
262{
263 DoState *_state = (DoState *) state;
264
265 printf("\n}\n");
266 _state->elem_is_first = false;
267
268 return JSON_SUCCESS;
269}
270
271static JsonParseErrorType
272 do_object_field_start(void *state, char *fname, bool isnull)
273{
274 DoState *_state = (DoState *) state;
275
276 if (!_state->elem_is_first)
277 printf(",\n");
278 resetStringInfo(_state->buf);
279 escape_json(_state->buf, fname);
280 printf("%s: ", _state->buf->data);
281 _state->elem_is_first = false;
282
283 return JSON_SUCCESS;
284}
285
286static JsonParseErrorType
287 do_object_field_end(void *state, char *fname, bool isnull)
288{
289 if (!lex_owns_tokens)
290 free(fname);
291
292 return JSON_SUCCESS;
293}
294
295static JsonParseErrorType
296 do_array_start(void *state)
297{
298 DoState *_state = (DoState *) state;
299
300 printf("[\n");
301 _state->elem_is_first = true;
302
303 return JSON_SUCCESS;
304}
305
306static JsonParseErrorType
307 do_array_end(void *state)
308{
309 DoState *_state = (DoState *) state;
310
311 printf("\n]\n");
312 _state->elem_is_first = false;
313
314 return JSON_SUCCESS;
315}
316
317static JsonParseErrorType
318 do_array_element_start(void *state, bool isnull)
319{
320 DoState *_state = (DoState *) state;
321
322 if (!_state->elem_is_first)
323 printf(",\n");
324 _state->elem_is_first = false;
325
326 return JSON_SUCCESS;
327}
328
329static JsonParseErrorType
330 do_array_element_end(void *state, bool isnull)
331{
332 /* nothing to do */
333
334 return JSON_SUCCESS;
335}
336
337static JsonParseErrorType
338 do_scalar(void *state, char *token, JsonTokenType tokentype)
339{
340 DoState *_state = (DoState *) state;
341
342 if (tokentype == JSON_TOKEN_STRING)
343 {
344 resetStringInfo(_state->buf);
345 escape_json(_state->buf, token);
346 printf("%s", _state->buf->data);
347 }
348 else
349 printf("%s", token);
350
351 if (!lex_owns_tokens)
352 free(token);
353
354 return JSON_SUCCESS;
355}
356
357
358/* copied from backend code */
359static void
360 escape_json(StringInfo buf, const char *str)
361{
362 const char *p;
363
364 appendStringInfoCharMacro(buf, '"');
365 for (p = str; *p; p++)
366 {
367 switch (*p)
368 {
369 case '\b':
370 appendStringInfoString(buf, "\\b");
371 break;
372 case '\f':
373 appendStringInfoString(buf, "\\f");
374 break;
375 case '\n':
376 appendStringInfoString(buf, "\\n");
377 break;
378 case '\r':
379 appendStringInfoString(buf, "\\r");
380 break;
381 case '\t':
382 appendStringInfoString(buf, "\\t");
383 break;
384 case '"':
385 appendStringInfoString(buf, "\\\"");
386 break;
387 case '\\':
388 appendStringInfoString(buf, "\\\\");
389 break;
390 default:
391 if ((unsigned char) *p < ' ')
392 appendStringInfo(buf, "\\u%04x", (int) *p);
393 else
394 appendStringInfoCharMacro(buf, *p);
395 break;
396 }
397 }
398 appendStringInfoCharMacro(buf, '"');
399}
400
401static void
402 usage(const char *progname)
403{
404 fprintf(stderr, "Usage: %s [OPTION ...] testfile\n", progname);
405 fprintf(stderr, "Options:\n");
406 fprintf(stderr, " -c chunksize size of piece fed to parser (default 64)\n");
407 fprintf(stderr, " -o set JSONLEX_CTX_OWNS_TOKENS for leak checking\n");
408 fprintf(stderr, " -s do semantic processing\n");
409
410}
static void cleanup(void)
Definition: bootstrap.c:715
#define PG_BINARY_R
Definition: c.h:1274
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
const char * str
#define calloc(a, b)
Definition: header.h:55
#define free(a)
Definition: header.h:65
JsonParseErrorType pg_parse_json_incremental(JsonLexContext *lex, const JsonSemAction *sem, const char *json, size_t len, bool is_last)
Definition: jsonapi.c:868
JsonLexContext * makeJsonLexContextIncremental(JsonLexContext *lex, int encoding, bool need_escapes)
Definition: jsonapi.c:497
void setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
Definition: jsonapi.c:542
const JsonSemAction nullSemAction
Definition: jsonapi.c:287
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
Definition: jsonapi.c:2404
void freeJsonLexContext(JsonLexContext *lex)
Definition: jsonapi.c:687
JsonParseErrorType
Definition: jsonapi.h:35
@ JSON_SUCCESS
Definition: jsonapi.h:36
@ JSON_INCOMPLETE
Definition: jsonapi.h:37
JsonTokenType
Definition: jsonapi.h:18
@ JSON_TOKEN_STRING
Definition: jsonapi.h:20
void pg_logging_init(const char *argv0)
Definition: logging.c:83
const char * progname
Definition: main.c:44
void * palloc(Size size)
Definition: mcxt.c:1365
#define pg_fatal(...)
PGDLLIMPORT int optind
Definition: getopt.c:51
int getopt(int nargc, char *const *nargv, const char *ostr)
Definition: getopt.c:72
PGDLLIMPORT char * optarg
Definition: getopt.c:53
static char * buf
Definition: pg_test_fsync.c:72
@ PG_UTF8
Definition: pg_wchar.h:232
#define printf(...)
Definition: port.h:245
c
char * c
Definition: preproc-cursor.c:31
StringInfo makeStringInfo(void)
Definition: stringinfo.c:72
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:126
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:231
JsonLexContext * lex
json_struct_action object_start
Definition: jsonapi.h:154
void * semstate
Definition: jsonapi.h:153
char * data
Definition: stringinfo.h:48
Definition: win32_port.h:255
__int64 st_size
Definition: win32_port.h:263
Definition: regguts.h:323
Definition: oauth-curl.c:192
static void usage(const char *progname)
static bool lex_owns_tokens
static JsonParseErrorType do_object_field_start(void *state, char *fname, bool isnull)
static JsonSemAction sem
int main(int argc, char **argv)
static JsonParseErrorType do_array_element_end(void *state, bool isnull)
struct DoState DoState
static JsonParseErrorType do_array_element_start(void *state, bool isnull)
static JsonParseErrorType do_object_end(void *state)
static JsonParseErrorType do_scalar(void *state, char *token, JsonTokenType tokentype)
#define DEFAULT_CHUNK_SIZE
static JsonParseErrorType do_array_start(void *state)
static JsonParseErrorType do_object_start(void *state)
static JsonParseErrorType do_array_end(void *state)
static JsonParseErrorType do_object_field_end(void *state, char *fname, bool isnull)
static void escape_json(StringInfo buf, const char *str)
#define BUFSIZE
#define fstat
Definition: win32_port.h:273

AltStyle によって変換されたページ (->オリジナル) /