1/*-------------------------------------------------------------------------
4 * Parse a backup manifest in JSON format.
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
9 * src/common/parse_manifest.c
11 *-------------------------------------------------------------------------
20 * Semantic states for JSON manifest parsing.
42 * Possible fields for one file as described by the manifest.
55 * Possible fields for one WAL range as described by the manifest.
65 * Internal state used while decoding the JSON-format backup manifest.
72 /* These fields are used for parsing objects in the list of files. */
81 /* These fields are used for parsing objects in the list of WAL ranges. */
87 /* Miscellaneous other stuff. */
94/* typedef appears in parse_manifest.h */
115 const char *buffer,
size_t size,
125 * Set up for incremental parsing of the manifest.
138 parse->context = context;
140 parse->saw_version_field =
false;
156 if (manifest_ctx == NULL)
157 context->
error_cb(context,
"out of memory");
159 context->
error_cb(context,
"could not initialize checksum of manifest");
166 * Free an incremental state object and its contents.
173 /* incstate->manifest_ctx has already been freed */
178 * Parse the manifest in pieces.
180 * The caller must ensure that the final piece contains the final lines
181 * with the complete checksum.
186 const char *chunk,
size_t size,
bool is_last)
194 chunk, size, is_last);
208 (
const uint8 *) chunk, size) < 0)
209 context->
error_cb(context,
"could not update checksum of manifest");
219 * Main entrypoint to parse a JSON-format backup manifest.
221 * Caller should set up the parsing context and then invoke this function.
222 * For each file whose information is extracted from the manifest,
223 * context->per_file_cb is invoked. In case of trouble, context->error_cb is
224 * invoked and is expected not to return.
235 /* Set up our private parsing context. */
236 parse.context = context;
238 parse.saw_version_field =
false;
240 /* Create a JSON lexing context. */
243 /* Set up semantic actions. */
255 /* Run the actual JSON parser. */
262 /* Verify the manifest checksum. */
269 * Invoked at the start of each object in the JSON document.
271 * The document as a whole is expected to be an object; each file and each
272 * WAL range is also expected to be an object. If we're anywhere else in the
273 * document, it's an error.
280 switch (
parse->state)
287 parse->pathname = NULL;
288 parse->encoded_pathname = NULL;
290 parse->algorithm = NULL;
291 parse->checksum = NULL;
295 parse->timeline = NULL;
296 parse->start_lsn = NULL;
297 parse->end_lsn = NULL;
301 "unexpected object start");
309 * Invoked at the end of each object in the JSON document.
311 * The possible cases here are the same as for json_manifest_object_start.
312 * There's nothing special to do at the end of the document, but when we
313 * reach the end of an object representing a particular file or WAL range,
314 * we must call json_manifest_finalize_file() to save the associated details.
321 switch (
parse->state)
336 "unexpected object end");
344 * Invoked at the start of each array in the JSON document.
346 * Within the toplevel object, the value associated with the "Files" key
347 * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
355 switch (
parse->state)
365 "unexpected array start");
373 * Invoked at the end of each array in the JSON document.
375 * The cases here are analogous to those in json_manifest_array_start.
382 switch (
parse->state)
390 "unexpected array end");
398 * Invoked at the start of each object field in the JSON document.
405 switch (
parse->state)
410 * Inside toplevel object. The version indicator should always be
413 if (!
parse->saw_version_field)
415 if (strcmp(fname,
"PostgreSQL-Backup-Manifest-Version") != 0)
417 "expected version indicator");
419 parse->saw_version_field =
true;
423 /* Is this the system identifier? */
424 if (strcmp(fname,
"System-Identifier") == 0)
430 /* Is this the list of files? */
431 if (strcmp(fname,
"Files") == 0)
437 /* Is this the list of WAL ranges? */
438 if (strcmp(fname,
"WAL-Ranges") == 0)
444 /* Is this the manifest checksum? */
445 if (strcmp(fname,
"Manifest-Checksum") == 0)
451 /* It's not a field we recognize. */
453 "unrecognized top-level field");
457 /* Inside object for one file; which key have we got? */
458 if (strcmp(fname,
"Path") == 0)
460 else if (strcmp(fname,
"Encoded-Path") == 0)
462 else if (strcmp(fname,
"Size") == 0)
464 else if (strcmp(fname,
"Last-Modified") == 0)
466 else if (strcmp(fname,
"Checksum-Algorithm") == 0)
468 else if (strcmp(fname,
"Checksum") == 0)
472 "unexpected file field");
477 /* Inside object for one WAL range; which key have we got? */
478 if (strcmp(fname,
"Timeline") == 0)
480 else if (strcmp(fname,
"Start-LSN") == 0)
482 else if (strcmp(fname,
"End-LSN") == 0)
486 "unexpected WAL range field");
492 "unexpected object field");
502 * Invoked at the start of each scalar in the JSON document.
504 * Object field names don't reach this code; those are handled by
505 * json_manifest_object_field_start. When we're inside of the object for
506 * a particular file or WAL range, that function will have noticed the name
507 * of the field, and we'll get the corresponding value here. When we're in
508 * the toplevel object, the parse state itself tells us which field this is.
510 * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
511 * can just check on the spot, the goal here is just to save the value in
512 * the parse state for later use. We don't actually do anything until we
513 * reach either the end of the object representing this file, or the end
514 * of the manifest, as the case may be.
521 switch (
parse->state)
536 switch (
parse->file_field)
561 switch (
parse->wal_range_field)
590 * Do additional parsing and sanity-checking of the manifest version, and invoke
591 * the callback so that the caller can get that detail and take actions
592 * accordingly. This happens for each manifest when the corresponding JSON
593 * object is completely parsed.
605 version = strtoi64(
parse->manifest_version, &ep, 10);
608 "manifest version not an integer");
610 if (version != 1 && version != 2)
612 "unexpected manifest version");
614 /* Invoke the callback for version */
619 * Do additional parsing and sanity-checking of the system identifier, and
620 * invoke the callback so that the caller can get that detail and take actions
632 /* Parse system identifier. */
633 system_identifier = strtou64(
parse->manifest_system_identifier, &ep, 10);
636 "system identifier in manifest not an integer");
638 /* Invoke the callback for system identifier */
643 * Do additional parsing and sanity-checking of the details gathered for one
644 * file, and invoke the per-file callback so that the caller gets those
645 * details. This happens for each file when the corresponding JSON object is
654 int checksum_string_length;
657 uint8 *checksum_payload;
659 /* Pathname and size are required. */
660 if (
parse->pathname == NULL &&
parse->encoded_pathname == NULL)
662 if (
parse->pathname != NULL &&
parse->encoded_pathname != NULL)
664 "both path name and encoded path name");
665 if (
parse->size == NULL)
667 if (
parse->algorithm == NULL &&
parse->checksum != NULL)
669 "checksum without algorithm");
671 /* Decode encoded pathname, if that's what we have. */
672 if (
parse->encoded_pathname != NULL)
674 int encoded_length = strlen(
parse->encoded_pathname);
675 int raw_length = encoded_length / 2;
678 if (encoded_length % 2 != 0 ||
680 parse->encoded_pathname,
683 "could not decode file name");
684 parse->pathname[raw_length] =
'0円';
686 parse->encoded_pathname = NULL;
690 size = strtou64(
parse->size, &ep, 10);
693 "file size is not an integer");
695 /* Parse the checksum algorithm, if it's present. */
696 if (
parse->algorithm == NULL)
699 context->
error_cb(context,
"unrecognized checksum algorithm: \"%s\"",
702 /* Parse the checksum payload, if it's present. */
703 checksum_string_length =
parse->checksum == NULL ? 0
704 : strlen(
parse->checksum);
705 if (checksum_string_length == 0)
708 checksum_payload = NULL;
712 checksum_length = checksum_string_length / 2;
713 checksum_payload =
palloc(checksum_length);
714 if (checksum_string_length % 2 != 0 ||
718 "invalid checksum for file \"%s\": \"%s\"",
722 /* Invoke the callback with the details we've gathered. */
724 checksum_type, checksum_length, checksum_payload);
726 /* Free memory we no longer need. */
727 if (
parse->size != NULL)
732 if (
parse->algorithm != NULL)
735 parse->algorithm = NULL;
737 if (
parse->checksum != NULL)
740 parse->checksum = NULL;
745 * Do additional parsing and sanity-checking of the details gathered for one
746 * WAL range, and invoke the per-WAL-range callback so that the caller gets
747 * those details. This happens for each WAL range when the corresponding JSON
748 * object is completely parsed.
759 /* Make sure all fields are present. */
760 if (
parse->timeline == NULL)
762 if (
parse->start_lsn == NULL)
764 if (
parse->end_lsn == NULL)
767 /* Parse timeline. */
768 tli = strtoul(
parse->timeline, &ep, 10);
771 "timeline is not an integer");
774 "could not parse start LSN");
777 "could not parse end LSN");
779 /* Invoke the callback with the details we've gathered. */
782 /* Free memory we no longer need. */
783 if (
parse->timeline != NULL)
786 parse->timeline = NULL;
788 if (
parse->start_lsn != NULL)
791 parse->start_lsn = NULL;
793 if (
parse->end_lsn != NULL)
796 parse->end_lsn = NULL;
801 * Verify that the manifest checksum is correct.
803 * The last line of the manifest file is excluded from the manifest checksum,
804 * because the last line is expected to contain the checksum that covers
805 * the rest of the file.
807 * For an incremental parse, this will just be called on the last chunk of the
808 * manifest, with the cryptohash context passed in. For a non-incremental
809 * parse, incr_ctx will be NULL.
817 size_t number_of_newlines = 0;
818 size_t ultimate_newline = 0;
819 size_t penultimate_newline = 0;
824 /* Find the last two newlines in the file. */
825 for (
i = 0;
i < size; ++
i)
827 if (buffer[
i] ==
'\n')
829 ++number_of_newlines;
830 penultimate_newline = ultimate_newline;
831 ultimate_newline =
i;
836 * Make sure that the last newline is right at the end, and that there are
837 * at least two lines total. We need this to be true in order for the
838 * following code, which computes the manifest checksum, to work properly.
840 if (number_of_newlines < 2)
842 "expected at least 2 lines");
843 if (ultimate_newline != size - 1)
845 "last line not newline-terminated");
847 /* Checksum the rest. */
848 if (incr_ctx == NULL)
851 if (manifest_ctx == NULL)
852 context->
error_cb(context,
"out of memory");
854 context->
error_cb(context,
"could not initialize checksum of manifest");
858 manifest_ctx = incr_ctx;
861 context->
error_cb(context,
"could not update checksum of manifest");
863 sizeof(manifest_checksum_actual)) < 0)
864 context->
error_cb(context,
"could not finalize checksum of manifest");
867 if (
parse->manifest_checksum == NULL)
868 context->
error_cb(
parse->context,
"manifest has no checksum");
872 context->
error_cb(context,
"invalid manifest checksum: \"%s\"",
873 parse->manifest_checksum);
874 if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
876 context->
error_cb(context,
"manifest checksum mismatch");
881 * Report a parse error.
883 * This is intended to be used for fairly low-level failures that probably
884 * shouldn't occur unless somebody has deliberately constructed a bad manifest,
885 * or unless the server is generating bad manifests due to some bug. msg should
886 * be a short string giving some hint as to what the problem is.
891 context->
error_cb(context,
"could not parse backup manifest: %s", msg);
896 * Convert a character which represents a hexadecimal digit to an integer.
898 * Returns -1 if the character is not a hexadecimal digit.
903 if (
c >=
'0' &&
c <=
'9')
905 if (
c >=
'a' &&
c <=
'f')
907 if (
c >=
'A' &&
c <=
'F')
914 * Decode a hex string into a byte string, 2 hex chars per byte.
916 * Returns false if invalid characters are encountered; otherwise true.
923 for (
i = 0;
i < nbytes; ++
i)
928 if (n1 < 0 || n2 < 0)
930 result[
i] = n1 * 16 + n2;
937 * Parse an XLogRecPtr expressed using the usual string format.
945 if (sscanf(
input,
"%X/%08X", &hi, &lo) != 2)
947 *result = ((
uint64) hi) << 32 | lo;
bool pg_checksum_parse_type(char *name, pg_checksum_type *type)
int pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len)
pg_cryptohash_ctx * pg_cryptohash_create(pg_cryptohash_type type)
int pg_cryptohash_init(pg_cryptohash_ctx *ctx)
void pg_cryptohash_free(pg_cryptohash_ctx *ctx)
int pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len)
Assert(PointerIsAligned(start, uint64))
JsonParseErrorType pg_parse_json_incremental(JsonLexContext *lex, const JsonSemAction *sem, const char *json, size_t len, bool is_last)
JsonLexContext * makeJsonLexContextIncremental(JsonLexContext *lex, int encoding, bool need_escapes)
JsonParseErrorType pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
JsonLexContext * makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json, size_t len, int encoding, bool need_escapes)
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
void freeJsonLexContext(JsonLexContext *lex)
void pfree(void *pointer)
JsonManifestSemanticState
@ JM_EXPECT_SYSTEM_IDENTIFIER_VALUE
@ JM_EXPECT_TOPLEVEL_START
@ JM_EXPECT_WAL_RANGES_START
@ JM_EXPECT_THIS_FILE_FIELD
@ JM_EXPECT_THIS_FILE_VALUE
@ JM_EXPECT_THIS_WAL_RANGE_VALUE
@ JM_EXPECT_VERSION_VALUE
@ JM_EXPECT_MANIFEST_CHECKSUM_VALUE
@ JM_EXPECT_THIS_WAL_RANGE_FIELD
@ JM_EXPECT_TOPLEVEL_FIELD
@ JM_EXPECT_WAL_RANGES_NEXT
static JsonParseErrorType json_manifest_array_start(void *state)
void json_parse_manifest(JsonManifestParseContext *context, const char *buffer, size_t size)
JsonManifestWALRangeField
JsonManifestParseIncrementalState * json_parse_manifest_incremental_init(JsonManifestParseContext *context)
static bool parse_xlogrecptr(XLogRecPtr *result, char *input)
static void json_manifest_finalize_wal_range(JsonManifestParseState *parse)
static JsonParseErrorType json_manifest_object_field_start(void *state, char *fname, bool isnull)
static void json_manifest_finalize_file(JsonManifestParseState *parse)
static JsonParseErrorType json_manifest_object_end(void *state)
static JsonParseErrorType json_manifest_object_start(void *state)
static void json_manifest_finalize_version(JsonManifestParseState *parse)
static JsonParseErrorType json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
static pg_noreturn void json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
static JsonParseErrorType json_manifest_array_end(void *state)
@ JMFF_CHECKSUM_ALGORITHM
void json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
void json_parse_manifest_incremental_chunk(JsonManifestParseIncrementalState *incstate, const char *chunk, size_t size, bool is_last)
static void json_manifest_finalize_system_identifier(JsonManifestParseState *parse)
static void verify_manifest_checksum(JsonManifestParseState *parse, const char *buffer, size_t size, pg_cryptohash_ctx *incr_ctx)
static int hexdecode_char(char c)
static bool hexdecode_string(uint8 *result, char *input, int nbytes)
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
#define PG_SHA256_DIGEST_LENGTH
json_manifest_per_wal_range_callback per_wal_range_cb
json_manifest_system_identifier_callback system_identifier_cb
json_manifest_error_callback error_cb
json_manifest_per_file_callback per_file_cb
json_manifest_version_callback version_cb
pg_cryptohash_ctx * manifest_ctx
char * manifest_system_identifier
JsonManifestWALRangeField wal_range_field
JsonManifestParseContext * context
JsonManifestFileField file_field
pg_checksum_type checksum_algorithm
JsonManifestSemanticState state
json_struct_action array_end
json_struct_action object_start
json_ofield_action object_field_start
json_aelem_action array_element_start
json_scalar_action scalar
json_aelem_action array_element_end
json_struct_action array_start
json_struct_action object_end
json_ofield_action object_field_end