1/*-------------------------------------------------------------------------
5 * Archive streamer for verification of a tar format backup (including
6 * compressed tar format backups).
8 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
10 * src/bin/pg_verifybackup/astreamer_verify.c
12 *-------------------------------------------------------------------------
23 /* These fields don't change once initialized. */
29 /* These fields change for each archive member. */
64 * Create an astreamer that can verify a tar file.
68 char *archive_name,
Oid tblspc_oid)
84 return &streamer->
base;
88 * Main entry point of the archive streamer for verifying tar members.
102 /* Initial setup plus decide which checks to perform. */
107 /* Incremental work required to verify file contents. */
115 /* Now we've got all the file data. */
121 /* Reset for next archive member. */
129 /* Shouldn't happen. */
130 pg_fatal(
"unexpected state while parsing tar archive");
135 * End-of-stream processing for an astreamer_verify stream.
144 * Free memory associated with an astreamer_verify stream.
158 * Prepare to validate the next archive member.
167 /* We are only interested in normal files. */
172 * The backup manifest stores a relative path to the base directory for
173 * files belonging to a tablespace, while the tablespace backup tar
174 * archive does not include this path.
176 * The pathname taken from the tar file could contain '.' or '..'
177 * references, which we want to remove, so apply canonicalize_path(). It
178 * could also be an absolute pathname, which we want to treat as a
179 * relative path, so prepend "./" if we're not adding a tablespace prefix
180 * to make sure that canonicalize_path() does what we want.
189 /* Ignore any files that are listed in the ignore list. */
193 /* Check whether there's an entry in the manifest hash. */
198 "file \"%s\" is present in archive \"%s\" but not in the manifest",
202 mystreamer->
mfile = m;
204 /* Flag this entry as having been encountered in a tar archive. */
207 /* Check that the size matches. */
211 "file \"%s\" has size %llu in archive \"%s\" but size %" PRIu64
" in the manifest",
213 (
unsigned long long) member->
size,
221 * Decide whether we're going to verify the checksum for this file, and
222 * whether we're going to perform the additional validation that we do
223 * only for the control file.
231 /* If we're going to verify the checksum, initialize a checksum context. */
236 "%s: could not initialize checksum of file \"%s\"",
240 * Checksum verification cannot be performed without proper context
248 * Computes the checksum incrementally for the received file content.
250 * Should have a correctly initialized checksum_ctx, which will be used for
251 * incremental checksum computation.
265 * Update the total count of computed checksum bytes so that we can
266 * cross-check against the file size.
270 /* Feed these bytes to the checksum calculation. */
274 "could not update checksum of file \"%s\"",
281 * Perform the final computation and checksum verification after the entire
282 * file content has been processed.
295 * It's unclear how this could fail, but let's check anyway to be safe.
300 "file \"%s\" in archive \"%s\" should contain %" PRIu64
" bytes, but %" PRIu64
" bytes were read",
307 /* Get the final checksum. */
312 "could not finalize checksum of file \"%s\"",
317 /* And check it against the manifest. */
320 "file \"%s\" in archive \"%s\" has checksum of length %d, but expected %d",
325 "checksum mismatch for file \"%s\" in archive \"%s\"",
330 * Stores the pg_control file contents into a local buffer; we need the entire
331 * control file data for verification.
339 /* Should be here only for control file */
343 * Copy the new data into the control file buffer, but do not overrun the
344 * buffer. Note that the on-disk length of the control file is expected to
345 * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
346 * shorter, just sizeof(ControlFileData).
358 /* Remember how many bytes we saw, even if we didn't buffer them. */
363 * Performs the CRC calculation of pg_control data and then calls the routines
364 * that execute the final verification of the control file information.
373 /* Should be here only for control file */
378 * If the control file is not the right length, that's a big problem.
380 * NB: There is a theoretical overflow risk here from casting to int, but
381 * it isn't likely to be a real problem and this enables us to match the
382 * same format string that pg_rewind uses for this case. Perhaps both this
383 * and pg_rewind should use an unsigned 64-bit value, but for now we don't worry about it.
391 /* Compute the CRC. */
397 /* Control file contents not meaningful if CRC is bad. */
403 /* Can't interpret control file if not current version. */
409 /* System identifiers should match. */
412 report_fatal_error(
"%s: %s: manifest system identifier is %" PRIu64
", but control file has %" PRIu64,
420 * Reset flags and free memory allocations for member file verification.
427 mystreamer->
mfile = NULL;
astreamer_archive_context
@ ASTREAMER_MEMBER_HEADER
@ ASTREAMER_MEMBER_CONTENTS
@ ASTREAMER_MEMBER_TRAILER
@ ASTREAMER_ARCHIVE_TRAILER
static void member_copy_control_data(astreamer *streamer, astreamer_member *member, const char *data, int len)
static void member_verify_header(astreamer *streamer, astreamer_member *member)
static void astreamer_verify_finalize(astreamer *streamer)
static void member_compute_checksum(astreamer *streamer, astreamer_member *member, const char *data, int len)
static void astreamer_verify_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
astreamer * astreamer_verify_content_new(astreamer *next, verifier_context *context, char *archive_name, Oid tblspc_oid)
static void member_reset_info(astreamer *streamer)
struct astreamer_verify astreamer_verify
static void member_verify_control_data(astreamer *streamer)
static const astreamer_ops astreamer_verify_ops
static void astreamer_verify_free(astreamer *streamer)
static void member_verify_checksum(astreamer *streamer)
#define OidIsValid(objectId)
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
int pg_checksum_update(pg_checksum_context *context, const uint8 *input, size_t len)
int pg_checksum_init(pg_checksum_context *context, pg_checksum_type type)
#define PG_CHECKSUM_MAX_LENGTH
void * pg_malloc(size_t size)
Assert(PointerIsAligned(start, uint64))
void pfree(void *pointer)
void * palloc0(Size size)
struct ControlFileData ControlFileData
#define PG_CONTROL_VERSION
#define PG_CONTROL_FILE_SIZE
#define COMP_CRC32C(crc, data, len)
#define EQ_CRC32C(c1, c2)
void report_fatal_error(const char *pg_restrict fmt,...)
bool should_ignore_relpath(verifier_context *context, const char *relpath)
void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...)
#define should_verify_checksum(m)
void canonicalize_path(char *path)
uint32 pg_control_version
void(* content)(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
verifier_context * context
ControlFileData control_file
uint64 control_file_bytes
pg_checksum_context * checksum_ctx
const astreamer_ops * bbs_ops
manifest_files_hash * files
pg_checksum_type checksum_type
#define XLOG_CONTROL_FILE