1/*-------------------------------------------------------------------------
4 * Reconstruct full file from incremental file and backup chain.
6 * Copyright (c) 2017-2025, PostgreSQL Global Development Group
9 * src/bin/pg_combinebackup/reconstruct.c
11 *-------------------------------------------------------------------------
26 * An rfile stores the data that we need in order to be able to use some file
27 * on disk for reconstruction. For any given output file, we create one rfile
28 * per backup that we need to consult when we are constructing that output file.
30 * If we find a full version of the file in the backup chain, then only
31 * filename and fd are initialized; the remaining fields are 0 or NULL.
32 * For an incremental file, header_length, num_blocks, relative_block_numbers,
33 * and truncation_block_length are also set.
35 * num_blocks_read and highest_offset_read always start out as 0.
57 unsigned block_length,
71 * Reconstruct a full file from an incremental file and a chain of prior
74 * input_filename should be the path to the incremental file, and
75 * output_filename should be the path where the reconstructed file is to be
78 * relative_path should be the path to the directory containing this file,
79 * relative to the root of the backup (NOT relative to the root of the
80 * tablespace). It must always end with a trailing slash. bare_file_name
81 * should be the name of the file within that directory, without
84 * n_prior_backups is the number of prior backups, and prior_backup_dirs is
85 * an array of pathnames where those backups can be found.
93 char **prior_backup_dirs,
98 uint8 **checksum_payload,
104 rfile *latest_source = NULL;
107 unsigned block_length;
109 unsigned sidx = n_prior_backups;
110 bool full_copy_possible =
true;
111 int copy_source_index = -1;
112 rfile *copy_source = NULL;
115 /* Sanity check the relative_path. */
116 Assert(relative_path[0] !=
'\0');
117 Assert(relative_path[strlen(relative_path) - 1] ==
'/');
120 * Every block must come either from the latest version of the file or
121 * from one of the prior backups.
126 * Use the information from the latest incremental file to figure out how
127 * long the reconstructed file should be.
130 source[n_prior_backups] = latest_source;
134 * For each block in the output file, we need to know from which file we
135 * need to obtain it and at what offset in that file it's stored.
136 * sourcemap gives us the first of these things, and offsetmap the latter.
139 offsetmap =
pg_malloc0(
sizeof(off_t) * block_length);
142 * Every block that is present in the newest incremental file should be
143 * sourced from that file. If it precedes the truncation_block_length,
144 * it's a block that we would otherwise have had to find in an older
145 * backup and thus reduces the number of blocks remaining to be found by
146 * one; otherwise, it's an extra block that needs to be included in the
147 * output but would not have needed to be found in an older backup if it
148 * had not been present.
155 sourcemap[
b] = latest_source;
159 * A full copy of a file from an earlier backup is only possible if no
160 * blocks are needed from any later incremental file.
162 full_copy_possible =
false;
171 * Move to the next backup in the chain. If there are no more, then
179 * Look for the full file in the previous backup. If not found, then
180 * look for an incremental file instead.
183 prior_backup_dirs[sidx], relative_path, bare_file_name);
184 if ((s =
make_rfile(source_filename,
true)) == NULL)
187 prior_backup_dirs[sidx], relative_path, bare_file_name);
193 * If s->header_length == 0, then this is a full file; otherwise, it's
194 * an incremental file.
202 /* We need to know the length of the file. */
207 * Since we found a full file, source all blocks from it that
210 * Note that there may be blocks that don't exist either in this
211 * file or in any incremental file but that precede
212 * truncation_block_length. These are, presumably, zero-filled
213 * blocks that result from the server extending the file but
214 * taking no action on those blocks that generated any WAL.
216 * Sadly, we have no way of validating that this is really what
217 * happened, and neither does the server. From its perspective,
218 * an unmodified block that contains data looks exactly the same
219 * as a zero-filled block that never had any data: either way,
220 * it's not mentioned in any WAL summary and the server has no
221 * reason to read it. From our perspective, all we know is that
222 * nobody had a reason to back up the block. That certainly means
223 * that the block didn't exist at the time of the full backup, but
224 * the supposition that it was all zeroes at the time of every
225 * later backup is one that we can't validate.
227 blocklength = sb.
st_size / BLCKSZ;
230 if (sourcemap[
b] == NULL &&
b < blocklength)
233 offsetmap[
b] =
b * BLCKSZ;
238 * If a full copy looks possible, check whether the resulting file
239 * should be exactly as long as the source file is. If so, a full
240 * copy is acceptable, otherwise not.
242 if (full_copy_possible)
248 expected_length *= BLCKSZ;
252 copy_source_index = sidx;
256 /* We don't need to consider any further sources. */
261 * Since we found another incremental file, source all blocks from it
262 * that we need but don't yet have.
268 if (b < latest_source->truncation_block_length &&
269 sourcemap[
b] == NULL)
275 * A full copy of a file from an earlier backup is only
276 * possible if no blocks are needed from any later incremental
279 full_copy_possible =
false;
285 * If a checksum of the required type already exists in the
286 * backup_manifest for the relevant input directory, we can save some work
287 * by reusing that checksum instead of computing a new one.
289 if (copy_source_index >= 0 && manifests[copy_source_index] != NULL &&
294 mfile = manifest_files_lookup(manifests[copy_source_index]->files,
298 char *path =
psprintf(
"%s/backup_manifest",
299 prior_backup_dirs[copy_source_index]);
302 * The directory is out of sync with the backup_manifest, so emit
305 pg_log_warning(
"manifest file \"%s\" contains no entry for file \"%s\"",
313 *checksum_payload =
pg_malloc(*checksum_length);
320 /* Prepare for checksum calculation, if required. */
324 * If the full file can be created by copying a file from an older backup
325 * in the chain without needing to overwrite any blocks or truncate the
326 * result, then forget about performing reconstruction and just copy that
327 * file in its entirety.
329 * If we have only incremental files, and there's no full file at any
330 * point in the backup chain, something has gone wrong. Emit an error.
332 * Otherwise, reconstruct.
334 if (copy_source != NULL)
336 &checksum_ctx, copy_method,
dry_run);
337 else if (sidx == 0 &&
source[0]->header_length != 0)
339 pg_fatal(
"full backup contains unexpected incremental file \"%s\"",
345 block_length, sourcemap, offsetmap,
346 &checksum_ctx, copy_method,
351 /* Save results of checksum calculation. */
360 * Close files and release memory.
362 for (
i = 0;
i <= n_prior_backups; ++
i)
380 * Perform post-reconstruction logging and sanity checks.
387 for (
i = 0;
i < n_source; ++
i)
391 /* Ignore source if not used. */
395 /* If no data is needed from this file, we can ignore it. */
408 * In dry-run mode, we don't actually try to read data from the file,
409 * but we do try to verify that the file is long enough that we could
410 * have read the data if we'd tried.
412 * If this fails, then it means that a non-dry-run attempt would fail,
413 * complaining of not being able to read the required bytes from the
423 pg_fatal(
"file \"%s\" is too short: expected %llu, found %llu",
426 (
unsigned long long) sb.
st_size);
432 * When we perform reconstruction using an incremental file, the output file
433 * should be at least as long as the truncation_block_length. Any blocks
434 * present in the incremental file increase the output length as far as is
435 * necessary to include those blocks.
451 * Initialize an incremental rfile, reading the header so that we know which
452 * blocks it contains.
462 /* Read and validate magic number. */
465 pg_fatal(
"file \"%s\" has bad incremental magic number (0x%x, expected 0x%x)",
468 /* Read block count. */
471 pg_fatal(
"file \"%s\" has block count %u in excess of segment size %u",
474 /* Read truncation block length. */
478 pg_fatal(
"file \"%s\" has truncation block length %u in excess of segment size %u",
481 /* Read block numbers if there are any. */
490 /* Remember length of header. */
496 * Round header length to a multiple of BLCKSZ, so that block contents
497 * are properly aligned. Only do this when the file actually has data for
507 * Allocate and perform basic initialization of an rfile.
518 if (missing_ok && errno == ENOENT)
530 * Read the indicated number of bytes from an rfile into the buffer.
535 int rb =
read(rf->
fd, buffer, length);
542 pg_fatal(
"could not read file \"%s\": read %d of %u",
548 * Write out a reconstructed file.
553 unsigned block_length,
563 unsigned zero_blocks = 0;
565 /* Debugging output. */
569 unsigned start_of_range = 0;
570 unsigned current_block = 0;
572 /* Basic information about the output file to be produced. */
574 pg_log_debug(
"would reconstruct \"%s\" (%u blocks, checksum %s)",
578 pg_log_debug(
"reconstructing \"%s\" (%u blocks, checksum %s)",
582 /* Print out the plan for reconstructing this file. */
584 while (current_block < block_length)
586 rfile *s = sourcemap[current_block];
588 /* Extend range, if possible. */
589 if (current_block + 1 < block_length &&
590 s == sourcemap[current_block + 1])
596 /* Add details about this range. */
599 if (current_block == start_of_range)
603 start_of_range, current_block);
607 if (current_block == start_of_range)
610 (
uint64) offsetmap[current_block]);
613 start_of_range, current_block,
615 (
uint64) offsetmap[current_block]);
618 /* Begin new range. */
619 start_of_range = ++current_block;
621 /* If the output is very long or we are done, dump it now. */
622 if (current_block == block_length || debug_buf.
len > 1024)
633 /* Open the output file, except in dry_run mode. */
640 /* Read and write the blocks as required. */
641 for (
i = 0;
i < block_length; ++
i)
643 uint8 buffer[BLCKSZ];
646 /* Update accounting information. */
653 offsetmap[
i] + BLCKSZ);
656 /* Skip the rest of this in dry-run mode. */
660 /* Read or zero-fill the block as appropriate. */
664 * New block not mentioned in the WAL summary. Should have been an
665 * uninitialized block, so just zero-fill it.
667 memset(buffer, 0, BLCKSZ);
669 /* Write out the block, update the checksum if needed. */
672 /* Nothing else to do for zero-filled blocks. */
676 /* Copy the block using the appropriate copy method. */
680 * Read the block from the correct source file, and then write it
681 * out, possibly with a checksum update.
686 else /* use copy_file_range */
688#if defined(HAVE_COPY_FILE_RANGE)
689 /* copy_file_range modifies the offset, so use a local copy */
690 off_t off = offsetmap[
i];
694 * Retry until we've written all the bytes (the offset is updated
695 * by copy_file_range, and so is the wfd file offset).
701 wb = copy_file_range(s->
fd, &off, wfd, NULL, BLCKSZ - nwritten, 0);
704 pg_fatal(
"error while copying file range from \"%s\" to \"%s\": %m",
709 }
while (BLCKSZ > nwritten);
712 * When checksum calculation is not needed, we're done; otherwise
713 * read the block and pass it to the checksum calculation.
721 pg_fatal(
"could not update checksum of file \"%s\"",
724 pg_fatal(
"copy_file_range not supported on this platform");
729 /* Debugging output. */
733 pg_log_debug(
"would have zero-filled %u blocks", zero_blocks);
738 /* Close the output file. */
739 if (wfd >= 0 &&
close(wfd) != 0)
744 * Write the block into the file (using the file descriptor), and
745 * if needed update the checksum calculation.
747 * The buffer is expected to contain BLCKSZ bytes. The filename is
748 * provided only for the error message.
756 if ((wb =
write(
fd, buffer, BLCKSZ)) != BLCKSZ)
761 pg_fatal(
"could not write file \"%s\": wrote %d of %d",
765 /* Update the checksum computation. */
767 pg_fatal(
"could not update checksum of file \"%s\"",
772 * Read a block of data (BLCKSZ bytes) into the buffer.
779 /* Read the block from the correct source, except if dry-run. */
786 pg_fatal(
"could not read from file \"%s\", offset %llu: read %d of %d",
787 s->
filename, (
unsigned long long) off, rb, BLCKSZ);
#define INCREMENTAL_MAGIC
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
char * pg_checksum_type_name(pg_checksum_type type)
int pg_checksum_update(pg_checksum_context *context, const uint8 *input, size_t len)
int pg_checksum_init(pg_checksum_context *context, pg_checksum_type type)
#define PG_CHECKSUM_MAX_LENGTH
@ COPY_METHOD_COPY_FILE_RANGE
void copy_file(const char *fromfile, const char *tofile)
void * pg_malloc(size_t size)
void * pg_malloc0(size_t size)
Assert(PointerIsAligned(start, uint64))
if(TABLE==NULL||TABLE_index==NULL)
#define pg_log_debug(...)
char * pstrdup(const char *in)
void pfree(void *pointer)
static rewind_source * source
#define pg_log_warning(...)
static int fd(const char *x, int i)
char * psprintf(const char *fmt,...)
static rfile * make_rfile(char *filename, bool missing_ok)
static rfile * make_incremental_rfile(char *filename)
static void debug_reconstruction(int n_source, rfile **sources, bool dry_run)
void reconstruct_from_incremental_file(char *input_filename, char *output_filename, char *relative_path, char *bare_file_name, int n_prior_backups, char **prior_backup_dirs, manifest_data **manifests, char *manifest_path, pg_checksum_type checksum_type, int *checksum_length, uint8 **checksum_payload, CopyMethod copy_method, bool debug, bool dry_run)
static void read_block(rfile *s, off_t off, uint8 *buffer)
static void read_bytes(rfile *rf, void *buffer, unsigned length)
static void write_reconstructed_file(char *input_filename, char *output_filename, unsigned block_length, rfile **sourcemap, off_t *offsetmap, pg_checksum_context *checksum_ctx, CopyMethod copy_method, bool debug, bool dry_run)
static unsigned find_reconstructed_block_length(rfile *s)
static void write_block(int fd, char *output_filename, uint8 *buffer, pg_checksum_context *checksum_ctx)
void resetStringInfo(StringInfo str)
void appendStringInfo(StringInfo str, const char *fmt,...)
void initStringInfo(StringInfo str)
pg_checksum_type checksum_type
off_t highest_offset_read
BlockNumber * relative_block_numbers
unsigned truncation_block_length