1/*-------------------------------------------------------------------------
4 * Two-phase commit support functions.
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/access/transam/twophase.c
13 * Each global transaction is associated with a global transaction
14 * identifier (GID). The client assigns a GID to a postgres
15 * transaction with the PREPARE TRANSACTION command.
17 * We keep all active global transactions in a shared memory array.
18 * When the PREPARE TRANSACTION command is issued, the GID is
19 * reserved for the transaction in the array. This is done before
20 * a WAL entry is made, because the reservation checks for duplicate
21 * GIDs and aborts the transaction if there already is a global
22 * transaction in prepared state with the same GID.
24 * A global transaction (gxact) also has a dummy PGPROC; this is what keeps
25 * the XID considered running by TransactionIdIsInProgress. It is also
26 * convenient as a PGPROC to hook the gxact's locks to.
28 * Information to recover prepared transactions in case of crash is
29 * now stored in WAL for the common case. In some cases there will be
30 * an extended period between preparing a GXACT and commit/abort, in
31 * which case we need to separately record prepared transaction data
32 * in permanent storage. This includes locking information, pending
33 * notifications etc. All that state information is written to the
34 * per-transaction state file in the pg_twophase directory.
35 * All prepared transactions will be written prior to shutdown.
37 * The life cycle of state data is as follows:
39 * * On PREPARE TRANSACTION, the backend writes state data only to the WAL
40 * and stores a pointer to the start of the WAL record in
41 * gxact->prepare_start_lsn.
42 * * If COMMIT occurs before a checkpoint, the backend reads the data from
43 * WAL using prepare_start_lsn.
44 * * On checkpoint state data copied to files in pg_twophase directory and
46 * * If COMMIT happens after checkpoint then backend reads state data from
49 * During replay and replication, TwoPhaseState also holds information
50 * about active prepared transactions that haven't been moved to disk yet.
52 * Replay of twophase records happens by the following rules:
54 * * At the beginning of recovery, pg_twophase is scanned once, filling
55 * TwoPhaseState with entries marked with gxact->inredo and
56 * gxact->ondisk. Two-phase file data older than the XID horizon of
57 * the redo position are discarded.
58 * * On PREPARE redo, the transaction is added to TwoPhaseState->prepXacts.
59 * gxact->inredo is set to true for such entries.
60 * * On Checkpoint we iterate through TwoPhaseState->prepXacts entries
61 * that have gxact->inredo set and are behind the redo_horizon. We
62 * save them to disk and then switch gxact->ondisk to true.
63 * * On COMMIT/ABORT we delete the entry from TwoPhaseState->prepXacts.
64 * If gxact->ondisk is true, the corresponding entry from the disk
65 * is additionally deleted.
66 * * RecoverPreparedTransactions(), StandbyRecoverPreparedTransactions()
67 * and PrescanPreparedTransactions() have been modified to go through
68 * gxact->inredo entries that have not made it to disk.
70 *-------------------------------------------------------------------------
111 * Directory where Two-phase commit files reside within PGDATA
113 #define TWOPHASE_DIR "pg_twophase"
115/* GUC variable, can't be changed after startup */
119 * This struct describes one global transaction that is in prepared state
120 * or attempting to become prepared.
122 * The lifecycle of a global transaction is:
124 * 1. After checking that the requested GID is not in use, set up an entry in
125 * the TwoPhaseState->prepXacts array with the correct GID and valid = false,
126 * and mark it as locked by my backend.
128 * 2. After successfully completing prepare, set valid = true and enter the
129 * referenced PGPROC into the global ProcArray.
131 * 3. To begin COMMIT PREPARED or ROLLBACK PREPARED, check that the entry is
132 * valid and not locked, then mark the entry as locked by storing my current
133 * proc number into locking_backend. This prevents concurrent attempts to
134 * commit or rollback the same prepared xact.
136 * 4. On completion of COMMIT PREPARED or ROLLBACK PREPARED, remove the entry
137 * from the ProcArray and the TwoPhaseState->prepXacts array and return it to
140 * Note that if the preparing transaction fails between steps 1 and 2, the
141 * entry must be removed so that the GID and the GlobalTransaction struct
142 * can be reused. See AtAbort_Twophase().
144 * typedef struct GlobalTransactionData *GlobalTransaction appears in
155 * Note that we need to keep track of two LSNs for each GXACT. We keep
156 * track of the start LSN because this is the address we must use to read
157 * state data back from WAL when committing a prepared GXACT. We keep
158 * track of the end LSN because that is the LSN we need to wait for prior
167 bool valid;
/* true if PGPROC entry is in proc array */
168 bool ondisk;
/* true if prepare state file is on disk */
169 bool inredo;
/* true if entry was added via xlog_redo */
170 char gid[
GIDSIZE];
/* The GID assigned to the prepared xact */
174 * Two Phase Commit shared state. Access to this struct is protected
175 * by TwoPhaseStateLock.
179 /* Head of linked list of free GlobalTransactionData structs */
182 /* Number of valid prepXacts entries. */
185 /* There are max_prepared_xacts items in this array */
192 * Global transaction entry currently locked by us, if any. Note that any
193 * access to the entry pointed to by this variable must be protected by
194 * TwoPhaseStateLock, though obviously the pointer itself doesn't need to be
195 * (since it's just local memory).
228 bool fromdisk,
bool setParent,
bool setNextXid);
236 * Initialization of shared memory
243 /* Need the fixed struct, the array of pointers, and the GTD structs */
272 * Initialize the linked list of free GlobalTransactionData structs
280 /* insert into linked list */
284 /* associate it with a PGPROC assigned by InitProcGlobal */
293 * Exit hook to unlock the global transaction entry we're working on.
298 /* same logic as abort */
303 * Abort hook to unlock the global transaction entry we're working on.
312 * What to do with the locked global transaction entry? If we were in the
313 * process of preparing the transaction, but haven't written the WAL
314 * record and state file yet, the transaction must not be considered as
315 * prepared. Likewise, if we are in the process of finishing an
316 * already-prepared transaction, and fail after having already written the
317 * 2nd phase commit or rollback record to the WAL, the transaction should
318 * not be considered as prepared anymore. In those cases, just remove the
319 * entry from shared memory.
321 * Otherwise, the entry must be left in place so that the transaction can
322 * be finished later, so just unlock it.
324 * If we abort during prepare, after having written the WAL record, we
325 * might not have transferred all locks and other state to the prepared
326 * transaction yet. Likewise, if we abort during commit or rollback,
327 * after having written the WAL record, we might not have released all the
328 * resources held by the transaction yet. In those cases, the in-memory
329 * state can be wrong, but it's too late to back out.
342 * This is called after we have finished transferring state to the prepared
358 * Reserve the GID for the given transaction.
369 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
370 errmsg(
"transaction identifier \"%s\" is too long",
373 /* fail immediately if feature is disabled */
376 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
377 errmsg(
"prepared transactions are disabled"),
378 errhint(
"Set \"max_prepared_transactions\" to a nonzero value.")));
380 /* on first call, register the exit hook */
389 /* Check for conflicting GID */
393 if (strcmp(gxact->
gid, gid) == 0)
397 errmsg(
"transaction identifier \"%s\" is already in use",
402 /* Get a free gxact from the freelist */
405 (
errcode(ERRCODE_OUT_OF_MEMORY),
406 errmsg(
"maximum number of prepared transactions reached"),
407 errhint(
"Increase \"max_prepared_transactions\" (currently %d).",
416 /* And insert it into the active array */
426 * MarkAsPreparingGuts
428 * This uses a gxact struct and puts it into the active array.
429 * NOTE: this is also used when reloading a gxact after a crash; so avoid
430 * assuming that we can use very much backend context.
432 * Note: This function should be called with appropriate locks held.
448 /* Initialize the PGPROC entry */
454 /* clone VXID, for TwoPhaseGetXidByVirtualXID() to find */
461 /* GetLockConflicts() uses this to specify a wait on the XID */
481 /* subxid data must be filled later by GXactLoadSubxactData */
487 gxact->
owner = owner;
489 gxact->
valid =
false;
491 strcpy(gxact->
gid, gid);
494 * Remember that we have this GlobalTransaction entry locked for us. If we
495 * abort after this, we must release it.
501 * GXactLoadSubxactData
503 * If the transaction being persisted had any subtransactions, this must
504 * be called before MarkAsPrepared() to load information into the dummy
513 /* We need no extra lock since the GXACT isn't valid yet */
529 * Mark the GXACT as fully valid, and enter it into the global ProcArray.
531 * lock_held indicates whether caller already holds TwoPhaseStateLock.
536 /* Lock here may be overkill, but I'm not convinced of that ... */
545 * Put it into the global ProcArray so TransactionIdIsInProgress considers
546 * the XID as still running.
553 * Locate the prepared transaction and mark it busy for COMMIT or ROLLBACK.
560 /* on first call, register the exit hook */
574 /* Ignore not-yet-valid GIDs */
577 if (strcmp(gxact->
gid, gid) != 0)
580 /* Found it, but has someone else got it locked? */
583 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
584 errmsg(
"prepared transaction with identifier \"%s\" is busy",
589 (
errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
590 errmsg(
"permission denied to finish prepared transaction"),
591 errhint(
"Must be superuser or the user that prepared the transaction.")));
594 * Note: it probably would be possible to allow committing from
595 * another database; but at the moment NOTIFY is known not to work and
596 * there may be some other issues as well. Hence disallow until
597 * someone gets motivated to make it work.
601 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
602 errmsg(
"prepared transaction belongs to another database"),
603 errhint(
"Connect to the database where the transaction was prepared to finish it.")));
605 /* OK for me to lock it */
617 (
errcode(ERRCODE_UNDEFINED_OBJECT),
618 errmsg(
"prepared transaction with identifier \"%s\" does not exist",
627 * Remove the prepared transaction from the shared memory array.
629 * NB: caller should have already removed it from ProcArray
642 /* remove from the active array */
646 /* and put it back in the freelist */
654 elog(
ERROR,
"failed to find %p in GlobalTransaction array", gxact);
658 * Returns an array of all prepared transactions for the user-level
659 * function pg_prepared_xact.
661 * The returned array and all its elements are copies of internal data
662 * structures, to minimize the time we need to hold the TwoPhaseStateLock.
664 * WARNING -- we return even those transactions that are not fully prepared
665 * yet. The caller should filter them out if it doesn't want them.
667 * The returned array is palloc'd.
689 for (
i = 0;
i < num;
i++)
699/* Working status for pg_prepared_xact */
709 * Produce a view with one row per prepared transaction.
711 * This function is here so we don't have to export the
712 * GlobalTransactionData struct definition.
725 /* create a function context for cross-call persistence */
729 * Switch to memory context appropriate for multiple function calls
733 /* build tupdesc for result tuples */
734 /* this had better match pg_prepared_xacts view in system_views.sql */
741 TIMESTAMPTZOID, -1, 0);
750 * Collect all the 2PC status information that we will format and send
751 * out as a result set.
778 * Form tuple with appropriate data.
797 * Get the GlobalTransaction struct for a prepared transaction
800 * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
801 * caller had better hold it.
815 * During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called
816 * repeatedly for the same XID. We can save work with a simple cache.
838 if (result == NULL)
/* should not happen */
839 elog(
ERROR,
"failed to find GlobalTransaction for xid %u",
843 cached_gxact = result;
849 * TwoPhaseGetXidByVirtualXID
850 * Lookup VXID among xacts prepared since last startup.
852 * (This won't find recovered xacts.) If more than one matches, return any
853 * and set "have_more" to true. To witness multiple matches, a single
854 * proc number must consume 2^32 LXIDs, with no intervening database restart.
879 * Startup process sets proc->vxid.procNumber to
880 * INVALID_PROC_NUMBER.
899 * TwoPhaseGetDummyProcNumber
900 * Get the dummy proc number for prepared transaction
902 * Dummy proc numbers are similar to proc numbers of real backends. They
903 * start at MaxBackends, and are unique across all currently active real
904 * backends and prepared transactions. If lock_held is set to true,
905 * TwoPhaseStateLock will not be taken, so the caller had better hold it.
916 * TwoPhaseGetDummyProc
917 * Get the PGPROC that represents a prepared transaction
919 * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
920 * caller had better hold it.
930/************************************************************************/
931/* State file support */
932/************************************************************************/
935 * Compute the FullTransactionId for the given TransactionId.
937 * This is safe if the xid has not yet reached COMMIT PREPARED or ROLLBACK
938 * PREPARED. After those commands, concurrent vac_truncate_clog() may make
939 * the xid cease to qualify as allowable. XXX Not all callers limit their
958 * 2PC state file format:
960 * 1. TwoPhaseFileHeader
961 * 2. TransactionId[] (subtransactions)
962 * 3. RelFileLocator[] (files to be deleted at commit)
963 * 4. RelFileLocator[] (files to be deleted at abort)
964 * 5. SharedInvalidationMessage[] (inval messages to be sent at commit)
965 * 6. TwoPhaseRecordOnDisk
967 * 8. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
968 * 9. checksum (CRC-32C)
970 * Each segment except the final checksum is MAXALIGN'd.
974 * Header for a 2PC state file
976 #define TWOPHASE_MAGIC 0x57F94534 /* format identifier */
981 * Header for each record in a state file
983 * NOTE: len counts only the rmgr data, not the TwoPhaseRecordOnDisk header.
984 * The rmgr data will be stored starting on a MAXALIGN boundary.
994 * During prepare, the state file is assembled in memory before writing it
995 * to WAL and the actual state file. We use a chain of StateFileChunk blocks
1016 * Append a block of data to records data structure.
1018 * NB: each block is padded to a MAXALIGN multiple. This must be
1019 * accounted for when the file is later read!
1021 * The data is copied, so the caller is free to modify it afterwards.
1047 * Start preparing a state file.
1049 * Initializes data structure and inserts the 2PC file header record.
1064 /* Initialize linked list */
1079 hdr.
total_len = 0;
/* EndPrepare will fill this in */
1093 hdr.
gidlen = strlen(gxact->
gid) + 1;
/* Include '\0' */
1094 /* EndPrepare will fill the origin data, if necessary */
1102 * Add the additional info about subxacts, deletable files and cache
1103 * invalidation messages.
1108 /* While we have the child-xact data, stuff it in the gxact too */
1142 * Finish preparing state data and writing it to WAL.
1151 /* Add the end sentinel to the list of 2PC records */
1155 /* Go back and fill in total_len in the file header record */
1170 * If the data size exceeds MaxAllocSize, we won't be able to read it in
1171 * ReadTwoPhaseFile. Check for that now, rather than fail in the case
1172 * where we write data to file and then re-read at commit time.
1176 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1177 errmsg(
"two-phase state file maximum length exceeded")));
1180 * Now writing 2PC state data to WAL. We let the WAL's CRC protection
1181 * cover us, so no need to calculate a separate CRC.
1183 * We have to set DELAY_CHKPT_START here, too; otherwise a checkpoint
1184 * starting immediately after the WAL record is inserted could complete
1185 * without fsync'ing our state file. (This is essentially the same kind
1186 * of race condition as the COMMIT-to-clog-write case that
1187 * RecordTransactionCommit uses DELAY_CHKPT_IN_COMMIT for; see notes
1188 * there.) Note that DELAY_CHKPT_IN_COMMIT is used to find transactions in
1189 * the critical commit section. We need to know about such transactions
1190 * for conflict detection in logical replication. See
1191 * GetOldestActiveTransactionId(true, false) and its use.
1193 * We save the PREPARE record's location in the gxact for later use by
1194 * CheckPointTwoPhase.
1213 /* Move LSNs forward for this replication origin */
1220 /* If we crash now, we have prepared: WAL replay will fix things */
1222 /* Store record's start location to read that later on Commit */
1226 * Mark the prepared transaction as valid. As soon as xact.c marks MyProc
1227 * as not running our XID (which it will do immediately after this
1228 * function returns), others can commit/rollback the xact.
1230 * NB: a side effect of this is to make a dummy ProcArray entry for the
1231 * prepared XID. This must happen before we clear the XID from MyProc /
1232 * ProcGlobal->xids[], else there is a window where the XID is not running
1233 * according to TransactionIdIsInProgress, and onlookers would be entitled
1234 * to assume the xact crashed. Instead we have a window where the same
1235 * XID appears twice in ProcArray, which is OK.
1240 * Now we can mark ourselves as out of the commit critical section: a
1241 * checkpoint starting after this will certainly see the gxact as a
1242 * candidate for fsyncing.
1247 * Remember that we have this GlobalTransaction entry locked for us. If
1248 * we crash after this point, it's too late to abort, but we must unlock
1249 * it so that the prepared transaction can be committed or rolled back.
1256 * Wait for synchronous replication, if required.
1258 * Note that at this stage we have marked the prepare, but still show as
1259 * running in the procarray (twice!) and continue to hold locks.
1268 * Register a 2PC record to be written to state file.
1286 * Read and validate the state file for xid.
1288 * If it looks OK (has a valid magic number and CRC), return the palloc'd
1289 * contents of the file, issuing an error when finding corrupted data. If
1290 * missing_ok is true, which indicates that missing files can be safely
1291 * ignored, then return NULL. This state can be reached when doing recovery
1292 * after discarding two-phase files from frozen epochs.
1312 if (missing_ok && errno == ENOENT)
1317 errmsg(
"could not open file \"%s\": %m", path)));
1321 * Check file length. We can determine a lower bound pretty easily. We
1322 * set an upper bound to avoid palloc() failure on a corrupt file, though
1323 * we can't guarantee that we won't get an out of memory error anyway,
1324 * even on a valid file.
1329 errmsg(
"could not stat file \"%s\": %m", path)));
1338 "incorrect size of file \"%s\": %lld bytes",
1343 if (crc_offset !=
MAXALIGN(crc_offset))
1346 errmsg(
"incorrect alignment of CRC offset for file \"%s\"",
1350 * OK, slurp in the file.
1361 errmsg(
"could not read file \"%s\": %m", path)));
1364 (
errmsg(
"could not read file \"%s\": read %d of %lld",
1373 errmsg(
"could not close file \"%s\": %m", path)));
1379 errmsg(
"invalid magic number stored in file \"%s\"",
1385 errmsg(
"invalid size stored in file \"%s\"",
1397 errmsg(
"calculated CRC checksum does not match value stored in file \"%s\"",
1405 * Reads 2PC data from xlog. During checkpoint this data will be moved to
1406 * twophase files and ReadTwoPhaseFile should be used instead.
1408 * Note clearly that this function can access WAL during normal operation,
1409 * similarly to the way WALSender or Logical Decoding would do.
1425 (
errcode(ERRCODE_OUT_OF_MEMORY),
1427 errdetail(
"Failed while allocating a WAL reading processor.")));
1437 errmsg(
"could not read two-phase state from WAL at %X/%08X: %s",
1442 errmsg(
"could not read two-phase state from WAL at %X/%08X",
1450 errmsg(
"expected two-phase state data is not present in WAL at %X/%08X",
1464 * Confirms an xid is prepared, during recovery
1477 return false;
/* nothing to do */
1479 /* Read and validate file */
1485 /* Check header also */
1494 * FinishPreparedTransaction: execute COMMIT PREPARED or ROLLBACK PREPARED
1518 * Validate the GID, and lock the GXACT to ensure that two backends do not
1519 * try to commit the same GID at once.
1527 * Read and validate 2PC state data. State data will typically be stored
1528 * in WAL files if the LSN is after the last checkpoint record, or moved
1529 * to disk if for some reason they have lived for a long time.
1538 * Disassemble the header area
1557 /* compute latestXid among all children */
1560 /* Prevent cancel/die interrupt while cleaning up */
1564 * The order of operations here is critical: make the XLOG entry for
1565 * commit or abort, then mark the transaction committed or aborted in
1566 * pg_xact, then remove its PGPROC from the global ProcArray (which means
1567 * TransactionIdIsInProgress will stop saying the prepared xact is in
1568 * progress), then run the post-commit or post-abort callbacks. The
1569 * callbacks will release the locks the transaction held.
1590 * In case we fail while running the callbacks, mark the gxact invalid so
1591 * no one else will try to commit/rollback, and so it will be recycled if
1592 * we fail after this point. It is still locked by our backend so it
1593 * won't go away yet.
1595 * (We assume it's safe to do this without taking TwoPhaseStateLock.)
1597 gxact->
valid =
false;
1600 * We have to remove any files that were supposed to be dropped. For
1601 * consistency with the regular xact.c code paths, must do this before
1602 * releasing locks, so do it before running the callbacks.
1604 * NB: this code knows that we couldn't be dropping any temp rels ...
1608 delrels = commitrels;
1613 delrels = abortrels;
1617 /* Make sure files supposed to be dropped are dropped */
1626 * Handle cache invalidation messages.
1628 * Relcache init file invalidation requires processing both before and
1629 * after we send the SI messages, only when committing. See
1642 * Acquire the two-phase lock. We want to work on the two-phase callbacks
1643 * while holding it to avoid potential conflicts with other transactions
1644 * attempting to use the same GID, so the lock is released once the shared
1645 * memory state is cleared.
1649 /* And now do the callbacks */
1658 * Read this value while holding the two-phase lock, as the on-disk 2PC
1659 * file is physically removed after the lock is released.
1663 /* Clear shared memory state */
1667 * Release the lock as all callbacks are called and shared memory cleanup
1672 /* Count the prepared xact as committed or aborted */
1676 * And now we can clean up any files we may have left.
1689 * Scan 2PC state data in memory and call the indicated callbacks for each 2PC record.
1705 if (callbacks[record->
rmid] != NULL)
1706 callbacks[record->
rmid] (fxid, record->
info, bufptr, record->
len);
1713 * Remove the 2PC file.
1715 * If giveWarning is false, do not complain about file-not-present;
1716 * this is an expected case during WAL replay.
1718 * This routine is used at early stages of recovery where future and
1719 * past orphaned files are checked, hence the FullTransactionId to build
1720 * a complete file name fit for the removal.
1729 if (errno != ENOENT || giveWarning)
1732 errmsg(
"could not remove file \"%s\": %m", path)));
1736 * Recreates a state file. This is used in WAL replay and during
1737 * checkpoint creation.
1739 * Note: content and len don't include CRC.
1756 O_CREAT | O_TRUNC | O_WRONLY |
PG_BINARY);
1760 errmsg(
"could not recreate file \"%s\": %m", path)));
1762 /* Write content and CRC */
1767 /* if write didn't set errno, assume problem is no disk space */
1772 errmsg(
"could not write file \"%s\": %m", path)));
1776 /* if write didn't set errno, assume problem is no disk space */
1781 errmsg(
"could not write file \"%s\": %m", path)));
1786 * We must fsync the file because the end-of-replay checkpoint will not do
1787 * so, there being no GXACT in shared memory yet to tell it to.
1793 errmsg(
"could not fsync file \"%s\": %m", path)));
1799 errmsg(
"could not close file \"%s\": %m", path)));
1803 * CheckPointTwoPhase -- handle 2PC component of checkpointing.
1805 * We must fsync the state file of any GXACT that is valid or has been
1806 * generated during redo and has a PREPARE LSN <= the checkpoint's redo
1807 * horizon. (If the gxact isn't valid yet, has not been generated in
1808 * redo, or has a later LSN, this checkpoint is not responsible for
1811 * This is deliberately run as late as possible in the checkpoint sequence,
1812 * because GXACTs ordinarily have short lifespans, and so it is quite
1813 * possible that GXACTs that were valid at checkpoint start will no longer
1814 * exist if we wait a little bit. With typical checkpoint settings this
1815 * will be about 3 minutes for an online checkpoint, so as a result we
1816 * expect that there will be no GXACTs that need to be copied to disk.
1818 * If a GXACT remains valid across multiple checkpoints, it will already
1819 * be on disk so we don't bother to repeat that write.
1825 int serialized_xacts = 0;
1828 return;
/* nothing to do */
1830 TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_START();
1833 * We are expecting there to be zero GXACTs that need to be copied to
1834 * disk, so we perform all I/O while holding TwoPhaseStateLock for
1835 * simplicity. This prevents any new xacts from preparing while this
1836 * occurs, which shouldn't be a problem since the presence of long-lived
1837 * prepared xacts indicates the transaction manager isn't active.
1839 * It's also possible to move I/O out of the lock, but on every error we
1840 * should check whether somebody committed our transaction in a different
1841 * backend. Let's leave this optimization for the future, in case somebody
1842 * finds that this place causes a bottleneck.
1844 * Note that it isn't possible for there to be a GXACT that has a
1845 * prepare_end_lsn set prior to the last checkpoint yet is marked invalid,
1846 * because of the efforts with delayChkptFlags.
1852 * Note that we are using gxact not PGPROC so this works in recovery
1876 * Flush unconditionally the parent directory to make any information
1877 * durable on disk. Two-phase files could have been removed and those
1878 * removals need to be made persistent as well as any files newly created
1879 * previously since the last checkpoint.
1883 TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_DONE();
1888 "for a long-running prepared transaction",
1889 "%u two-phase state files were written "
1890 "for long-running prepared transactions",
1892 serialized_xacts)));
1896 * restoreTwoPhaseData
1898 * Scan pg_twophase and fill TwoPhaseState depending on the on-disk data.
1899 * This is called once at the beginning of recovery, saving any extra
1900 * lookups in the future. Two-phase files that are newer than the
1901 * minimum XID horizon are discarded on the way.
1913 if (strlen(clde->
d_name) == 16 &&
1914 strspn(clde->
d_name,
"0123456789ABCDEF") == 16)
1922 true,
false,
false);
1935 * PrescanPreparedTransactions
1937 * Scan the shared memory entries of TwoPhaseState and determine the range
1938 * of valid XIDs present. This is run during database startup, after we
1939 * have completed reading WAL. TransamVariables->nextXid has been set to
1940 * one more than the highest XID for which evidence exists in WAL.
1942 * We throw away any prepared xacts with main XID beyond nextXid --- if any
1943 * are present, it suggests that the DBA has done a PITR recovery to an
1944 * earlier point in time without cleaning out pg_twophase. We dare not
1945 * try to recover such prepared xacts since they likely depend on database
1946 * state that doesn't exist now.
1948 * However, we will advance nextXid beyond any subxact XIDs belonging to
1949 * valid prepared xacts. We need to do this since subxact commit doesn't
1950 * write a WAL entry, and so there might be no evidence in WAL of those
1953 * On corrupted two-phase files, fail immediately. Keeping broken entries
1954 * around and letting replay continue would harm the system; a new backup
1955 * should be rolled in instead.
1957 * Our other responsibility is to determine and return the oldest valid XID
1958 * among the prepared xacts (if none, return TransamVariables->nextXid).
1959 * This is needed to synchronize pg_subtrans startup properly.
1961 * If xids_p and nxids_p are not NULL, pointer to a palloc'd array of all
1962 * top-level xids is stored in *xids_p. The number of entries in the array
1963 * is returned in *nxids_p.
1987 gxact->
ondisk,
false,
true);
1993 * OK, we think this file is valid. Incorporate xid into the
1994 * running-minimum result.
2002 if (nxids == allocsize)
2011 allocsize = allocsize * 2;
2015 xids[nxids++] = xid;
2032 * StandbyRecoverPreparedTransactions
2034 * Scan the shared memory entries of TwoPhaseState and setup all the required
2035 * information to allow standby queries to treat prepared transactions as still
2038 * This is never called at the end of recovery - we use
2039 * RecoverPreparedTransactions() at that point.
2041 * This updates pg_subtrans, so that any subtransactions will be correctly
2042 * seen as in-progress in snapshots taken during recovery.
2059 gxact->
ondisk,
true,
false);
2067 * RecoverPreparedTransactions
2069 * Scan the shared memory entries of TwoPhaseState and reload the state for
2070 * each prepared transaction (reacquire locks, etc).
2072 * This is run at the end of recovery, but before we allow backends to write
2075 * At the end of recovery the way we take snapshots will change. We now need
2076 * to mark all running transactions with their full SubTransSetParent() info
2077 * to allow normal snapshots to work correctly if snapshots overflow.
2078 * We do this here because by definition prepared transactions are the only
2079 * type of write transaction still running, so this is necessary and
2099 * Reconstruct subtrans state for the transaction --- needed because
2100 * pg_subtrans is not preserved over a restart. Note that we are
2101 * linking all the subtransactions directly to the top-level XID;
2102 * there may originally have been a more complex hierarchy, but
2103 * there's no need to restore that exactly. It's possible that
2104 * SubTransSetParent has been set before, if the prepared transaction
2105 * generated xid assignment records.
2109 gxact->
ondisk,
true,
false);
2114 (
errmsg(
"recovering prepared transaction %u of epoch %u from shared memory",
2122 gid = (
const char *) bufptr;
2133 * Recreate its GXACT and dummy PGPROC. But, check whether it was
2134 * added in redo and already has a shmem entry for it.
2140 /* recovered, so reset the flag for entries generated by redo */
2149 * Recover other state (notably locks) using resource managers.
2154 * Release locks held by the standby process after we process each
2155 * prepared transaction. As a result, we don't need too many
2156 * additional locks at any one time.
2162 * We're done with recovering this transaction. Clear MyLockedGxact,
2163 * like we do in PrepareTransaction() during normal operation.
2176 * ProcessTwoPhaseBuffer
2178 * Given a FullTransactionId, read it either from disk or read it directly
2179 * via shmem xlog record pointer using the provided "prepare_start_lsn".
2181 * If setParent is true, set up subtransaction parent linkages.
2183 * If setNextXid is true, set TransamVariables->nextXid to the newest
2190 bool setParent,
bool setNextXid)
2203 /* Already processed? */
2210 (
errmsg(
"removing stale two-phase state file for transaction %u of epoch %u",
2218 (
errmsg(
"removing stale two-phase state from memory for transaction %u of epoch %u",
2226 /* Reject XID if too new */
2232 (
errmsg(
"removing future two-phase state file for transaction %u of epoch %u",
2240 (
errmsg(
"removing future two-phase state from memory for transaction %u of epoch %u",
2250 /* Read and validate file */
2255 /* Read xlog data */
2259 /* Deconstruct header */
2266 errmsg(
"corrupted two-phase state file for transaction %u of epoch %u",
2272 errmsg(
"corrupted two-phase state in memory for transaction %u of epoch %u",
2278 * Examine subtransaction XIDs ... they should all follow main XID, and
2279 * they may force us to advance nextXid.
2290 /* update nextXid if needed */
2303 * RecordTransactionCommitPrepared
2305 * This is basically the same as RecordTransactionCommit (q.v. if you change
2306 * this function): in particular, we must set DELAY_CHKPT_IN_COMMIT to avoid a
2309 * We know the transaction made at least one XLOG entry (its PREPARE),
2310 * so it is never possible to optimize out the commit record.
2330 * Are we using the replication origins feature? Or, in other words, are
2331 * we replaying remote actions?
2336 /* Load the injection point before entering the critical section */
2341 /* See notes in RecordTransactionCommit */
2348 * Ensures the DELAY_CHKPT_IN_COMMIT flag write is globally visible before
2349 * commit time is written.
2354 * Note it is important to set committs value after marking ourselves as
2355 * in the commit critical section (DELAY_CHKPT_IN_COMMIT). This is because
2356 * we want to ensure all transactions that have acquired commit timestamp
2357 * are finished before we allow the logical replication client to advance
2358 * its xid which is used to hold back dead rows for conflict detection.
2359 * See comments atop worker.c.
2364 * Emit the XLOG commit record. Note that we mark 2PC commits as
2365 * potentially having AccessExclusiveLocks since we don't know whether or
2369 nchildren, children, nrels, rels,
2371 ninvalmsgs, invalmsgs,
2378 /* Move LSNs forward for this replication origin */
2383 * Record commit timestamp. The value comes from plain commit timestamp
2384 * if replorigin is not enabled, or replorigin already set a value for us
2385 * in replorigin_session_origin_timestamp otherwise.
2387 * We don't need to WAL-log anything here, as the commit record written
2388 * above already contains the data.
2398 * We don't currently try to sleep before flush here ... nor is there any
2399 * support for async commit of a prepared xact (the very idea is probably
2403 /* Flush XLOG to disk */
2406 /* Mark the transaction committed in pg_xact */
2409 /* Checkpoint can proceed now */
2415 * Wait for synchronous replication, if required.
2417 * Note that at this stage we have marked clog, but still show as running
2418 * in the procarray and continue to hold locks.
2424 * RecordTransactionAbortPrepared
2426 * This is basically the same as RecordTransactionAbort.
2428 * We know the transaction made at least one XLOG entry (its PREPARE),
2429 * so it is never possible to optimize out the abort record.
2445 * Are we using the replication origins feature? Or, in other words, are
2446 * we replaying remote actions?
2452 * Catch the scenario where we aborted partway through
2453 * RecordTransactionCommitPrepared ...
2456 elog(
PANIC,
"cannot abort transaction %u, it was already committed",
2462 * Emit the XLOG abort record. Note that we mark 2PC aborts as
2463 * potentially having AccessExclusiveLocks since we don't know whether or
2467 nchildren, children,
2474 /* Move LSNs forward for this replication origin */
2478 /* Always flush, since we're about to remove the 2PC state file */
2482 * Mark the transaction aborted in clog. This is not absolutely necessary
2483 * but we may as well do it while we are here.
2490 * Wait for synchronous replication, if required.
2492 * Note that at this stage we have marked clog, but still show as running
2493 * in the procarray and continue to hold locks.
2501 * Store pointers to the start/end of the WAL record along with the xid in
2502 * a gxact entry in shared memory TwoPhaseState structure. If caller
2503 * specifies InvalidXLogRecPtr as WAL location to fetch the two-phase
2504 * data, the entry is marked as located on disk.
2527 gid = (
const char *) bufptr;
2530 * Reserve the GID for the given transaction in the redo code path.
2532 * This creates a gxact struct and puts it into the active array.
2534 * In redo, this struct is mainly used to track PREPARE/COMMIT entries in
2535 * shared memory. Hence, we only fill up the bare minimum contents here.
2536 * The gxact also gets marked with gxact->inredo set to true to indicate
2537 * that it got added in the redo phase.
2541 * In the event of a crash while a checkpoint was running, it may be
2542 * possible that some two-phase data found its way to disk while its
2543 * corresponding record needs to be replayed in the follow-up recovery. As
2544 * the 2PC data was on disk, it has already been restored at the beginning
2545 * of recovery with restoreTwoPhaseData(), so skip this record to avoid
2546 * duplicates in TwoPhaseState. If a consistent state has been reached,
2547 * the record is added to TwoPhaseState and it should have no
2548 * corresponding file in pg_twophase.
2557 if (
access(path, F_OK) == 0)
2560 (
errmsg(
"could not recover two-phase state file for transaction %u",
2562 errdetail(
"Two-phase state file has been found in WAL record %X/%08X, but this transaction has already been restored from disk.",
2567 if (errno != ENOENT)
2570 errmsg(
"could not access file \"%s\": %m", path)));
2573 /* Get a free gxact from the freelist */
2576 (
errcode(ERRCODE_OUT_OF_MEMORY),
2577 errmsg(
"maximum number of prepared transactions reached"),
2578 errhint(
"Increase \"max_prepared_transactions\" (currently %d).",
2589 gxact->
valid =
false;
2591 gxact->
inredo =
true;
/* yes, added in redo */
2592 strcpy(gxact->
gid, gid);
2594 /* And insert it into the active array */
2600 /* recover apply progress */
2602 false /* backward */ ,
false /* WAL */ );
2605 elog(
DEBUG2,
"added 2PC data in shared memory for transaction %u of epoch %u",
2611 * PrepareRedoRemoveFull
2613 * Remove the corresponding gxact entry from TwoPhaseState. Also remove
2614 * the 2PC file if a prepared transaction was saved via an earlier checkpoint.
2616 * Caller must hold TwoPhaseStateLock in exclusive mode, because TwoPhaseState
2642 * Just leave if there is nothing, this is expected during WAL replay.
2648 * And now we can clean up any files we may have left.
2650 elog(
DEBUG2,
"removing 2PC data for transaction %u of epoch %u ",
2661 * Wrapper of PrepareRedoRemoveFull(), for TransactionIds.
2674 * Check if the prepared transaction with the given GID, lsn and timestamp
2677 * Note that we always compare with the LSN where prepare ends because that is
2678 * what is stored as origin_lsn in the 2PC file.
2680 * This function is primarily used to check if the prepared transaction
2681 * received from the upstream (remote node) already exists. Checking only GID
2682 * is not sufficient because a different prepared xact with the same GID can
2683 * exist on the same node. So, we are ensuring to match origin_lsn and
2684 * origin_timestamp of prepared xact to avoid the possibility of a match of
2685 * prepared xact from two different nodes.
2699 /* Ignore not-yet-valid GIDs. */
2700 if (gxact->
valid && strcmp(gxact->
gid, gid) == 0)
2706 * We are not expecting collisions of GXACTs (same gid) between
2707 * publisher and subscribers, so we perform all I/O while holding
2708 * TwoPhaseStateLock for simplicity.
2710 * To move the I/O out of the lock, we need to ensure that no
2711 * other backend commits the prepared xact in the meantime. We can
2712 * do this optimization if we encounter many collisions in GID
2713 * between publisher and subscriber.
2741 * TwoPhaseTransactionGid
2742 * Form the prepared transaction GID for two_phase transactions.
2744 * Return the GID in the supplied buffer.
2753 (
errcode(ERRCODE_PROTOCOL_VIOLATION),
2756 snprintf(gid_res, szgid,
"pg_gid_%u_%u", subid, xid);
2760 * IsTwoPhaseTransactionGidForSubid
2761 * Check whether the given GID (as formed by TwoPhaseTransactionGid) is
2762 * for the specified 'subid'.
2772 /* Extract the subid and xid from the given GID */
2773 ret = sscanf(gid,
"pg_gid_%u_%u", &subid_from_gid, &xid_from_gid);
2776 * Check that the given GID has expected format, and at least the subid
2779 if (ret != 2 || subid != subid_from_gid)
2783 * Reconstruct a temporary GID based on the subid and xid extracted from
2784 * the given GID and check whether the temporary GID and the given GID
2789 return strcmp(gid, gid_tmp) == 0;
2793 * LookupGXactBySubid
2794 * Check if the prepared transaction done by apply worker exists.
2806 /* Ignore not-yet-valid GIDs. */
2820 * TwoPhaseGetOldestXidInCommit
2821 * Return the oldest transaction ID from prepared transactions that are
2822 * currently in the commit critical section.
2824 * This function only considers transactions in the currently connected
2825 * database. If no matching transactions are found, it returns
2826 * InvalidTransactionId.
2848 * Get the backend that is handling the transaction. It's safe to
2849 * access this backend while holding TwoPhaseStateLock, as the backend
2850 * can only be destroyed after either removing or unlocking the
2851 * current global transaction, both of which require an exclusive
2852 * TwoPhaseStateLock.
2866 oldestRunningXid = xid;
2871 return oldestRunningXid;
#define pg_write_barrier()
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
TimestampTz GetCurrentTimestamp(void)
static Datum values[MAXATTR]
#define CStringGetTextDatum(s)
#define FLEXIBLE_ARRAY_MEMBER
#define MemSet(start, val, len)
#define OidIsValid(objectId)
void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz timestamp, RepOriginId nodeid)
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
int errmsg_internal(const char *fmt,...)
int errcode_for_file_access(void)
int errdetail(const char *fmt,...)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
int CloseTransientFile(int fd)
void fsync_fname(const char *fname, bool isdir)
DIR * AllocateDir(const char *dirname)
struct dirent * ReadDir(DIR *dir, const char *dirname)
int OpenTransientFile(const char *fileName, int fileFlags)
#define SRF_IS_FIRSTCALL()
#define SRF_PERCALL_SETUP()
#define SRF_RETURN_NEXT(_funcctx, _result)
#define SRF_FIRSTCALL_INIT()
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
#define SRF_RETURN_DONE(_funcctx)
bool IsPostmasterEnvironment
Assert(PointerIsAligned(start, uint64))
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
static void dlist_init(dlist_head *head)
static void dlist_node_init(dlist_node *node)
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
int xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
#define VirtualTransactionIdIsValid(vxid)
#define GET_VXID_FROM_PGPROC(vxid_dst, proc)
#define LocalTransactionIdIsValid(lxid)
#define VirtualTransactionIdEquals(vxid1, vxid2)
bool LWLockHeldByMe(LWLock *lock)
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
void LWLockRelease(LWLock *lock)
#define NUM_LOCK_PARTITIONS
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
void * palloc0(Size size)
void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
#define RESUME_INTERRUPTS()
#define AmStartupProcess()
#define START_CRIT_SECTION()
#define HOLD_INTERRUPTS()
#define END_CRIT_SECTION()
TimestampTz replorigin_session_origin_timestamp
void replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr local_commit)
RepOriginId replorigin_session_origin
void replorigin_advance(RepOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
XLogRecPtr replorigin_session_origin_lsn
#define InvalidRepOriginId
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
#define ERRCODE_DATA_CORRUPTED
#define COMP_CRC32C(crc, data, len)
#define EQ_CRC32C(c1, c2)
void pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_item *items, bool is_redo)
void AtEOXact_PgStat(bool isCommit, bool parallel)
int pgstat_get_transactional_drops(bool isCommit, xl_xact_stats_item **items)
static Datum TransactionIdGetDatum(TransactionId X)
static Datum ObjectIdGetDatum(Oid X)
void PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit)
static int fd(const char *x, int i)
#define DELAY_CHKPT_IN_COMMIT
#define GetPGProcByNumber(n)
#define PGPROC_MAX_CACHED_SUBXIDS
#define GetNumberFromPGProc(proc)
#define DELAY_CHKPT_START
void ProcArrayAdd(PGPROC *proc)
void ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
#define INVALID_PROC_NUMBER
void RelationCacheInitFilePostInvalidate(void)
void RelationCacheInitFilePreInvalidate(void)
Size add_size(Size s1, Size s2)
Size mul_size(Size s1, Size s2)
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
void SendSharedInvalidMessages(const SharedInvalidationMessage *msgs, int n)
PGPROC * PreparedXactProcs
void StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
int smgrGetPendingDeletes(bool forCommit, RelFileLocator **ptr)
#define ERRCODE_DUPLICATE_OBJECT
MemoryContext multi_call_memory_ctx
XLogRecPtr prepare_start_lsn
XLogRecPtr prepare_end_lsn
ProcNumber locking_backend
pg_atomic_uint64 waitStart
XidCacheStatus subxidStatus
dlist_head myProcLocks[NUM_LOCK_PARTITIONS]
ProcWaitStatus waitStatus
struct StateFileChunk * next
FullTransactionId nextXid
GlobalTransaction freeGXacts
GlobalTransaction prepXacts[FLEXIBLE_ARRAY_MEMBER]
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]
TimestampTz origin_timestamp
void SubTransSetParent(TransactionId xid, TransactionId parent)
bool superuser_arg(Oid roleid)
void SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids)
bool TransactionIdDidCommit(TransactionId transactionId)
void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids)
void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids)
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
bool TransactionIdDidAbort(TransactionId transactionId)
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
#define FullTransactionIdEquals(a, b)
#define InvalidTransactionId
static FullTransactionId FullTransactionIdFromAllowableAt(FullTransactionId nextFullXid, TransactionId xid)
#define EpochFromFullTransactionId(x)
static FullTransactionId FullTransactionIdFromU64(uint64 value)
#define FullTransactionIdFollowsOrEquals(a, b)
#define TransactionIdEquals(id1, id2)
#define XidFromFullTransactionId(x)
#define TransactionIdIsValid(xid)
#define FullTransactionIdIsValid(x)
TupleDesc CreateTemplateTupleDesc(int natts)
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
static char * ReadTwoPhaseFile(FullTransactionId fxid, bool missing_ok)
static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
ProcNumber TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held)
TransactionId TwoPhaseGetOldestXidInCommit(void)
static void ProcessRecords(char *bufptr, FullTransactionId fxid, const TwoPhaseCallback callbacks[])
void TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid_res, int szgid)
void RecoverPreparedTransactions(void)
static bool twophaseExitRegistered
void restoreTwoPhaseData(void)
static GlobalTransaction TwoPhaseGetGXact(FullTransactionId fxid, bool lock_held)
bool LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, TimestampTz origin_prepare_timestamp)
Size TwoPhaseShmemSize(void)
GlobalTransaction MarkAsPreparing(FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid)
static void RecordTransactionAbortPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, RelFileLocator *rels, int nstats, xl_xact_stats_item *stats, const char *gid)
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
static FullTransactionId AdjustToFullTransactionId(TransactionId xid)
static void RecordTransactionCommitPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, RelFileLocator *rels, int nstats, xl_xact_stats_item *stats, int ninvalmsgs, SharedInvalidationMessage *invalmsgs, bool initfileinval, const char *gid)
static void RemoveGXact(GlobalTransaction gxact)
struct TwoPhaseStateData TwoPhaseStateData
static void RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning)
static GlobalTransaction MyLockedGxact
static TwoPhaseStateData * TwoPhaseState
void AtAbort_Twophase(void)
struct GlobalTransactionData GlobalTransactionData
static void save_state_data(const void *data, uint32 len)
void FinishPreparedTransaction(const char *gid, bool isCommit)
struct TwoPhaseRecordOnDisk TwoPhaseRecordOnDisk
TransactionId TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, bool *have_more)
static char * ProcessTwoPhaseBuffer(FullTransactionId fxid, XLogRecPtr prepare_start_lsn, bool fromdisk, bool setParent, bool setNextXid)
static void GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts, TransactionId *children)
void PrepareRedoRemove(TransactionId xid, bool giveWarning)
Datum pg_prepared_xact(PG_FUNCTION_ARGS)
void EndPrepare(GlobalTransaction gxact)
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
void StartPrepare(GlobalTransaction gxact)
static int GetPreparedTransactionList(GlobalTransaction *gxacts)
void TwoPhaseShmemInit(void)
void StandbyRecoverPreparedTransactions(void)
static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len)
static void AtProcExit_Twophase(int code, Datum arg)
static void PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning)
static int TwoPhaseFilePath(char *path, FullTransactionId fxid)
static void MarkAsPrepared(GlobalTransaction gxact, bool lock_held)
void PostPrepare_Twophase(void)
bool LookupGXactBySubid(Oid subid)
PGPROC * TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held)
xl_xact_prepare TwoPhaseFileHeader
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
struct StateFileChunk StateFileChunk
bool StandbyTransactionIdIsPrepared(TransactionId xid)
static GlobalTransaction LockGXact(const char *gid, Oid user)
static void MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid)
static bool IsTwoPhaseTransactionGidForSubid(Oid subid, char *gid)
void PrepareRedoAdd(FullTransactionId fxid, char *buf, XLogRecPtr start_lsn, XLogRecPtr end_lsn, RepOriginId origin_id)
static struct xllist records
struct GlobalTransactionData * GlobalTransaction
const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID+1]
const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID+1]
const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID+1]
void(* TwoPhaseCallback)(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
#define TWOPHASE_RM_MAX_ID
#define TWOPHASE_RM_END_ID
static Datum TimestampTzGetDatum(TimestampTz X)
FullTransactionId ReadNextFullTransactionId(void)
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
TransamVariablesData * TransamVariables
static void pgstat_report_wait_start(uint32 wait_event_info)
static void pgstat_report_wait_end(void)
XLogRecPtr XactLogCommitRecord(TimestampTz commit_time, int nsubxacts, TransactionId *subxacts, int nrels, RelFileLocator *rels, int ndroppedstats, xl_xact_stats_item *droppedstats, int nmsgs, SharedInvalidationMessage *msgs, bool relcacheInval, int xactflags, TransactionId twophase_xid, const char *twophase_gid)
int xactGetCommittedChildren(TransactionId **ptr)
XLogRecPtr XactLogAbortRecord(TimestampTz abort_time, int nsubxacts, TransactionId *subxacts, int nrels, RelFileLocator *rels, int ndroppedstats, xl_xact_stats_item *droppedstats, int xactflags, TransactionId twophase_xid, const char *twophase_gid)
#define XLOG_XACT_PREPARE
#define XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK
XLogRecPtr ProcLastRecPtr
bool RecoveryInProgress(void)
XLogRecPtr XactLastRecEnd
void XLogFlush(XLogRecPtr record)
#define XLOG_INCLUDE_ORIGIN
#define LSN_FORMAT_ARGS(lsn)
#define XLogRecPtrIsInvalid(r)
#define InvalidXLogRecPtr
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
void XLogRegisterData(const void *data, uint32 len)
void XLogSetRecordFlags(uint8 flags)
void XLogBeginInsert(void)
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
void XLogReaderFree(XLogReaderState *state)
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
#define XLogRecGetDataLen(decoder)
#define XLogRecGetInfo(decoder)
#define XLogRecGetRmid(decoder)
#define XLogRecGetData(decoder)
static XLogReaderState * xlogreader
void wal_segment_close(XLogReaderState *state)
void wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p)
int read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)