1/*-------------------------------------------------------------------------
4 * per-process shared memory data structures
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/include/storage/proc.h
12 *-------------------------------------------------------------------------
27 * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
28 * for non-aborted subtransactions of its current top transaction. These
29 * have to be treated as running XIDs by other backends.
31 * We also keep track of whether the cache overflowed (ie, the transaction has
32 * generated at least one subtransaction that didn't fit in the cache).
33 * If none of the caches have overflowed, we can assume that an XID that's not
34 * listed anywhere in the PGPROC array is not a running transaction. Else we
35 * have to look at pg_subtrans.
37 * See src/test/isolation/specs/subxid-overflow.spec if you change this.
39 #define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value; upper bound on the subxids one backend advertises */
43 /* number of cached subxids, never more than PGPROC_MAX_CACHED_SUBXIDS */
45 /* has PGPROC->subxids overflowed */
55 * Flags for PGPROC->statusFlags and PROC_HDR->statusFlags[]
/*
 * Each flag below is a distinct single bit (0x01 through 0x20), so the flags
 * can be OR-ed together and tested independently of one another.
 */
57 #define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */
58 #define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */
59 #define PROC_IN_SAFE_IC 0x04 /* currently running CREATE INDEX
60 * CONCURRENTLY or REINDEX
61 * CONCURRENTLY on non-expressional,
62 * non-partial index */
63#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
64#define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical
65 * decoding outside xact */
66#define PROC_AFFECTS_ALL_HORIZONS 0x20 /* this proc's xmin must be
67 * included in vacuum horizons
70/* flags reset at EOXact */
/*
 * Covers exactly three bits (0x02 | 0x04 | 0x08 == 0x0E).
 * PROC_IS_AUTOVACUUM, PROC_IN_LOGICAL_DECODING and PROC_AFFECTS_ALL_HORIZONS
 * are not part of this mask.
 */
71#define PROC_VACUUM_STATE_MASK \
72 (PROC_IN_VACUUM | PROC_IN_SAFE_IC | PROC_VACUUM_FOR_WRAPAROUND)
75 * Xmin-related flags. Make sure any flags that affect how the process' Xmin
76 * value is interpreted by VACUUM are included here.
78#define PROC_XMIN_FLAGS (PROC_IN_VACUUM | PROC_IN_SAFE_IC) /* i.e. PROC_VACUUM_STATE_MASK without PROC_VACUUM_FOR_WRAPAROUND */
81 * We allow a limited number of "weak" relation locks (AccessShareLock,
82 * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure
83 * (or rather in shared memory referenced from PGPROC) rather than the main
84 * lock table. This eases contention on the lock manager LWLocks. See
85 * storage/lmgr/README for additional details.
90 * Define the maximum number of fast-path locking groups per backend.
91 * This must be a power-of-two value. The actual number of fast-path
92 * lock groups is calculated in InitializeFastPathLocks() based on
93 * max_locks_per_transaction. 1024 is an arbitrary upper limit (matching
94 * max_locks_per_transaction = 16k). Values over 1024 are unlikely to be
95 * beneficial as there are bottlenecks we'll hit way before that.
97#define FP_LOCK_GROUPS_PER_BACKEND_MAX 1024
98#define FP_LOCK_SLOTS_PER_GROUP 16 /* don't change */
/*
 * Total number of fast-path lock slots for one backend: slots per group
 * times the number of groups.  With the maximum group count this is
 * 16 * 1024 = 16384.  The macro expands to an expression that reads the
 * FastPathLockGroupsPerBackend variable, so it is not a compile-time
 * constant.
 */
99#define FastPathLockSlotsPerBackend() \
100 (FP_LOCK_SLOTS_PER_GROUP * FastPathLockGroupsPerBackend)
103 * Flags for PGPROC.delayChkptFlags
105 * These flags can be used to delay the start or completion of a checkpoint
106 * for short periods. A flag is in effect if the corresponding bit is set in
107 * the PGPROC of any backend.
109 * For our purposes here, a checkpoint has three phases: (1) determine the
110 * location to which the redo pointer will be moved, (2) write all the
111 * data durably to disk, and (3) WAL-log the checkpoint.
113 * Setting DELAY_CHKPT_START prevents the system from moving from phase 1
114 * to phase 2. This is useful when we are performing a WAL-logged modification
115 * of data that will be flushed to disk in phase 2. By setting this flag
116 * before writing WAL and clearing it after we've both written WAL and
117 * performed the corresponding modification, we ensure that if the WAL record
118 * is inserted prior to the new redo point, the corresponding data changes will
119 * also be flushed to disk before the checkpoint can complete. (In the
120 * extremely common case where the data being modified is in shared buffers
121 * and we acquire an exclusive content lock and MarkBufferDirty() on the
122 * relevant buffers before writing WAL, this mechanism is not needed, because
123 * phase 2 will block until we release the content lock and then flush the
124 * modified data to disk. See transam/README and SyncOneBuffer().)
126 * Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
127 * to phase 3. This is useful if we are performing a WAL-logged operation that
128 * might invalidate buffers, such as relation truncation. In this case, we need
129 * to ensure that any buffers which were invalidated and thus not flushed by
130 * the checkpoint are actually destroyed on disk. Replay can cope with a file
131 * or block that doesn't exist, but not with a block that has the wrong contents.
134 * Setting DELAY_CHKPT_IN_COMMIT is similar to setting DELAY_CHKPT_START, but
135 * it explicitly indicates that the reason for delaying the checkpoint is due
136 * to a transaction being within a critical commit section. We need this new
137 * flag to ensure all the transactions that have acquired commit timestamp are
138 * finished before we allow the logical replication client to advance its xid
139 * which is used to hold back dead rows for conflict detection.
141 #define DELAY_CHKPT_START (1<<0)
142 #define DELAY_CHKPT_COMPLETE (1<<1)
/*
 * DELAY_CHKPT_IN_COMMIT is a two-bit value: "<<" binds tighter than "|", so
 * it evaluates to (1<<0) | (1<<2) == 0x5.  Because it includes the
 * DELAY_CHKPT_START bit, a test such as (flags & DELAY_CHKPT_START) is also
 * true for a flags word that has DELAY_CHKPT_IN_COMMIT set; compare against
 * the full value to detect DELAY_CHKPT_IN_COMMIT specifically.
 */
143 #define DELAY_CHKPT_IN_COMMIT (DELAY_CHKPT_START | 1<<2)
153 * Each backend has a PGPROC struct in shared memory. There is also a list of
154 * currently-unused PGPROC structs that will be reallocated to new backends.
156 * links: list link for any list the PGPROC is in. When waiting for a lock,
157 * the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC
158 * is linked into ProcGlobal's freeProcs list.
160 * Note: twophase.c also sets up a dummy PGPROC struct for each currently
161 * prepared transaction. These PGPROCs appear in the ProcArray data structure
162 * so that the prepared transactions appear to be still running and are
163 * correctly shown as holding locks. A prepared transaction PGPROC can be
164 * distinguished from a real one at need by the fact that it has pid == 0.
165 * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
166 * but its myProcLocks[] lists are valid.
168 * We allow many fields of this struct to be accessed without locks, such as
169 * delayChkptFlags and isRegularBackend. However, keep in mind that writing
170 * mirrored ones (see below) requires holding ProcArrayLock or XidGenLock in
171 * at least shared mode, so that pgxactoff does not change concurrently.
175 * Some fields in PGPROC (see "mirrored in ..." comment) are mirrored into an
176 * element of more densely packed ProcGlobal arrays. These arrays are indexed
177 * by PGPROC->pgxactoff. Both copies need to be maintained coherently.
179 * NB: The pgxactoff indexed value can *never* be accessed without holding locks.
182 * See PROC_HDR for details.
196 * executed by this proc, if running and XID
197 * is assigned; else InvalidTransactionId.
198 * mirrored in ProcGlobal->xids[pgxactoff] */
201 * starting our xact, excluding LAZY VACUUM:
202 * vacuum must not remove tuples deleted by
205 int pid;
/* Backend's process ID; 0 if prepared xact */
207 int pgxactoff;
/* offset into various ProcGlobal->arrays with
208 * data mirrored from this PGPROC */
211 * Currently running top-level transaction's virtual xid. Together these
212 * form a VirtualTransactionId, but we don't use that struct because this
213 * is not atomically assignable as a whole, and we want to enforce code to
214 * consider both parts separately. See comments at VirtualTransactionId.
219 * GetNumberFromPGProc(proc). For prepared
220 * xacts, ID of the original backend that
221 * processed the transaction. For unused
222 * PGPROC entries, INVALID_PROC_NUMBER. */
224 * currently being executed by this
225 * proc, if running; else
226 * InvalidLocalTransactionId */
229 /* These fields are zero while a backend is still starting up: */
231 Oid roleId;
/* OID of role using this backend */
239 * While in hot standby mode, shows that a conflict signal has been sent
240 * for the current transaction. Set/cleared while holding ProcArrayLock,
241 * though not required. Accessed without lock, if needed.
245 /* Info about LWLock the process is currently waiting for, if any. */
250 /* Support for condition variables. */
253 /* Info about lock the process is currently waiting for, if any. */
254 /* waitLock and waitProcLock are NULL if not currently waiting. */
259 * lock object by this backend */
267 * ProcGlobal->statusFlags[pgxactoff] */
270 * Info to allow us to wait for synchronous replication, if needed.
271 * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
272 * syncRepState must not be touched except by owning process or WALSender.
273 * syncRepLinks used only while holding SyncRepLock.
280 * All PROCLOCK objects for locks held or awaited by this backend are
281 * linked into one of these lists, according to the partition number of
287 * ProcGlobal->subxidStates[i] */
290 /* Support for group XID clearing. */
291 /* true, if member of ProcArray group waiting for XID clear */
293 /* next ProcArray group member waiting for XID clear */
297 * latest transaction id among the transaction's main XID and
304 /* Support for group transaction status update. */
311 * transaction id of clog group member */
315 /* Lock manager data, recording fast-path locks taken by this backend. */
319 bool fpVXIDLock;
/* are we holding a fast-path VXID lock? */
324 * Support for lock groups. Use LockHashPartitionLockByProc on the group
325 * leader to get the LWLock protecting these fields.
332/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
338 * There is one ProcGlobal struct for the whole database cluster.
340 * Adding/Removing an entry into the procarray requires holding *both*
341 * ProcArrayLock and XidGenLock in exclusive mode (in that order). Both are
342 * needed because the dense arrays (see below) are accessed from
343 * GetNewTransactionId() and GetSnapshotData(), and we don't want to add
344 * further contention by both using the same lock. Adding/Removing a procarray
345 * entry is much less frequent.
347 * Some fields in PGPROC are mirrored into more densely packed arrays (e.g.
348 * xids), with one entry for each backend. These arrays only contain entries
349 * for PGPROCs that have been added to the shared array with ProcArrayAdd()
350 * (in contrast to PGPROC array which has unused PGPROCs interspersed).
352 * The dense arrays are indexed by PGPROC->pgxactoff. Any concurrent
353 * ProcArrayAdd() / ProcArrayRemove() can lead to pgxactoff of a procarray
354 * member to change. Therefore it is only safe to use PGPROC->pgxactoff to
355 * access the dense array while holding either ProcArrayLock or XidGenLock.
357 * As long as a PGPROC is in the procarray, the mirrored values need to be
358 * maintained in both places in a coherent manner.
360 * The denser separate arrays are beneficial for three main reasons: First, to
361 * allow for as tight loops accessing the data as possible. Second, to prevent
362 * updates of frequently changing data (e.g. xmin) from invalidating
363 * cachelines also containing less frequently changing data (e.g. xid,
364 * statusFlags). Third, to condense frequently accessed data into as few
365 * cachelines as possible.
367 * There are two main reasons to have the data mirrored between these dense
368 * arrays and PGPROC. First, as explained above, a PGPROC's array entries can
369 * only be accessed with either ProcArrayLock or XidGenLock held, whereas the
370 * PGPROC entries do not require that (obviously there may still be locking
371 * requirements around the individual field, separate from the concerns
372 * here). That is particularly important for a backend to efficiently check
373 * its own values, which it often can safely do without locking. Second, the
374 * PGPROC fields make it possible to avoid unnecessary accesses and modifications to the
375 * dense arrays. A backend's own PGPROC is more likely to be in a local cache,
376 * whereas the cachelines for the dense array will be modified by other
377 * backends (often removing it from the cache for other cores/sockets). At
378 * commit/abort time a check of the PGPROC value can avoid accessing/dirtying
379 * the corresponding array value.
381 * Basically it makes sense to access the PGPROC variable when checking a
382 * single backend's data, especially when already looking at the PGPROC for
383 * other reasons. It makes sense to look at the "dense" arrays if we
384 * need to look at many / most entries, because we then benefit from the
385 * reduced indirection and better cross-process cache-ability.
387 * When entering a PGPROC for 2PC transactions with ProcArrayAdd(), the data
388 * in the dense arrays is initialized from the PGPROC while it already holds
393 /* Array of PGPROC structures (not including dummies for prepared txns) */
396 /* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */
400 * Array mirroring PGPROC.subxidStatus for each PGPROC currently in the
406 * Array mirroring PGPROC.statusFlags for each PGPROC currently in the
411 /* Length of allProcs array */
413 /* Head of list of free PGPROC structures */
415 /* Head of list of autovacuum & special worker free PGPROC structures */
417 /* Head of list of bgworker free PGPROC structures */
419 /* Head of list of walsender free PGPROC structures */
421 /* First pgproc waiting for group XID clear */
423 /* First pgproc waiting for group transaction status update */
427 * Current slot numbers of some auxiliary processes. There can be only one
428 * of each of these running at a time.
433 /* Current shared estimate of appropriate spins_per_delay value */
435 /* Buffer id of the buffer that Startup process waits for pin on, or -1 */
444 * Accessors for getting PGPROC given a ProcNumber and vice versa.
/*
 * GetPGProcByNumber(n) yields the address of element n of
 * ProcGlobal->allProcs; no bounds check is performed.
 *
 * GetNumberFromPGProc(proc) is the inverse: it subtracts the address of
 * element 0 from proc, so the result is a pointer difference (an element
 * index) and is only meaningful when proc points into ProcGlobal->allProcs.
 */
446#define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)])
447#define GetNumberFromPGProc(proc) ((proc) - &ProcGlobal->allProcs[0])
450 * We set aside some extra PGPROC structures for "special worker" processes,
451 * which are full-fledged backends (they can run transactions)
452 * but are unique animals that there's never more than one of.
453 * Currently there are two such processes: the autovacuum launcher
454 * and the slotsync worker.
456#define NUM_SPECIAL_WORKER_PROCS 2 /* autovacuum launcher + slotsync worker */
459 * We set aside some extra PGPROC structures for auxiliary processes,
460 * ie things that aren't full-fledged backends (they cannot run transactions
461 * or take heavyweight locks) but need shmem access.
463 * Background writer, checkpointer, WAL writer, WAL summarizer, and archiver
464 * run during normal operation. Startup process and WAL receiver also consume
465 * 2 slots, but WAL writer is launched only after startup has exited, so we only need 6 slots.
468#define MAX_IO_WORKERS 32
469#define NUM_AUXILIARY_PROCS (6 + MAX_IO_WORKERS) /* 6 + 32 = 38 */
472/* configurable options */
488 * Function Prototypes
513 int *lockHoldersNum);
uint32 LocalTransactionId
#define NUM_LOCK_PARTITIONS
ProcWaitStatus JoinWaitQueue(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
void ProcSendSignal(ProcNumber procNumber)
PGDLLIMPORT int IdleInTransactionSessionTimeout
Size ProcGlobalShmemSize(void)
void ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus)
PGDLLIMPORT int IdleSessionTimeout
bool HaveNFreeProcs(int n, int *nfree)
void InitAuxiliaryProcess(void)
void GetLockHoldersAndWaiters(LOCALLOCK *locallock, StringInfo lock_holders_sbuf, StringInfo lock_waiters_sbuf, int *lockHoldersNum)
PGDLLIMPORT PROC_HDR * ProcGlobal
int GetStartupBufferPinWaitBufId(void)
ProcWaitStatus ProcSleep(LOCALLOCK *locallock)
PGDLLIMPORT PGPROC * MyProc
void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
#define PGPROC_MAX_CACHED_SUBXIDS
int ProcGlobalSemas(void)
void ProcReleaseLocks(bool isCommit)
void LockErrorCleanup(void)
bool BecomeLockGroupMember(PGPROC *leader, int pid)
PGDLLIMPORT int StatementTimeout
void BecomeLockGroupLeader(void)
PGDLLIMPORT int DeadlockTimeout
PGDLLIMPORT int LockTimeout
void CheckDeadLockAlert(void)
void InitProcessPhase2(void)
void InitProcGlobal(void)
PGDLLIMPORT bool log_lock_waits
@ PROC_WAIT_STATUS_WAITING
PGDLLIMPORT PGPROC * PreparedXactProcs
PGDLLIMPORT int TransactionTimeout
PGPROC * AuxiliaryPidGetProc(int pid)
struct XidCacheStatus XidCacheStatus
void SetStartupBufferPinWaitBufId(int bufid)
void ProcWaitForSignal(uint32 wait_event_info)
PGDLLIMPORT int FastPathLockGroupsPerBackend
NON_EXEC_STATIC PGPROC * AuxiliaryProcs
NON_EXEC_STATIC slock_t * ProcStructLock
bool procArrayGroupMember
XLogRecPtr clogGroupMemberLsn
pg_atomic_uint32 procArrayGroupNext
dlist_head lockGroupMembers
dlist_head * procgloballist
bool recoveryConflictPending
TransactionId clogGroupMemberXid
int64 clogGroupMemberPage
pg_atomic_uint64 waitStart
pg_atomic_uint32 clogGroupNext
XidStatus clogGroupMemberXidStatus
XidCacheStatus subxidStatus
LocalTransactionId fpLocalTransactionId
TransactionId procArrayGroupMemberXid
dlist_head myProcLocks[NUM_LOCK_PARTITIONS]
ProcWaitStatus waitStatus
XidCacheStatus * subxidStates
dlist_head autovacFreeProcs
ProcNumber checkpointerProc
int startupBufferPinWaitBufId
pg_atomic_uint32 clogGroupFirst
dlist_head walsenderFreeProcs
dlist_head bgworkerFreeProcs
pg_atomic_uint32 procArrayGroupFirst
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]