1/*-------------------------------------------------------------------------
3 * predicate_internals.h
4 * POSTGRES internal predicate locking definitions.
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/include/storage/predicate_internals.h
12 *-------------------------------------------------------------------------
14#ifndef PREDICATE_INTERNALS_H
15#define PREDICATE_INTERNALS_H
27 * Reserved commit sequence numbers:
28 * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
29 * used as a SerCommitSeqNo, even an invalid one
30 * - InvalidSerCommitSeqNo is used to indicate a transaction that
31 * hasn't committed yet, so use a number greater than all valid
32 * ones to make comparison do the expected thing
33 * - RecoverySerCommitSeqNo is used to refer to transactions that
34 * happened before a crash/recovery, since we restart the sequence
35 * at that point. It's earlier than all normal sequence numbers,
36 * and is only used by recovered prepared transactions
/* Marks a transaction that has not committed yet; greater than every valid number. */
38 #define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
/* Refers to transactions from before a crash/recovery; earlier than every normal number. */
39 #define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
/* Lowest normal sequence number; 0 and 1 are reserved. */
40 #define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
43 * The SERIALIZABLEXACT struct contains information needed for each
44 * serializable database transaction to support SSI techniques.
46 * A home-grown list is maintained in shared memory to manage these.
47 * An entry is used when the serializable transaction acquires a snapshot.
48 * Unless the transaction is rolled back, this entry must generally remain
49 * until all concurrent transactions have completed. (There are special
50 * optimizations for READ ONLY transactions which often allow them to be
51 * cleaned up earlier.) A transaction which is rolled back is cleaned up
52 * as soon as possible.
54 * Eligibility for cleanup of committed transactions is generally determined
55 * by comparing the transaction's finishedBefore field to
64 * We use two numbers to track the order that transactions commit. Before
65 * commit, a transaction is marked as prepared, and prepareSeqNo is set.
66 * Shortly after commit, it's marked as committed, and commitSeqNo is set.
67 * This doesn't give a strict commit order, but these two values together
68 * are good enough for us, as we can always err on the safe side and
69 * assume that there's a conflict, if we can't be sure of the exact
70 * ordering of two commits.
72 * Note that a transaction is marked as prepared for a short period during
73 * commit processing, even if two-phase commit is not used. But with
74 * two-phase commit, a transaction can stay in prepared state for some
80 /* these values are not both interesting at the same time */
94 * FinishedSerializableTransactions */
98 * perXactPredicateListLock is only used in parallel queries: it protects
99 * this SERIALIZABLEXACT's predicate lock list against other workers of
105 * for r/o transactions: list of concurrent r/w transactions that we could
106 * potentially have conflicts with, and vice versa for r/w transactions
111 * exists; else invalid */
113 * struct expires when no serializable
114 * xids are before this. */
117 int pid;
/* pid of associated process */
/* SXACT_FLAG_* values: every flag is a distinct single bit. */
#define SXACT_FLAG_COMMITTED			(1 << 0)	/* already committed */
#define SXACT_FLAG_PREPARED				(1 << 1)	/* about to commit */
#define SXACT_FLAG_ROLLED_BACK			(1 << 2)	/* already rolled back */
#define SXACT_FLAG_DOOMED				(1 << 3)	/* will roll back */
/*
 * The following flag actually means that the flagged transaction has a
 * conflict out *to a transaction which committed ahead of it*.  It's hard
 * to get that into a name of a reasonable length.
 */
#define SXACT_FLAG_CONFLICT_OUT			(1 << 4)
#define SXACT_FLAG_READ_ONLY			(1 << 5)
#define SXACT_FLAG_DEFERRABLE_WAITING	(1 << 6)
#define SXACT_FLAG_RO_SAFE				(1 << 7)
#define SXACT_FLAG_RO_UNSAFE			(1 << 8)
#define SXACT_FLAG_SUMMARY_CONFLICT_IN	(1 << 9)
#define SXACT_FLAG_SUMMARY_CONFLICT_OUT	(1 << 10)
/*
 * The following flag means the transaction has been partially released
 * already, but is being preserved because parallel workers might have a
 * reference to it.  It'll be recycled by the leader at end-of-transaction.
 */
#define SXACT_FLAG_PARTIALLY_RELEASED	(1 << 11)
150 * These global variables are maintained when registering and cleaning up
151 * serializable transactions. They must be global across all backends,
152 * but are not needed outside the predicate.c source file. Protected by
153 * SerializableXactHashLock.
158 * transactions have this xmin */
160 * transactions are active */
162 * increasing number for commits
163 * of serializable transactions */
164 /* Protected by SerializableXactHashLock. */
166 * inConflicts for committed
167 * transactions through this seq
169 /* Protected by SerializableFinishedListLock. */
/* sizeof(PredXactListData), rounded up by MAXALIGN and expressed as a Size. */
179 #define PredXactListDataSize \
180 ((Size)MAXALIGN(sizeof(PredXactListData)))
184 * The following types are used to provide lists of rw-conflicts between
185 * pairs of transactions. Since exactly the same information is needed,
186 * they are also used to record possible unsafe transaction relationships
187 * for purposes of identifying safe snapshots for read-only transactions.
189 * When a RWConflictData is not in use to record either type of relationship
190 * between a pair of transactions, it is kept on an "available" list. The
191 * outLink field is used for maintaining that list.
/* sizeof(RWConflictData), rounded up by MAXALIGN and expressed as a Size. */
203 #define RWConflictDataSize \
204 ((Size)MAXALIGN(sizeof(RWConflictData)))
/* sizeof(RWConflictPoolHeaderData), rounded up by MAXALIGN and expressed as a Size. */
214 #define RWConflictPoolHeaderDataSize \
215 ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
219 * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
220 * transaction or any of its subtransactions.
228 * The SERIALIZABLEXID struct provides a link from a TransactionId for a
229 * serializable transaction to the related SERIALIZABLEXACT record, even if
230 * the transaction has completed and its connection has been closed.
232 * These are created as new top level transaction IDs are first assigned to
233 * transactions which are participating in predicate locking. This may
234 * never happen for a particular transaction if it doesn't write anything.
235 * They are removed with their related serializable transaction objects.
237 * The SubTransGetTopmostTransaction method is used where necessary to get
238 * from an XID which might be from a subtransaction to the top level XID.
251 * The PREDICATELOCKTARGETTAG struct identifies a database object which can
252 * be the target of predicate locks.
254 * Note that the hash function being used doesn't properly respect tag
255 * length -- if the length of the structure isn't a multiple of four bytes it
256 * will go to a four byte boundary past the end of the tag. If you change
257 * this struct, make sure any slack space is initialized, so that any random
258 * bytes in the middle or at the end are not included in the hash.
260 * TODO SSI: If we always use the same fields for the same type of value, we
261 * should rename these. Holding off until it's clear there are no exceptions.
262 * Since indexes are relations with blocks and tuples, it's looking likely that
263 * the rename will be possible. If not, we may need to divide the last field
264 * and use part of it for a target type, so that we know how to interpret the
276 * The PREDICATELOCKTARGET struct represents a database object on which there
277 * are predicate locks.
279 * A hash list of these objects is maintained in shared memory. An entry is
280 * added when a predicate lock is requested on an object which doesn't
281 * already have one. An entry is removed when the last lock is removed from
291 * predicate lock target */
296 * The PREDICATELOCKTAG struct identifies an individual predicate lock.
298 * It is the combination of predicate lock target (which is a lockable
299 * object) and a serializable transaction which has acquired a lock on that
309 * The PREDICATELOCK struct represents an individual lock.
311 * An entry can be created here when the related database object is read, or
312 * by promotion of multiple finer-grained targets. All entries related to a
313 * serializable transaction are removed when that serializable transaction is
314 * cleaned up. Entries can also be removed when they are combined into a
315 * single coarser-grained lock entry.
332 * The LOCALPREDICATELOCK struct represents a local copy of data which is
333 * also present in the PREDICATELOCK table, organized for fast access without
334 * needing to acquire a LWLock. It is strictly for optimization.
336 * Each serializable transaction creates its own local hash table to hold a
337 * collection of these. This information is used to determine when a number
338 * of fine-grained locks should be promoted to a single coarser-grained lock.
339 * The information is maintained more-or-less in parallel to the
340 * PREDICATELOCK data, but because this data is not protected by locks and is
341 * only used in an optimization heuristic, it is allowed to drift in a few
342 * corner cases where maintaining exact data would be expensive.
344 * The hash table is created when the serializable transaction acquires its
345 * snapshot, and its memory is released upon completion of the transaction.
353 bool held;
/* is lock held, or just its children? */
359 * The types of predicate locks which can be acquired.
366 /* TODO SSI: Other types may be needed for index locking */
371 * This structure is used to quickly capture a copy of all predicate
372 * locks. This is currently used only by the pg_lock_status function,
373 * which in turn is used by the pg_locks view.
384 * These macros define how we map logical IDs of lockable objects into the
385 * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
386 * rather than accessing the fields directly. Note multiple eval of target!
/*
 * Setters for the four physical fields of a predicate lock target tag.
 *
 * The tuple form assigns every field.  The page form is the same assignment
 * with the offset set to InvalidOffsetNumber, and the relation form also
 * sets the block number to InvalidBlockNumber.  The tag argument is
 * evaluated once per field, so it must not have side effects.
 */
#define SET_PREDICATELOCKTARGETTAG_TUPLE(tag,db,rel,blk,off) \
	((tag).locktag_field1 = (db), \
	 (tag).locktag_field2 = (rel), \
	 (tag).locktag_field3 = (blk), \
	 (tag).locktag_field4 = (off))

#define SET_PREDICATELOCKTARGETTAG_PAGE(tag,db,rel,blk) \
	SET_PREDICATELOCKTARGETTAG_TUPLE(tag, db, rel, blk, InvalidOffsetNumber)

#define SET_PREDICATELOCKTARGETTAG_RELATION(tag,db,rel) \
	SET_PREDICATELOCKTARGETTAG_TUPLE(tag, db, rel, \
									 InvalidBlockNumber, InvalidOffsetNumber)
/*
 * Accessors for the physical tag fields; each casts its field to the type
 * of the logical ID stored there.
 */
#define GET_PREDICATELOCKTARGETTAG_DB(tag)			((Oid) (tag).locktag_field1)
#define GET_PREDICATELOCKTARGETTAG_RELATION(tag)	((Oid) (tag).locktag_field2)
#define GET_PREDICATELOCKTARGETTAG_PAGE(tag)		((BlockNumber) (tag).locktag_field3)
#define GET_PREDICATELOCKTARGETTAG_OFFSET(tag)		((OffsetNumber) (tag).locktag_field4)
/*
 * Classify a tag by which of its fields hold valid values: a valid offset
 * means a tuple tag; otherwise a valid block number means a page tag;
 * otherwise it is a relation tag.  The argument may be evaluated twice.
 */
#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
	(((locktag).locktag_field4 == InvalidOffsetNumber) ? \
	 (((locktag).locktag_field3 == InvalidBlockNumber) ? \
	  PREDLOCKTAG_RELATION : PREDLOCKTAG_PAGE) : \
	 PREDLOCKTAG_TUPLE)
420 * Two-phase commit statefile records. There are two types: for each
421 * transaction, we generate one per-transaction record and a variable
422 * number of per-predicate-lock records.
431 * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
432 * much is needed because most of it is not meaningful for a recovered
433 * prepared transaction.
435 * In particular, we do not record the in and out conflict lists for a
436 * prepared transaction because the associated SERIALIZABLEXACTs will
437 * not be available after recovery. Instead, we simply record the
438 * existence of each type of conflict by setting the transaction's
439 * summary conflict in/out flag.
465 * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
/* A null SERIALIZABLEXACT pointer. */
467 #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
471 * Function declarations for functions needing awareness of predicate
476 int *
output,
int output_size);
478#endif /* PREDICATE_INTERNALS_H */
PredicateLockData * GetPredicateLockStatusData(void)
struct RWConflictData RWConflictData
struct PredXactListData * PredXactList
struct TwoPhasePredicateRecord TwoPhasePredicateRecord
struct RWConflictPoolHeaderData RWConflictPoolHeaderData
struct SERIALIZABLEXACT SERIALIZABLEXACT
TwoPhasePredicateRecordType
@ TWOPHASEPREDICATERECORD_XACT
@ TWOPHASEPREDICATERECORD_LOCK
int GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
struct TwoPhasePredicateLockRecord TwoPhasePredicateLockRecord
struct PREDICATELOCKTAG PREDICATELOCKTAG
struct TwoPhasePredicateXactRecord TwoPhasePredicateXactRecord
struct LOCALPREDICATELOCK LOCALPREDICATELOCK
struct RWConflictPoolHeaderData * RWConflictPoolHeader
struct SERIALIZABLEXIDTAG SERIALIZABLEXIDTAG
struct PREDICATELOCKTARGET PREDICATELOCKTARGET
struct PREDICATELOCKTARGETTAG PREDICATELOCKTARGETTAG
struct SERIALIZABLEXID SERIALIZABLEXID
struct PredicateLockData PredicateLockData
struct RWConflictData * RWConflict
struct PredXactListData PredXactListData
struct PREDICATELOCK PREDICATELOCK
PREDICATELOCKTARGETTAG tag
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGET * myTarget
PREDICATELOCKTARGETTAG tag
dlist_head predicateLocks
SerCommitSeqNo commitSeqNo
SERIALIZABLEXACT * element
SerCommitSeqNo LastSxactCommitSeqNo
SerCommitSeqNo CanPartialClearThrough
SERIALIZABLEXACT * OldCommittedSxact
SerCommitSeqNo HavePartialClearedThrough
TransactionId SxactGlobalXmin
PREDICATELOCKTARGETTAG * locktags
SERIALIZABLEXACT * sxactIn
SERIALIZABLEXACT * sxactOut
VirtualTransactionId vxid
LWLock perXactPredicateListLock
SerCommitSeqNo lastCommitBeforeSnapshot
dlist_head possibleUnsafeConflicts
union SERIALIZABLEXACT::@128 SeqNo
SerCommitSeqNo prepareSeqNo
dlist_head predicateLocks
SerCommitSeqNo commitSeqNo
TransactionId finishedBefore
SerCommitSeqNo earliestOutConflictCommit
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGETTAG target
TwoPhasePredicateRecordType type
union TwoPhasePredicateRecord::@129 data
TwoPhasePredicateLockRecord lockRecord
TwoPhasePredicateXactRecord xactRecord