PostgreSQL Source Code: src/include/storage/predicate_internals.h Source File

PostgreSQL Source Code git master
predicate_internals.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * predicate_internals.h
4 * POSTGRES internal predicate locking definitions.
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/include/storage/predicate_internals.h
11 *
12 *-------------------------------------------------------------------------
13 */
14#ifndef PREDICATE_INTERNALS_H
15#define PREDICATE_INTERNALS_H
16
17#include "lib/ilist.h"
18#include "storage/lock.h"
19#include "storage/lwlock.h"
20
21/*
22 * Commit number.
23 */
24 typedef uint64 SerCommitSeqNo;
25
/*
 * Reserved commit sequence numbers:
 *	- 0 is reserved to indicate a non-existent SLRU entry; it cannot be
 *	  used as a SerCommitSeqNo, even an invalid one
 *	- InvalidSerCommitSeqNo is used to indicate a transaction that
 *	  hasn't committed yet, so use a number greater than all valid
 *	  ones to make comparison do the expected thing
 *	- RecoverySerCommitSeqNo is used to refer to transactions that
 *	  happened before a crash/recovery, since we restart the sequence
 *	  at that point.  It's earlier than all normal sequence numbers,
 *	  and is only used by recovered prepared transactions
 *	- FirstNormalSerCommitSeqNo is the smallest value not reserved above
 */
#define InvalidSerCommitSeqNo		((SerCommitSeqNo) PG_UINT64_MAX)
#define RecoverySerCommitSeqNo		((SerCommitSeqNo) 1)
#define FirstNormalSerCommitSeqNo	((SerCommitSeqNo) 2)

42/*
43 * The SERIALIZABLEXACT struct contains information needed for each
44 * serializable database transaction to support SSI techniques.
45 *
46 * A home-grown list is maintained in shared memory to manage these.
47 * An entry is used when the serializable transaction acquires a snapshot.
48 * Unless the transaction is rolled back, this entry must generally remain
49 * until all concurrent transactions have completed. (There are special
50 * optimizations for READ ONLY transactions which often allow them to be
51 * cleaned up earlier.) A transaction which is rolled back is cleaned up
52 * as soon as possible.
53 *
54 * Eligibility for cleanup of committed transactions is generally determined
55 * by comparing the transaction's finishedBefore field to
56 * SxactGlobalXmin.
57 */
58 typedef struct SERIALIZABLEXACT
59{
60 VirtualTransactionId vxid; /* The executing process always has one of
61 * these. */
62
63 /*
64 * We use two numbers to track the order that transactions commit. Before
65 * commit, a transaction is marked as prepared, and prepareSeqNo is set.
66 * Shortly after commit, it's marked as committed, and commitSeqNo is set.
67 * This doesn't give a strict commit order, but these two values together
68 * are good enough for us, as we can always err on the safe side and
69 * assume that there's a conflict, if we can't be sure of the exact
70 * ordering of two commits.
71 *
72 * Note that a transaction is marked as prepared for a short period during
73 * commit processing, even if two-phase commit is not used. But with
74 * two-phase commit, a transaction can stay in prepared state for some
75 * time.
76 */
77 SerCommitSeqNo prepareSeqNo;
78 SerCommitSeqNo commitSeqNo;
79
80 /* these values are not both interesting at the same time */
81 union
82 {
83 SerCommitSeqNo earliestOutConflictCommit; /* when committed with
84 * conflict out */
85 SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
86 * no conflict out */
87 } SeqNo;
88 dlist_head outConflicts; /* list of write transactions whose data we
89 * couldn't read. */
90 dlist_head inConflicts; /* list of read transactions which couldn't
91 * see our write. */
92 dlist_head predicateLocks; /* list of associated PREDICATELOCK objects */
93 dlist_node finishedLink; /* list link in
94 * FinishedSerializableTransactions */
95 dlist_node xactLink; /* PredXact->activeList/availableList */
96
97 /*
98 * perXactPredicateListLock is only used in parallel queries: it protects
99 * this SERIALIZABLEXACT's predicate lock list against other workers of
100 * the same session.
101 */
102 LWLock perXactPredicateListLock;
103
104 /*
105 * for r/o transactions: list of concurrent r/w transactions that we could
106 * potentially have conflicts with, and vice versa for r/w transactions
107 */
108 dlist_head possibleUnsafeConflicts;
109
110 TransactionId topXid; /* top level xid for the transaction, if one
111 * exists; else invalid */
112 TransactionId finishedBefore; /* invalid means still running; else the
113 * struct expires when no serializable
114 * xids are before this. */
115 TransactionId xmin; /* the transaction's snapshot xmin */
116 uint32 flags; /* OR'd combination of values defined below */
117 int pid; /* pid of associated process */
118 int pgprocno; /* pgprocno of associated process */
119 } SERIALIZABLEXACT;
120
/*
 * Bit values for SERIALIZABLEXACT.flags; the field holds an OR'd
 * combination of these, so every flag occupies its own bit.
 */
#define SXACT_FLAG_COMMITTED			0x00000001	/* already committed */
#define SXACT_FLAG_PREPARED				0x00000002	/* about to commit */
#define SXACT_FLAG_ROLLED_BACK			0x00000004	/* already rolled back */
#define SXACT_FLAG_DOOMED				0x00000008	/* will roll back */
/*
 * The following flag actually means that the flagged transaction has a
 * conflict out *to a transaction which committed ahead of it*.  It's hard
 * to get that into a name of a reasonable length.
 */
#define SXACT_FLAG_CONFLICT_OUT			0x00000010
#define SXACT_FLAG_READ_ONLY			0x00000020
#define SXACT_FLAG_DEFERRABLE_WAITING	0x00000040
#define SXACT_FLAG_RO_SAFE				0x00000080
#define SXACT_FLAG_RO_UNSAFE			0x00000100
#define SXACT_FLAG_SUMMARY_CONFLICT_IN	0x00000200
#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
/*
 * The following flag means the transaction has been partially released
 * already, but is being preserved because parallel workers might have a
 * reference to it.  It'll be recycled by the leader at end-of-transaction.
 */
#define SXACT_FLAG_PARTIALLY_RELEASED	0x00000800

144 typedef struct PredXactListData
145{
146 dlist_head availableList;
147 dlist_head activeList;
148
149 /*
150 * These global variables are maintained when registering and cleaning up
151 * serializable transactions. They must be global across all backends,
152 * but are not needed outside the predicate.c source file. Protected by
153 * SerializableXactHashLock.
154 */
155 TransactionId SxactGlobalXmin; /* global xmin for active serializable
156 * transactions */
157 int SxactGlobalXminCount; /* how many active serializable
158 * transactions have this xmin */
159 int WritableSxactCount; /* how many non-read-only serializable
160 * transactions are active */
161 SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
162 * increasing number for commits
163 * of serializable transactions */
164 /* Protected by SerializableXactHashLock. */
165 SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks and
166 * inConflicts for committed
167 * transactions through this seq
168 * no */
169 /* Protected by SerializableFinishedListLock. */
170 SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
171 * seq no */
172 SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
173
174 SERIALIZABLEXACT *element;
175 } PredXactListData;
176
177 typedef struct PredXactListData *PredXactList;
178
179 #define PredXactListDataSize \
180 ((Size)MAXALIGN(sizeof(PredXactListData)))
181
182
183/*
184 * The following types are used to provide lists of rw-conflicts between
185 * pairs of transactions. Since exactly the same information is needed,
186 * they are also used to record possible unsafe transaction relationships
187 * for purposes of identifying safe snapshots for read-only transactions.
188 *
189 * When a RWConflictData is not in use to record either type of relationship
190 * between a pair of transactions, it is kept on an "available" list. The
191 * outLink field is used for maintaining that list.
192 */
193 typedef struct RWConflictData
194{
195 dlist_node outLink; /* link for list of conflicts out from a sxact */
196 dlist_node inLink; /* link for list of conflicts in to a sxact */
197 SERIALIZABLEXACT *sxactOut;
198 SERIALIZABLEXACT *sxactIn;
199 } RWConflictData;
200
201 typedef struct RWConflictData *RWConflict;
202
203 #define RWConflictDataSize \
204 ((Size)MAXALIGN(sizeof(RWConflictData)))
205
206 typedef struct RWConflictPoolHeaderData
207{
208 dlist_head availableList;
209 RWConflict element;
210 } RWConflictPoolHeaderData;
211
212 typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;
213
214 #define RWConflictPoolHeaderDataSize \
215 ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
216
217
218/*
219 * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
220 * transaction or any of its subtransactions.
221 */
222 typedef struct SERIALIZABLEXIDTAG
223{
224 TransactionId xid;
225 } SERIALIZABLEXIDTAG;
226
227/*
228 * The SERIALIZABLEXID struct provides a link from a TransactionId for a
229 * serializable transaction to the related SERIALIZABLEXACT record, even if
230 * the transaction has completed and its connection has been closed.
231 *
232 * These are created as new top level transaction IDs are first assigned to
233 * transactions which are participating in predicate locking. This may
234 * never happen for a particular transaction if it doesn't write anything.
235 * They are removed with their related serializable transaction objects.
236 *
237 * The SubTransGetTopmostTransaction method is used where necessary to get
238 * from an XID which might be from a subtransaction to the top level XID.
239 */
240 typedef struct SERIALIZABLEXID
241{
242 /* hash key */
243 SERIALIZABLEXIDTAG tag;
244
245 /* data */
246 SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
247 } SERIALIZABLEXID;
248
249
250/*
251 * The PREDICATELOCKTARGETTAG struct identifies a database object which can
252 * be the target of predicate locks.
253 *
254 * Note that the hash function being used doesn't properly respect tag
255 * length -- if the length of the structure isn't a multiple of four bytes it
256 * will go to a four byte boundary past the end of the tag. If you change
257 * this struct, make sure any slack space is initialized, so that any random
258 * bytes in the middle or at the end are not included in the hash.
259 *
260 * TODO SSI: If we always use the same fields for the same type of value, we
261 * should rename these. Holding off until it's clear there are no exceptions.
262 * Since indexes are relations with blocks and tuples, it's looking likely that
263 * the rename will be possible. If not, we may need to divide the last field
264 * and use part of it for a target type, so that we know how to interpret the
265 * data..
266 */
267 typedef struct PREDICATELOCKTARGETTAG
268{
269 uint32 locktag_field1; /* a 32-bit ID field */
270 uint32 locktag_field2; /* a 32-bit ID field */
271 uint32 locktag_field3; /* a 32-bit ID field */
272 uint32 locktag_field4; /* a 32-bit ID field */
273 } PREDICATELOCKTARGETTAG;
274
275/*
276 * The PREDICATELOCKTARGET struct represents a database object on which there
277 * are predicate locks.
278 *
279 * A hash list of these objects is maintained in shared memory. An entry is
280 * added when a predicate lock is requested on an object which doesn't
281 * already have one. An entry is removed when the last lock is removed from
282 * its list.
283 */
284 typedef struct PREDICATELOCKTARGET
285{
286 /* hash key */
287 PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
288
289 /* data */
290 dlist_head predicateLocks; /* list of PREDICATELOCK objects assoc. with
291 * predicate lock target */
292 } PREDICATELOCKTARGET;
293
294
295/*
296 * The PREDICATELOCKTAG struct identifies an individual predicate lock.
297 *
298 * It is the combination of predicate lock target (which is a lockable
299 * object) and a serializable transaction which has acquired a lock on that
300 * target.
301 */
302 typedef struct PREDICATELOCKTAG
303{
304 PREDICATELOCKTARGET *myTarget;
305 SERIALIZABLEXACT *myXact;
306 } PREDICATELOCKTAG;
307
308/*
309 * The PREDICATELOCK struct represents an individual lock.
310 *
311 * An entry can be created here when the related database object is read, or
312 * by promotion of multiple finer-grained targets. All entries related to a
313 * serializable transaction are removed when that serializable transaction is
314 * cleaned up. Entries can also be removed when they are combined into a
315 * single coarser-grained lock entry.
316 */
317 typedef struct PREDICATELOCK
318{
319 /* hash key */
320 PREDICATELOCKTAG tag; /* unique identifier of lock */
321
322 /* data */
323 dlist_node targetLink; /* list link in PREDICATELOCKTARGET's list of
324 * predicate locks */
325 dlist_node xactLink; /* list link in SERIALIZABLEXACT's list of
326 * predicate locks */
327 SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
328 } PREDICATELOCK;
329
330
331/*
332 * The LOCALPREDICATELOCK struct represents a local copy of data which is
333 * also present in the PREDICATELOCK table, organized for fast access without
334 * needing to acquire a LWLock. It is strictly for optimization.
335 *
336 * Each serializable transaction creates its own local hash table to hold a
337 * collection of these. This information is used to determine when a number
338 * of fine-grained locks should be promoted to a single coarser-grained lock.
339 * The information is maintained more-or-less in parallel to the
340 * PREDICATELOCK data, but because this data is not protected by locks and is
341 * only used in an optimization heuristic, it is allowed to drift in a few
342 * corner cases where maintaining exact data would be expensive.
343 *
344 * The hash table is created when the serializable transaction acquires its
345 * snapshot, and its memory is released upon completion of the transaction.
346 */
347 typedef struct LOCALPREDICATELOCK
348{
349 /* hash key */
350 PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
351
352 /* data */
353 bool held; /* is lock held, or just its children? */
354 int childLocks; /* number of child locks currently held */
355 } LOCALPREDICATELOCK;
356
357
/*
 * The types of predicate locks which can be acquired, listed from the
 * coarsest target (a whole relation) to the finest (a single tuple).
 */
typedef enum PredicateLockTargetType
{
	PREDLOCKTAG_RELATION,
	PREDLOCKTAG_PAGE,
	PREDLOCKTAG_TUPLE,
	/* TODO SSI: Other types may be needed for index locking */
} PredicateLockTargetType;

370/*
371 * This structure is used to quickly capture a copy of all predicate
372 * locks. This is currently used only by the pg_lock_status function,
373 * which in turn is used by the pg_locks view.
374 */
375 typedef struct PredicateLockData
376{
377 int nelements;
378 PREDICATELOCKTARGETTAG *locktags;
379 SERIALIZABLEXACT *xacts;
380 } PredicateLockData;
381
382
/*
 * These macros define how we map logical IDs of lockable objects into the
 * physical fields of PREDICATELOCKTARGETTAG.  Use these to set up values,
 * rather than accessing the fields directly.  Note multiple eval of target!
 *
 * Field usage: field1 = database OID, field2 = relation OID,
 * field3 = block number, field4 = offset number.  Coarser targets store
 * InvalidBlockNumber / InvalidOffsetNumber in the fields they do not use,
 * which is how GET_PREDICATELOCKTARGETTAG_TYPE tells the levels apart.
 */
#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = InvalidBlockNumber, \
	 (locktag).locktag_field4 = InvalidOffsetNumber)

#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = (blocknum), \
	 (locktag).locktag_field4 = InvalidOffsetNumber)

#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = (blocknum), \
	 (locktag).locktag_field4 = (offnum))

#define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
	((Oid) (locktag).locktag_field1)
#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
	((Oid) (locktag).locktag_field2)
#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
	((BlockNumber) (locktag).locktag_field3)
#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
	((OffsetNumber) (locktag).locktag_field4)
/* A valid offset means tuple; else a valid block means page; else relation. */
#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag)							 \
	(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
	 (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE :   \
	  PREDLOCKTAG_RELATION))

/*
 * Two-phase commit statefile records.  There are two types: for each
 * transaction, we generate one per-transaction record and a variable
 * number of per-predicate-lock records.
 */
typedef enum TwoPhasePredicateRecordType
{
	TWOPHASEPREDICATERECORD_XACT,
	TWOPHASEPREDICATERECORD_LOCK,
} TwoPhasePredicateRecordType;

430/*
431 * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
432 * much is needed because most of it not meaningful for a recovered
433 * prepared transaction.
434 *
435 * In particular, we do not record the in and out conflict lists for a
436 * prepared transaction because the associated SERIALIZABLEXACTs will
437 * not be available after recovery. Instead, we simply record the
438 * existence of each type of conflict by setting the transaction's
439 * summary conflict in/out flag.
440 */
441 typedef struct TwoPhasePredicateXactRecord
442{
443 TransactionId xmin;
444 uint32 flags;
445 } TwoPhasePredicateXactRecord;
446
447/* Per-lock state */
448 typedef struct TwoPhasePredicateLockRecord
449{
450 PREDICATELOCKTARGETTAG target;
451 uint32 filler; /* to avoid length change in back-patched fix */
452 } TwoPhasePredicateLockRecord;
453
454 typedef struct TwoPhasePredicateRecord
455{
456 TwoPhasePredicateRecordType type;
457 union
458 {
459 TwoPhasePredicateXactRecord xactRecord;
460 TwoPhasePredicateLockRecord lockRecord;
461 } data;
462 } TwoPhasePredicateRecord;
463
/*
 * Define a macro to use for an "empty" SERIALIZABLEXACT reference: a null
 * pointer of type SERIALIZABLEXACT *.
 */
#define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)

470/*
471 * Function definitions for functions needing awareness of predicate
472 * locking internals.
473 */
474extern PredicateLockData *GetPredicateLockStatusData(void);
475extern int GetSafeSnapshotBlockingPids(int blocked_pid,
476 int *output, int output_size);
477
478#endif /* PREDICATE_INTERNALS_H */
uint64_t uint64
Definition: c.h:539
uint32_t uint32
Definition: c.h:538
uint32 TransactionId
Definition: c.h:657
FILE * output
PredicateLockData * GetPredicateLockStatusData(void)
Definition: predicate.c:1445
struct RWConflictData RWConflictData
struct PredXactListData * PredXactList
struct TwoPhasePredicateRecord TwoPhasePredicateRecord
struct RWConflictPoolHeaderData RWConflictPoolHeaderData
struct SERIALIZABLEXACT SERIALIZABLEXACT
TwoPhasePredicateRecordType
@ TWOPHASEPREDICATERECORD_XACT
@ TWOPHASEPREDICATERECORD_LOCK
int GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
Definition: predicate.c:1628
struct TwoPhasePredicateLockRecord TwoPhasePredicateLockRecord
PredicateLockTargetType
@ PREDLOCKTAG_RELATION
@ PREDLOCKTAG_PAGE
@ PREDLOCKTAG_TUPLE
struct PREDICATELOCKTAG PREDICATELOCKTAG
struct TwoPhasePredicateXactRecord TwoPhasePredicateXactRecord
struct LOCALPREDICATELOCK LOCALPREDICATELOCK
struct RWConflictPoolHeaderData * RWConflictPoolHeader
struct SERIALIZABLEXIDTAG SERIALIZABLEXIDTAG
struct PREDICATELOCKTARGET PREDICATELOCKTARGET
struct PREDICATELOCKTARGETTAG PREDICATELOCKTARGETTAG
struct SERIALIZABLEXID SERIALIZABLEXID
struct PredicateLockData PredicateLockData
struct RWConflictData * RWConflict
uint64 SerCommitSeqNo
struct PredXactListData PredXactListData
struct PREDICATELOCK PREDICATELOCK
PREDICATELOCKTARGETTAG tag
Definition: lwlock.h:42
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGET * myTarget
PREDICATELOCKTARGETTAG tag
PREDICATELOCKTAG tag
SerCommitSeqNo commitSeqNo
dlist_node targetLink
dlist_node xactLink
SERIALIZABLEXACT * element
dlist_head availableList
SerCommitSeqNo LastSxactCommitSeqNo
SerCommitSeqNo CanPartialClearThrough
SERIALIZABLEXACT * OldCommittedSxact
SerCommitSeqNo HavePartialClearedThrough
TransactionId SxactGlobalXmin
PREDICATELOCKTARGETTAG * locktags
SERIALIZABLEXACT * xacts
SERIALIZABLEXACT * sxactIn
SERIALIZABLEXACT * sxactOut
dlist_node outLink
VirtualTransactionId vxid
LWLock perXactPredicateListLock
TransactionId xmin
SerCommitSeqNo lastCommitBeforeSnapshot
dlist_head possibleUnsafeConflicts
dlist_head inConflicts
union SERIALIZABLEXACT::@128 SeqNo
dlist_head outConflicts
SerCommitSeqNo prepareSeqNo
dlist_head predicateLocks
SerCommitSeqNo commitSeqNo
TransactionId finishedBefore
TransactionId topXid
SerCommitSeqNo earliestOutConflictCommit
dlist_node finishedLink
SERIALIZABLEXIDTAG tag
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGETTAG target
TwoPhasePredicateRecordType type
union TwoPhasePredicateRecord::@129 data
TwoPhasePredicateLockRecord lockRecord
TwoPhasePredicateXactRecord xactRecord
Definition: ilist.h:152
Definition: ilist.h:138

AltStyle によって変換されたページ (->オリジナル) /