1/*-------------------------------------------------------------------------
3 * Replication slot management.
5 * Copyright (c) 2012-2025, PostgreSQL Global Development Group
7 *-------------------------------------------------------------------------
20/* directory to store replication slot data in */
21 #define PG_REPLSLOT_DIR "pg_replslot"
24 * The reserved name for a replication slot used to retain dead tuples for
25 * conflict detection in logical replication. See
26 * maybe_advance_nonremovable_xid() for details.
28 #define CONFLICT_DETECTION_SLOT "pg_conflict_detection"
31 * Behaviour of replication slots, upon release or crash.
33 * Slots marked as PERSISTENT are crash-safe and will not be dropped when
34 * released. Slots marked as EPHEMERAL will be dropped when released or after
35 * restarts. Slots marked TEMPORARY will be dropped at the end of a session
38 * EPHEMERAL is used as a not-quite-ready state when creating persistent
39 * slots. EPHEMERAL slots can be made PERSISTENT by calling
40 * ReplicationSlotPersist(). For a slot that goes away at the end of a
41 * session, TEMPORARY is the appropriate choice.
51 * Slots can be invalidated, e.g. due to max_slot_wal_keep_size. If so, the
52 * 'invalidated' field is set to a value other than _NONE.
54 * When adding a new invalidation cause here, its value must be a power of 2
55 * (e.g., 1, 2, 4...) for proper bitwise operations. Also, remember to update
56 * RS_INVAL_MAX_CAUSES below, and SlotInvalidationCauses in slot.c.
61 /* required WAL has been removed */
63 /* required rows have been removed */
65 /* wal_level insufficient for slot */
67 /* idle slot timeout has occurred */
71/* Maximum number of invalidation causes */
72 #define RS_INVAL_MAX_CAUSES 4
75 * On-Disk data of a replication slot, preserved across restarts.
79 /* The slot's identifier */
82 /* database the slot is active on */
86 * The slot's behaviour when being dropped (or restored after a crash).
91 * xmin horizon for data
93 * NB: This may represent a value that hasn't been written to disk yet;
94 * see notes for effective_xmin, below.
99 * xmin horizon for catalog tuples
101 * NB: This may represent a value that hasn't been written to disk yet;
102 * see notes for effective_xmin, below.
106 /* oldest LSN that might be required by this replication slot */
109 /* RS_INVAL_NONE if valid, or the reason for having been invalidated */
113 * Oldest LSN that the client has acked receipt for. This is used as the
114 * start_lsn point in case the client doesn't specify one, and also as a
115 * safety measure to jump forwards in case the client specifies a
116 * start_lsn that's further in the past than this value.
121 * LSN at which we enabled two_phase commit for this slot or LSN at which
122 * we found a consistent point at the time of slot creation.
127 * Allow decoding of prepared transactions?
135 * Was this slot synchronized from the primary server?
140 * Is this a failover slot (sync candidate for standbys)? Only relevant
141 * for logical slots on the primary server.
147 * Shared memory state of a single replication slot.
149 * The in-memory data of replication slots follows a locking model based
150 * on two linked concepts:
151 * - A replication slot's in_use flag is switched when added or discarded using
152 * the LWLock ReplicationSlotControlLock, which needs to be held in exclusive
153 * mode when updating the flag by the backend owning the slot and doing the
154 * operation, while readers (concurrent backends not owning the slot) need
155 * to hold it in shared mode when looking at replication slot data.
156 * - Individual fields are protected by a mutex, where only the backend owning
157 * the slot is authorized to update the fields from its own slot. The
158 * backend owning the slot does not need to take this lock when reading its
159 * own fields, while concurrent backends not owning this slot should take the
160 * lock when reading this slot's data.
164 /* lock, on same cacheline as effective_xmin */
167 /* is this slot defined */
170 /* Who is streaming out changes for this slot? 0 in unused slots. */
173 /* any outstanding modifications? */
178 * For logical decoding, it's extremely important that we never remove any
179 * data that's still needed for decoding purposes, even after a crash;
180 * otherwise, decoding will produce wrong answers. Ordinary streaming
181 * replication also needs to prevent old row versions from being removed
182 * too soon, but the worst consequence we might encounter there is
183 * unwanted query cancellations on the standby. Thus, for logical
184 * decoding, this value represents the latest xmin that has actually been
185 * written to disk, whereas for streaming replication, it's just the same
186 * as the persistent value (data.xmin).
191 /* data surviving shutdowns and crashes */
194 /* is somebody performing io on this slot? */
197 /* Condition variable signaled when active_pid changes */
200 /* all the remaining data is only used for logical slots */
203 * When the client has confirmed flushes >= candidate_xmin_lsn we can
204 * advance the catalog xmin. When restart_valid has been passed,
205 * restart_lsn can be increased.
213 * This value tracks the last confirmed_flush LSN flushed which is used
214 * during a shutdown checkpoint to decide if a logical slot's data should be
215 * forcibly flushed or not.
220 * The time when the slot became inactive. For synced slots on a standby
221 * server, it represents the time when slot synchronization was most
227 * Latest restart_lsn that has been flushed to disk. For persistent slots
228 * the flushed LSN should be taken into account when calculating the
229 * oldest LSN for WAL segments removal.
231 * Do not assume that restart_lsn will always move forward, i.e., that the
232 * previously flushed restart_lsn is always behind data.restart_lsn. In
233 * streaming replication using a physical slot, the restart_lsn is updated
234 * based on the flushed WAL position reported by the walreceiver.
236 * This replication mode allows duplicate WAL records to be received and
237 * overwritten. If the walreceiver receives older WAL records and then
238 * reports them as flushed to the walsender, the restart_lsn may appear to
241 * This typically occurs at the beginning of replication. One reason is
242 * that streaming replication starts at the beginning of a segment, so, if
243 * restart_lsn is in the middle of a segment, it will be updated to an
244 * earlier LSN, see RequestXLogStreaming. Another reason is that the
245 * walreceiver chooses its startpoint based on the replayed LSN, so, if
246 * some records have been received but not yet applied, they will be
247 * received again, which leads to updating the restart_lsn to an earlier
254 #define SlotIsPhysical(slot) ((slot)->data.database == InvalidOid)
255 #define SlotIsLogical(slot) ((slot)->data.database != InvalidOid)
258 * Shared memory control area for all replication slots.
263 * This array should be declared [FLEXIBLE_ARRAY_MEMBER], but for some
264 * reason you can't do that in an otherwise-empty struct.
270 * Set slot's inactive_since property unless it was previously invalidated.
287 * Pointers to shared memory
297/* shmem initialization functions */
301/* management of individual slots */
313 bool error_if_invalid);
322 bool allow_reserved_name,
int ReplicationSlotIndex(ReplicationSlot *slot)
struct ReplicationSlotCtlData ReplicationSlotCtlData
PGDLLIMPORT char * synchronized_standby_slots
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
PGDLLIMPORT ReplicationSlot * MyReplicationSlot
void CheckPointReplicationSlots(bool is_shutdown)
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
void ReplicationSlotDropAcquired(void)
void ReplicationSlotMarkDirty(void)
void ReplicationSlotReserveWal(void)
bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
void ReplicationSlotsDropDBSlots(Oid dboid)
XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void)
PGDLLIMPORT int idle_replication_slot_timeout_secs
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *cause_name)
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
void ReplicationSlotPersist(void)
void ReplicationSlotDrop(const char *name, bool nowait)
bool SlotExistsInSyncStandbySlots(const char *slot_name)
struct ReplicationSlotPersistentData ReplicationSlotPersistentData
ReplicationSlotPersistency
void ReplicationSlotSave(void)
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
void ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char *syncslotname, Size szslot)
void CheckSlotPermissions(void)
bool ReplicationSlotName(int index, Name name)
void ReplicationSlotsShmemInit(void)
bool ReplicationSlotValidateName(const char *name, bool allow_reserved_name, int elevel)
void ReplicationSlotAlter(const char *name, const bool *failover, const bool *two_phase)
void ReplicationSlotRelease(void)
void WaitForStandbyConfirmation(XLogRecPtr wait_for_lsn)
PGDLLIMPORT ReplicationSlotCtlData * ReplicationSlotCtl
bool StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
ReplicationSlotInvalidationCause
void ReplicationSlotsComputeRequiredLSN(void)
void ReplicationSlotCleanup(bool synced_only)
void ReplicationSlotInitialize(void)
PGDLLIMPORT int max_replication_slots
struct ReplicationSlot ReplicationSlot
void StartupReplicationSlots(void)
void ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok)
void CheckSlotRequirements(void)
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
Size ReplicationSlotsShmemSize(void)
const char * GetSlotInvalidationCauseName(ReplicationSlotInvalidationCause cause)
#define SpinLockRelease(lock)
#define SpinLockAcquire(lock)
ReplicationSlot replication_slots[1]
TransactionId catalog_xmin
XLogRecPtr confirmed_flush
ReplicationSlotPersistency persistency
ReplicationSlotInvalidationCause invalidated
XLogRecPtr candidate_xmin_lsn
TransactionId effective_catalog_xmin
XLogRecPtr candidate_restart_valid
XLogRecPtr last_saved_confirmed_flush
TransactionId effective_xmin
XLogRecPtr last_saved_restart_lsn
XLogRecPtr candidate_restart_lsn
LWLock io_in_progress_lock
ConditionVariable active_cv
TransactionId candidate_catalog_xmin
ReplicationSlotPersistentData data
TimestampTz inactive_since
static WalReceiverConn * wrconn