1/*-------------------------------------------------------------------------
 3 * Functionality for synchronizing slots to a standby server from the primary server.
6 * Copyright (c) 2024-2025, PostgreSQL Global Development Group
9 * src/backend/replication/logical/slotsync.c
11 * This file contains the code for slot synchronization on a physical standby
12 * to fetch logical failover slots information from the primary server, create
13 * the slots on the standby and synchronize them periodically.
15 * Slot synchronization can be performed either automatically by enabling slot
16 * sync worker or manually by calling SQL function pg_sync_replication_slots().
18 * If the WAL corresponding to the remote's restart_lsn is not available on the
19 * physical standby or the remote's catalog_xmin precedes the oldest xid for
20 * which it is guaranteed that rows wouldn't have been removed then we cannot
21 * create the local standby slot because that would mean moving the local slot
22 * backward and decoding won't be possible via such a slot. In this case, the
23 * slot will be marked as RS_TEMPORARY. Once the primary server catches up,
24 * the slot will be marked as RS_PERSISTENT (which means sync-ready) after
25 * which slot sync worker can perform the sync periodically or user can call
26 * pg_sync_replication_slots() periodically to perform the syncs.
28 * If synchronized slots fail to build a consistent snapshot from the
29 * restart_lsn before reaching confirmed_flush_lsn, they would become
30 * unreliable after promotion due to potential data loss from changes
31 * before reaching a consistent point. This can happen because the slots can
32 * be synced at some random time and we may not reach the consistent point
33 * at the same WAL location as the primary. So, we mark such slots as
34 * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
35 * consistent point, they will be marked as RS_PERSISTENT.
37 * The slot sync worker waits for some time before the next synchronization,
38 * with the duration varying based on whether any slots were updated during
 39 * the last cycle. Refer to the comments above wait_for_slot_activity() for more details.
42 * Any standby synchronized slots will be dropped if they no longer need
 43 * to be synchronized. See comment atop drop_local_obsolete_slots() for more details.
45 *---------------------------------------------------------------------------
72 * Struct for sharing information to control slot synchronization.
74 * The slot sync worker's pid is needed by the startup process to shut it
75 * down during promotion. The startup process shuts down the slot sync worker
76 * and also sets stopSignaled=true to handle the race condition when the
77 * postmaster has not noticed the promotion yet and thus may end up restarting
78 * the slot sync worker. If stopSignaled is set, the worker will exit in such a
79 * case. The SQL function pg_sync_replication_slots() will also error out if
80 * this flag is set. Note that we don't need to reset this variable as after
81 * promotion the slot sync worker won't be restarted because the pmState
82 * changes to PM_RUN from PM_HOT_STANDBY and we don't support demoting
83 * primary without restarting the server. See LaunchMissingBackgroundProcesses.
 85 * The 'syncing' flag is needed to prevent concurrent slot syncs to avoid slot overwrites.
88 * The 'last_start_time' is needed by postmaster to start the slot sync worker
89 * once per SLOTSYNC_RESTART_INTERVAL_SEC. In cases where an immediate restart
90 * is expected (e.g., slot sync GUCs change), slot sync worker will reset
91 * last_start_time before exiting, so that postmaster can start the worker
92 * without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
109 * The sleep time (ms) between slot-sync cycles varies dynamically
110 * (within a MIN/MAX range) according to slot activity. See
111 * wait_for_slot_activity() for details.
113 #define MIN_SLOTSYNC_WORKER_NAPTIME_MS 200
114 #define MAX_SLOTSYNC_WORKER_NAPTIME_MS 30000 /* 30s */
118/* The restart interval for slot sync work used by postmaster */
119 #define SLOTSYNC_RESTART_INTERVAL_SEC 10
122 * Flag to tell if we are syncing replication slots. Unlike the 'syncing' flag
123 * in SlotSyncCtxStruct, this flag is true only if the current process is
124 * performing slot synchronization.
 129 * Structure to hold information fetched from the primary server about a logical failover slot.
144 /* RS_INVAL_NONE if valid, or the reason of invalidation */
152 * If necessary, update the local synced slot's metadata based on the data
153 * from the remote slot.
155 * If no update was needed (the data of the remote slot is the same as the
156 * local slot) return false, otherwise true.
158 * *found_consistent_snapshot will be true iff the remote slot's LSN or xmin is
159 * modified, and decoding from the corresponding LSN's can reach a
160 * consistent snapshot.
162 * *remote_slot_precedes will be true if the remote slot's LSN or xmin
163 * precedes locally reserved position.
167 bool *found_consistent_snapshot,
168 bool *remote_slot_precedes)
171 bool updated_xmin_or_lsn =
false;
172 bool updated_config =
false;
176 if (found_consistent_snapshot)
177 *found_consistent_snapshot =
false;
179 if (remote_slot_precedes)
180 *remote_slot_precedes =
false;
 183 * Don't overwrite if we already have a newer catalog_xmin and restart_lsn.
191 * This can happen in following situations:
193 * If the slot is temporary, it means either the initial WAL location
194 * reserved for the local slot is ahead of the remote slot's
195 * restart_lsn or the initial xmin_horizon computed for the local slot
196 * is ahead of the remote slot.
198 * If the slot is persistent, both restart_lsn and catalog_xmin of the
199 * synced slot could still be ahead of the remote slot. Since we use
200 * slot advance functionality to keep snapbuild/slot updated, it is
201 * possible that the restart_lsn and catalog_xmin are advanced to a
202 * later position than it has on the primary. This can happen when
203 * slot advancing machinery finds running xacts record after reaching
204 * the consistent state at a later point than the primary where it
205 * serializes the snapshot and updates the restart_lsn.
207 * We LOG the message if the slot is temporary as it can help the user
208 * to understand why the slot is not sync-ready. In the case of a
209 * persistent slot, it would be a more common case and won't directly
210 * impact the users, so we used DEBUG1 level to log the message.
213 errmsg(
"could not synchronize replication slot \"%s\"",
215 errdetail(
"Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
221 if (remote_slot_precedes)
222 *remote_slot_precedes =
true;
225 * Skip updating the configuration. This is required to avoid syncing
226 * two_phase_at without syncing confirmed_lsn. Otherwise, the prepared
227 * transaction between old confirmed_lsn and two_phase_at will
228 * unexpectedly get decoded and sent to the downstream after
229 * promotion. See comments in ReorderBufferFinishPrepared.
235 * Attempt to sync LSNs and xmins only if remote slot is ahead of local
244 * We can't directly copy the remote slot's LSN or xmin unless there
245 * exists a consistent snapshot at that point. Otherwise, after
246 * promotion, the slots may not reach a consistent point before the
247 * confirmed_flush_lsn which can lead to a data loss. To avoid data
248 * loss, we let slot machinery advance the slot which ensures that
249 * snapbuilder/slot statuses are updated properly.
254 * Update the slot info directly if there is a serialized snapshot
255 * at the restart_lsn, as the slot can quickly reach consistency
256 * at restart_lsn by restoring the snapshot.
264 if (found_consistent_snapshot)
265 *found_consistent_snapshot =
true;
270 found_consistent_snapshot);
275 errmsg_internal(
"synchronized confirmed_flush for slot \"%s\" differs from remote slot",
282 updated_xmin_or_lsn =
true;
293 /* Avoid expensive operations while holding a spinlock. */
304 updated_config =
true;
307 * Ensure that there is no risk of sending prepared transactions
308 * unexpectedly after the promotion.
314 * We have to write the changed xmin to disk *before* we change the
315 * in-memory value, otherwise after a crash we wouldn't know that some
316 * catalog tuples might have been removed already.
318 if (updated_config || updated_xmin_or_lsn)
325 * Now the new xmin is safely on disk, we can let the global value
326 * advance. We do not take ProcArrayLock or similar since we only advance
327 * xmin here and there's not much harm done by a concurrent computation
330 if (updated_xmin_or_lsn)
340 return updated_config || updated_xmin_or_lsn;
 344 * Get the list of local logical slots that are synchronized from the primary server.
358 /* Check if it is a synchronized slot */
362 local_slots =
lappend(local_slots, s);
372 * Helper function to check if local_slot is required to be retained.
374 * Return false either if local_slot does not exist in the remote_slots list
 375 * or is invalidated while the corresponding remote slot is still valid, otherwise true.
381 bool remote_exists =
false;
382 bool locally_invalidated =
false;
388 remote_exists =
true;
391 * If remote slot is not invalidated but local slot is marked as
392 * invalidated, then set locally_invalidated flag.
395 locally_invalidated =
404 return (remote_exists && !locally_invalidated);
408 * Drop local obsolete slots.
410 * Drop the local slots that no longer need to be synced i.e. these either do
411 * not exist on the primary or are no longer enabled for failover.
413 * Additionally, drop any slots that are valid on the primary but got
 414 * invalidated on the standby. This situation may occur due to the following reasons:
416 * - The 'max_slot_wal_keep_size' on the standby is insufficient to retain WAL
417 * records from the restart_lsn of the slot.
 418 * - 'primary_slot_name' is temporarily reset to null and the physical slot is removed.
420 * These dropped slots will get recreated in next sync-cycle and it is okay to
421 * drop and recreate such slots as long as these are not consumable on the
422 * standby (which is the case currently).
424 * Note: Change of 'wal_level' on the primary server to a level lower than
425 * logical may also result in slot invalidation and removal on the standby.
426 * This is because such 'wal_level' change is only possible if the logical
427 * slots are removed on the primary server, so it's expected to see the
428 * slots being invalidated and removed on the standby too (and re-created
429 * if they are re-created on the primary server).
438 /* Drop the local slot if it is not required to be retained. */
444 * Use shared lock to prevent a conflict with
445 * ReplicationSlotsDropDBSlots(), trying to drop the same slot
446 * during a drop-database operation.
452 * In the small window between getting the slot to drop and
453 * locking the database, there is a possibility of a parallel
454 * database drop by the startup process and the creation of a new
455 * slot by the user. This new user-created slot may end up using
456 * the same shared memory as that of 'local_slot'. Thus check if
457 * local_slot is still the synced one before performing actual
461 synced_slot = local_slot->in_use && local_slot->data.synced;
474 errmsg(
"dropped replication slot \"%s\" of database with OID %u",
475 NameStr(local_slot->data.name),
476 local_slot->data.database));
482 * Reserve WAL for the currently active local slot using the specified WAL
483 * location (restart_lsn).
485 * If the given WAL location has been removed, reserve WAL using the oldest
486 * existing WAL segment.
504 /* Prevent WAL removal as fast as possible */
510 * Find the oldest existing WAL segment file.
512 * Normally, we can determine it by using the last removed segment
513 * number. However, if no WAL segment files have been removed by a
514 * checkpoint since startup, we need to search for the oldest segment
515 * file from the current timeline existing in XLOGDIR.
517 * XXX: Currently, we are searching for the oldest segment in the
518 * current timeline as there is less chance of the slot's restart_lsn
519 * from being some prior timeline, and even if it happens, in the
520 * worst case, we will wait to sync till the slot's restart_lsn moved
521 * to the current timeline.
525 if (oldest_segno == 1)
534 segno, oldest_segno);
537 * If all required WAL is still there, great, otherwise retry. The
538 * slot should prevent further removal of WAL, unless there's a
539 * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
540 * the new restart_lsn above, so normally we should never need to loop
543 if (segno >= oldest_segno)
546 /* Retry using the location of the oldest wal segment */
552 * If the remote restart_lsn and catalog_xmin have caught up with the
553 * local ones, then update the LSNs and persist the local synced slot for
554 * future synchronization; otherwise, do nothing.
556 * Return true if the slot is marked as RS_PERSISTENT (sync-ready), otherwise
563 bool found_consistent_snapshot =
false;
564 bool remote_slot_precedes =
false;
567 &found_consistent_snapshot,
568 &remote_slot_precedes);
571 * Check if the primary server has caught up. Refer to the comment atop
572 * the file for details on this check.
574 if (remote_slot_precedes)
577 * The remote slot didn't catch up to locally reserved position.
579 * We do not drop the slot because the restart_lsn can be ahead of the
580 * current location when recreating the slot in the next cycle. It may
581 * take more time to create such a slot. Therefore, we keep this slot
582 * and attempt the synchronization in the next cycle.
588 * Don't persist the slot if it cannot reach the consistent point from the
589 * restart_lsn. See comments atop this file.
591 if (!found_consistent_snapshot)
594 errmsg(
"could not synchronize replication slot \"%s\"", remote_slot->
name),
595 errdetail(
"Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
604 errmsg(
"newly created replication slot \"%s\" is sync-ready now",
611 * Synchronize a single slot to the given position.
613 * This creates a new slot if there is no existing one and updates the
614 * metadata of the slot as per the data received from the primary server.
616 * The slot is created as a temporary slot and stays in the same state until the
617 * remote_slot catches up with locally reserved position and local slot is
618 * updated. The slot is then persisted and is considered as sync-ready for
621 * Returns TRUE if the local slot is updated.
628 bool slot_updated =
false;
631 * Make sure that concerned WAL is received and flushed before syncing
632 * slot to target lsn received from the primary server.
638 * Can get here only if GUC 'synchronized_standby_slots' on the
639 * primary server was not configured correctly.
642 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
643 errmsg(
"skipping slot synchronization because the received slot sync"
644 " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
652 /* Search for the named slot */
661 /* User-created slot with the same name exists, raise ERROR. */
664 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
665 errmsg(
"exiting from slot synchronization because same"
666 " name slot \"%s\" already exists on the standby",
670 * The slot has been synchronized before.
672 * It is important to acquire the slot here before checking
673 * invalidation. If we don't acquire the slot first, there could be a
674 * race condition that the local slot could be invalidated just after
675 * checking the 'invalidated' flag here and we could end up
676 * overwriting 'invalidated' flag to remote_slot's value. See
677 * InvalidatePossiblyObsoleteSlot() where it invalidates slot directly
678 * if the slot is not acquired by other processes.
680 * XXX: If it ever turns out that slot acquire/release is costly for
681 * cases when none of the slot properties is changed then we can do a
682 * pre-check to ensure that at least one of the slot properties is
683 * changed before acquiring the slot.
690 * Copy the invalidation cause from remote only if local slot is not
691 * invalidated locally, we don't want to overwrite existing one.
700 /* Make sure the invalidated state persists across server restart */
707 /* Skip the sync of an invalidated slot */
714 /* Slot not ready yet, let's attempt to make it sync-ready now. */
721 /* Slot ready for sync, so sync it. */
725 * Sanity check: As long as the invalidations are handled
726 * appropriately as above, this should never happen.
728 * We don't need to check restart_lsn here. See the comments in
729 * update_local_synced_slot() for details.
735 errdetail_internal(
"Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
743 /* Otherwise create the slot first. */
749 /* Skip creating the local slot if remote_slot is invalidated already */
754 * We create temporary slots instead of ephemeral slots here because
755 * we want the slots to survive after releasing them. This is done to
756 * avoid dropping and re-creating the slots in each synchronization
757 * cycle if the restart_lsn or catalog_xmin of the remote slot has not
765 /* For shorter lines. */
768 /* Avoid expensive operations while holding a spinlock. */
800 * Gets the failover logical slots info from the primary server and updates
801 * the slots locally. Creates the slots if not present on the standby.
803 * Returns TRUE if any of the slots gets updated in this sync-cycle.
808#define SLOTSYNC_COLUMN_COUNT 10
810 LSNOID, XIDOID, BOOLOID, LSNOID, BOOLOID, TEXTOID, TEXTOID};
815 bool some_slot_updated =
false;
816 bool started_tx =
false;
817 const char *query =
"SELECT slot_name, plugin, confirmed_flush_lsn,"
818 " restart_lsn, catalog_xmin, two_phase, two_phase_at, failover,"
819 " database, invalidation_reason"
820 " FROM pg_catalog.pg_replication_slots"
821 " WHERE failover and NOT temporary";
823 /* The syscache access in walrcv_exec() needs a transaction env. */
830 /* Execute the query */
834 errmsg(
"could not fetch failover logical slots info from the primary server: %s",
837 /* Construct the remote_slot tuple and synchronize each slot locally */
855 * It is possible to get null values for LSN and Xmin if slot is
856 * invalidated on the primary server, so handle accordingly.
892 * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
893 * slot is valid, that means we have fetched the remote_slot in its
894 * RS_EPHEMERAL state. In such a case, don't sync it; we can always
895 * sync it in the next sync cycle when the remote_slot is persisted
896 * and has valid lsn(s) and xmin values.
898 * XXX: In future, if we plan to expose 'slot->data.persistency' in
899 * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
900 * slots in the first place.
908 /* Create list of remote slots */
909 remote_slot_list =
lappend(remote_slot_list, remote_slot);
914 /* Drop local slots that no longer need to be synced. */
917 /* Now sync the slots locally */
923 * Use shared lock to prevent a conflict with
924 * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
925 * a drop-database operation.
934 /* We are done, free remote_slot_list elements */
942 return some_slot_updated;
946 * Checks the remote server info.
948 * We ensure that the 'primary_slot_name' exists on the remote server and the
949 * remote server is not a standby node.
954#define PRIMARY_INFO_OUTPUT_COL_COUNT 2
960 bool remote_in_recovery;
961 bool primary_slot_valid;
962 bool started_tx =
false;
966 "SELECT pg_is_in_recovery(), count(*) = 1"
967 " FROM pg_catalog.pg_replication_slots"
968 " WHERE slot_type='physical' AND slot_name=%s",
971 /* The syscache access in walrcv_exec() needs a transaction env. */
983 errmsg(
"could not fetch primary slot name \"%s\" info from the primary server: %s",
985 errhint(
"Check if \"primary_slot_name\" is configured correctly."));
990 "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
996 * Slot sync is currently not supported on a cascading standby. This is
997 * because if we allow it, the primary server needs to wait for all the
998 * cascading standbys, otherwise, logical subscribers can still be ahead
999 * of one of the cascading standbys which we plan to promote. Thus, to
1000 * avoid this additional complexity, we restrict it for the time being.
1002 if (remote_in_recovery)
1004 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1005 errmsg(
"cannot synchronize replication slots from a standby server"));
1010 if (!primary_slot_valid)
1012 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1013 /* translator: second %s is a GUC variable name */
1014 errmsg(
"replication slot \"%s\" specified by \"%s\" does not exist on primary server",
1025 * Checks if dbname is specified in 'primary_conninfo'.
1027 * Error out if not specified otherwise return it.
1035 * The slot synchronization needs a database connection for walrcv_exec to
1041 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1044 * translator: first %s is a connection option; second %s is a GUC
1047 errmsg(
"replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1048 "dbname",
"primary_conninfo"));
1053 * Return true if all necessary GUCs for slot synchronization are set
1054 * appropriately, otherwise, return false.
1060 * Logical slot sync/creation requires wal_level >= logical.
1065 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1066 errmsg(
"replication slot synchronization requires \"wal_level\" >= \"logical\""));
1071 * A physical replication slot(primary_slot_name) is required on the
1072 * primary to ensure that the rows needed by the standby are not removed
1073 * after restarting, so that the synchronized slot on the standby will not
1079 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1080 /* translator: %s is a GUC variable name */
1081 errmsg(
"replication slot synchronization requires \"%s\" to be set",
"primary_slot_name"));
1086 * hot_standby_feedback must be enabled to cooperate with the physical
1087 * replication slot, which allows informing the primary about the xmin and
1088 * catalog_xmin values on the standby.
1093 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1094 /* translator: %s is a GUC variable name */
1095 errmsg(
"replication slot synchronization requires \"%s\" to be enabled",
1096 "hot_standby_feedback"));
1101 * The primary_conninfo is required to make connection to primary for
1102 * getting slots information.
1107 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1108 /* translator: %s is a GUC variable name */
1109 errmsg(
"replication slot synchronization requires \"%s\" to be set",
1110 "primary_conninfo"));
1118 * Re-read the config file.
 1120 * Exit if any of the slot sync GUCs have changed. The postmaster will restart it.
1130 bool conninfo_changed;
1131 bool primary_slotname_changed;
1138 conninfo_changed = strcmp(old_primary_conninfo,
PrimaryConnInfo) != 0;
1139 primary_slotname_changed = strcmp(old_primary_slotname,
PrimarySlotName) != 0;
1140 pfree(old_primary_conninfo);
1141 pfree(old_primary_slotname);
1146 /* translator: %s is a GUC variable name */
1147 errmsg(
"replication slot synchronization worker will shut down because \"%s\" is disabled",
"sync_replication_slots"));
1151 if (conninfo_changed ||
1152 primary_slotname_changed ||
1156 errmsg(
"replication slot synchronization worker will restart because of a parameter change"));
1159 * Reset the last-start time for this worker so that the postmaster
1160 * can restart it without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
1170 * Interrupt handler for main loop of slot sync worker.
1180 errmsg(
"replication slot synchronization worker is shutting down on receiving SIGINT"));
1190 * Connection cleanup function for slotsync worker.
1192 * Called on slotsync worker exit.
1203 * Cleanup function for slotsync worker.
1205 * Called on slotsync worker exit.
1211 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1213 * The startup process during promotion invokes ShutDownSlotSync() which
1214 * waits for slot sync to finish and it does that by checking the
1215 * 'syncing' flag. Thus the slot sync worker must be done with slots'
1216 * release and cleanup to avoid any dangling temporary slots or active
1217 * slots before it marks itself as finished syncing.
1220 /* Make sure active replication slots are released */
1224 /* Also cleanup the temporary slots. */
1232 * If syncing_slots is true, it indicates that the process errored out
1233 * without resetting the flag. So, we need to clean up shared memory and
1234 * reset the flag here.
 1246 * Sleep for long enough that we believe it's likely that the slots on primary get updated.
1249 * If there is no slot activity the wait time between sync-cycles will double
1250 * (to a maximum of 30s). If there is some slot activity the wait time between
1251 * sync-cycles is reset to the minimum (200ms).
1258 if (!some_slot_updated)
1261 * No slots were updated, so double the sleep time, but not beyond the
1262 * maximum allowable value.
1269 * Some slots were updated since the last sleep, so reset the sleep
1278 WAIT_EVENT_REPLICATION_SLOTSYNC_MAIN);
1285 * Emit an error if a promotion or a concurrent sync call is in progress.
1286 * Otherwise, advertise that a sync is in progress.
1293 /* The worker pid must not be already assigned in SlotSyncCtx */
1297 * Emit an error if startup process signaled the slot sync machinery to
1298 * stop. See comments atop SlotSyncCtxStruct.
1304 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1305 errmsg(
"cannot synchronize replication slots when standby promotion is ongoing"));
1312 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1313 errmsg(
"cannot synchronize replication slots concurrently"));
1319 * Advertise the required PID so that the startup process can kill the
1320 * slot sync worker on promotion.
1330 * Reset syncing flag.
1343 * The main loop of our worker process.
1345 * It connects to the primary server, fetches logical failover slots
1346 * information periodically in order to create and sync the slots.
1354 sigjmp_buf local_sigjmp_buf;
1357 Assert(startup_data_len == 0);
1366 * Create a per-backend PGPROC struct in shared memory. We must do this
1367 * before we access any shared memory.
1372 * Early initialization.
1379 * If an exception is encountered, processing resumes here.
1381 * We just need to clean up, report the error, and go away.
1383 * If we do not have this handling here, then since this worker process
1384 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1385 * Therefore, we create our own exception handler to catch ERRORs.
1387 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1389 /* since not using PG_TRY, must reset error stack by hand */
1392 /* Prevents interrupts while cleaning up */
1395 /* Report the error to the server log */
1399 * We can now go away. Note that because we called InitProcess, a
1400 * callback was registered to do ProcKill, which will clean up
1406 /* We can now handle ereport(ERROR) */
1409 /* Setup signal handling */
1423 /* Register it as soon as SlotSyncCtx->pid is initialized. */
1427 * Establishes SIGALRM handler and initialize timeout module. It is needed
1428 * by InitPostgres to register different timeouts.
1432 /* Load the libpq-specific functions */
1436 * Unblock signals (they were blocked when the postmaster forked us)
1441 * Set always-secure search path, so malicious users can't redirect user
1442 * code (e.g. operators).
1444 * It's not strictly necessary since we won't be scanning or writing to
1445 * any user table locally, but it's good to retain it here for added
1453 * Connect to the database specified by the user in primary_conninfo. We
1454 * need a database connection for walrcv_exec to work which we use to
1455 * fetch slot information from the remote node. See comments atop
1458 * We do not specify a specific user here since the slot sync worker will
1459 * operate as a superuser. This is safe because the slot sync worker does
1460 * not interact with user tables, eliminating the risk of executing
1461 * arbitrary code within triggers.
1474 * Establish the connection to the primary server for slot
1482 errcode(ERRCODE_CONNECTION_FAILURE),
1483 errmsg(
"synchronization worker \"%s\" could not connect to the primary server: %s",
1489 * Register the disconnection callback.
1491 * XXX: This can be combined with previous cleanup registration of
1492 * slotsync_worker_onexit() but that will need the connection to be made
1493 * global and we want to avoid introducing global for this purpose.
1498 * Using the specified primary server connection, check that we are not a
1499 * cascading standby and slot configured in 'primary_slot_name' exists on
1500 * the primary server.
1504 /* Main loop to synchronize slots */
1507 bool some_slot_updated =
false;
1517 * The slot sync worker can't get here because it will only stop when it
1518 * receives a SIGINT from the startup process, or when there is an error.
1524 * Update the inactive_since property for synced slots.
 1526 * Note that this function is currently called when we shutdown the slot sync machinery.
1535 * We need to update inactive_since only when we are promoting standby to
1536 * correctly interpret the inactive_since if the standby gets promoted
1537 * without a restart. We don't want the slots to appear inactive for a
1538 * long time after promotion if they haven't been synchronized recently.
1539 * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1544 /* The slot sync worker or SQL function mustn't be running by now */
1553 /* Check if it is a synchronized slot */
1558 /* The slot must not be acquired by any process */
1561 /* Use the same inactive_since time for all the slots. */
1573 * Shut down the slot sync worker.
1575 * This function sends signal to shutdown slot sync worker, if required. It
1576 * also waits till the slot sync worker has exited or
1577 * pg_sync_replication_slots() has finished.
1589 * Return if neither the slot sync worker is running nor the function
1590 * pg_sync_replication_slots() is executing.
1604 kill(worker_pid, SIGINT);
1606 /* Wait for slot sync to end */
1611 /* Wait a bit, we don't expect to have to wait long */
1614 10L, WAIT_EVENT_REPLICATION_SLOTSYNC_SHUTDOWN);
1624 /* Ensure that no process is syncing the slots. */
1637 * SlotSyncWorkerCanRestart
1639 * Returns true if enough time (SLOTSYNC_RESTART_INTERVAL_SEC) has passed
1640 * since it was launched last. Otherwise returns false.
1642 * This is a safety valve to protect against continuous respawn attempts if the
1643 * worker is dying immediately at launch. Note that since we will retry to
 1644 * launch the worker from the postmaster main loop, we will get another chance later.
1650 time_t curtime = time(NULL);
1652 /* Return false if too soon since last start. */
1663 * Is current process syncing replication slots?
1665 * Could be either backend executing SQL function or slot sync worker.
1674 * Amount of shared memory required for slot synchronization.
1683 * Allocate and initialize the shared memory of slot synchronization.
1703 * Error cleanup callback for slot sync SQL function.
1711 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1713 * The startup process during promotion invokes ShutDownSlotSync() which
1714 * waits for slot sync to finish and it does that by checking the
1715 * 'syncing' flag. Thus the SQL function must be done with slots' release
1716 * and cleanup to avoid any dangling temporary slots or active slots
1717 * before it marks itself as finished syncing.
1720 /* Make sure active replication slots are released */
1724 /* Also cleanup the synced temporary slots. */
1728 * The set syncing_slots indicates that the process errored out without
1729 * resetting the flag. So, we need to clean up shared memory and reset the
1739 * Synchronize the failover enabled replication slots using the specified
1740 * primary server connection.
1753 /* Cleanup the synced temporary slots */
1756 /* We are done with sync, so reset sync flag */
TimestampTz GetCurrentTimestamp(void)
Datum now(PG_FUNCTION_ARGS)
#define TextDatumGetCString(d)
Oid get_database_oid(const char *dbname, bool missing_ok)
void load_file(const char *filename, bool restricted)
int errmsg_internal(const char *fmt,...)
void EmitErrorReport(void)
int errdetail_internal(const char *fmt,...)
int errdetail(const char *fmt,...)
ErrorContextCallback * error_context_stack
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
sigjmp_buf * PG_exception_stack
#define ereport(elevel,...)
void err(int eval, const char *fmt,...)
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
const TupleTableSlotOps TTSOpsMinimalTuple
void ProcessConfigFile(GucContext context)
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Assert(PointerIsAligned(start, uint64))
void SignalHandlerForShutdownRequest(SIGNAL_ARGS)
volatile sig_atomic_t ShutdownRequestPending
volatile sig_atomic_t ConfigReloadPending
void SignalHandlerForConfigReload(SIGNAL_ARGS)
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
void ResetLatch(Latch *latch)
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
List * lappend(List *list, void *datum)
void list_free_deep(List *list)
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto, bool *found_consistent_snapshot)
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
void LWLockRelease(LWLock *lock)
char * pstrdup(const char *in)
void pfree(void *pointer)
void * palloc0(Size size)
#define GetProcessingMode()
#define CHECK_FOR_INTERRUPTS()
#define AmLogicalSlotSyncWorkerProcess()
#define HOLD_INTERRUPTS()
#define SetProcessingMode(mode)
BackendType MyBackendType
void namestrcpy(Name name, const char *str)
#define foreach_ptr(type, var, lst)
static XLogRecPtr DatumGetLSN(Datum X)
void FloatExceptionHandler(SIGNAL_ARGS)
static bool DatumGetBool(Datum X)
static Datum PointerGetDatum(const void *X)
static Pointer DatumGetPointer(Datum X)
static TransactionId DatumGetTransactionId(Datum X)
void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, bits32 flags, char *out_dbname)
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
void procsignal_sigusr1_handler(SIGNAL_ARGS)
void init_ps_display(const char *fixed_part)
char * quote_literal_cstr(const char *rawstr)
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
void ReplicationSlotDropAcquired(void)
void ReplicationSlotMarkDirty(void)
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *cause_name)
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
void ReplicationSlotPersist(void)
ReplicationSlot * MyReplicationSlot
void ReplicationSlotSave(void)
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
void ReplicationSlotRelease(void)
int max_replication_slots
ReplicationSlotCtlData * ReplicationSlotCtl
void ReplicationSlotsComputeRequiredLSN(void)
void ReplicationSlotCleanup(bool synced_only)
ReplicationSlotInvalidationCause
#define SlotIsLogical(slot)
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
static List * get_local_synced_slots(void)
#define MIN_SLOTSYNC_WORKER_NAPTIME_MS
#define PRIMARY_INFO_OUTPUT_COL_COUNT
static void slotsync_worker_disconnect(int code, Datum arg)
void SyncReplicationSlots(WalReceiverConn *wrconn)
static bool local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
static void drop_local_obsolete_slots(List *remote_slot_list)
static void reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
void ShutDownSlotSync(void)
static bool update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
bool sync_replication_slots
static SlotSyncCtxStruct * SlotSyncCtx
static void slotsync_failure_callback(int code, Datum arg)
#define SLOTSYNC_COLUMN_COUNT
#define SLOTSYNC_RESTART_INTERVAL_SEC
static void reset_syncing_flag()
char * CheckAndGetDbnameFromConninfo(void)
static bool syncing_slots
struct RemoteSlot RemoteSlot
static void ProcessSlotSyncInterrupts(void)
struct SlotSyncCtxStruct SlotSyncCtxStruct
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS
static bool synchronize_slots(WalReceiverConn *wrconn)
bool SlotSyncWorkerCanRestart(void)
static bool synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
static void wait_for_slot_activity(bool some_slot_updated)
static void slotsync_reread_config(void)
void SlotSyncShmemInit(void)
static bool update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *found_consistent_snapshot, bool *remote_slot_precedes)
static void slotsync_worker_onexit(int code, Datum arg)
static void check_and_set_sync_info(pid_t worker_pid)
static void update_synced_slots_inactive_since(void)
bool ValidateSlotSyncParams(int elevel)
static void validate_remote_info(WalReceiverConn *wrconn)
bool IsSyncingReplicationSlots(void)
void ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
Size SlotSyncShmemSize(void)
bool SnapBuildSnapshotExists(XLogRecPtr lsn)
#define SpinLockInit(lock)
#define SpinLockRelease(lock)
#define SpinLockAcquire(lock)
void appendStringInfo(StringInfo str, const char *fmt,...)
void appendStringInfoString(StringInfo str, const char *s)
void initStringInfo(StringInfo str)
ReplicationSlotInvalidationCause invalidated
TransactionId catalog_xmin
ReplicationSlot replication_slots[1]
TransactionId catalog_xmin
XLogRecPtr confirmed_flush
ReplicationSlotPersistency persistency
ReplicationSlotInvalidationCause invalidated
TransactionId effective_catalog_xmin
ReplicationSlotPersistentData data
Tuplestorestate * tuplestore
void InitializeTimeouts(void)
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
#define InvalidTransactionId
#define TransactionIdIsValid(xid)
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
#define WL_EXIT_ON_PM_DEATH
static WalReceiverConn * wrconn
bool hot_standby_feedback
#define walrcv_connect(conninfo, replication, logical, must_use_password, appname, err)
static void walrcv_clear_result(WalRcvExecResult *walres)
#define walrcv_get_dbname_from_conninfo(conninfo)
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
#define walrcv_disconnect(conn)
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *tli)
bool IsTransactionState(void)
void StartTransactionCommand(void)
void CommitTransactionCommand(void)
XLogSegNo XLogGetLastRemovedSegno(void)
XLogSegNo XLogGetOldestSegno(TimeLineID tli)
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
#define XLogRecPtrIsInvalid(r)
#define InvalidXLogRecPtr