/*-------------------------------------------------------------------------
 *
 * visibilitymap.c
 *	  bitmap for tracking visibility of heap tuples
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/heap/visibilitymap.c
 *
 * INTERFACE ROUTINES
 *		visibilitymap_clear  - clear bits for one page in the visibility map
 *		visibilitymap_pin	 - pin a map page for setting a bit
 *		visibilitymap_pin_ok - check whether correct map page is already pinned
 *		visibilitymap_set	 - set a bit in a previously pinned page
 *		visibilitymap_get_status - get status of bits
 *		visibilitymap_count  - count number of bits set in visibility map
 *		visibilitymap_prepare_truncate -
 *			prepare for truncation of the visibility map
 *
 * NOTES
 *
 * The visibility map is a bitmap with two bits (all-visible and all-frozen)
 * per heap page. A set all-visible bit means that all tuples on the page are
 * known visible to all transactions, and therefore the page doesn't need to
 * be vacuumed. A set all-frozen bit means that all tuples on the page are
 * completely frozen, and therefore the page doesn't need to be vacuumed even
 * if a whole-table-scanning vacuum is required (e.g. anti-wraparound vacuum).
 * The all-frozen bit must be set only when the page is already all-visible.
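 *
 * Illustrative sketch (not part of the original comment): with the flag
 * values from visibilitymap.h, each heap page consumes two adjacent bits,
 * and the only meaningful states are
 *
 *		0x00							neither bit set (page must be vacuumed)
 *		VISIBILITYMAP_ALL_VISIBLE (0x01)		page needs no ordinary vacuum
 *		VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN (0x03)
 *								page can be skipped even by an
 *								aggressive (anti-wraparound) vacuum
 *
 * The fourth combination, all-frozen without all-visible (0x02), is invalid
 * and is asserted against elsewhere in this file.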
 * The map is conservative in the sense that we make sure that whenever a bit
 * is set, we know the condition is true, but if a bit is not set, it might or
 * might not be true.
 *
 * Clearing visibility map bits is not separately WAL-logged. The callers
 * must make sure that whenever a bit is cleared, the bit is cleared on WAL
 * replay of the updating operation as well.
 * When we *set* a visibility map bit during VACUUM, we must write WAL. This
 * may seem counterintuitive, since the bit is basically a hint: if it is
 * clear, it may still be the case that every tuple on the page is visible to
 * all transactions; we just don't know that for certain. The difficulty is
 * that there are two bits which are typically set together: the
 * PD_ALL_VISIBLE bit on the page itself, and the visibility map bit. If a
 * crash occurs after the visibility map page makes it to disk and before the
 * updated heap page makes it to disk, redo must set the bit on the heap page.
 * Otherwise, the next insert, update, or delete on the heap page will fail to
 * realize that the visibility map bit must be cleared, possibly causing
 * index-only scans to return wrong answers.
 * VACUUM will normally skip pages for which the visibility map bit is set;
 * such pages can't contain any dead tuples and therefore don't need vacuuming.
 * In heapam.c, whenever a page is modified so that not all tuples on the
 * page are visible to everyone anymore, the corresponding bit in the
 * visibility map is cleared. In order to be crash-safe, we need to do this
 * while still holding a lock on the heap page and in the same critical
 * section that logs the page modification. However, we don't want to hold
 * the buffer lock over any I/O that may be required to read in the visibility
 * map page. To avoid this, we examine the heap page before locking it;
 * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
 * page. Then, we lock the buffer. But this creates a race condition: there
 * is a possibility that in the time it takes to lock the buffer, the
 * PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the
 * buffer, pin the visibility map page, and relock the buffer. This shouldn't
 * happen often, because only VACUUM currently sets visibility map bits,
 * and the race will only occur if VACUUM processes a given page at almost
 * exactly the same time that someone tries to further modify it.
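 *
 * Illustrative sketch of that dance (hedged; this is not the actual heapam.c
 * code, and the variable names are assumptions):
 *
 *		if (PageIsAllVisible(BufferGetPage(heapbuf)))
 *			visibilitymap_pin(rel, blk, &vmbuf);
 *		LockBuffer(heapbuf, BUFFER_LOCK_EXCLUSIVE);
 *		if (PageIsAllVisible(BufferGetPage(heapbuf)) && !BufferIsValid(vmbuf))
 *		{
 *			LockBuffer(heapbuf, BUFFER_LOCK_UNLOCK);
 *			visibilitymap_pin(rel, blk, &vmbuf);	(may do I/O; no lock held)
 *			LockBuffer(heapbuf, BUFFER_LOCK_EXCLUSIVE);
 *		}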
 * To set a bit, you need to hold a lock on the heap page. That prevents
 * the race condition where VACUUM sees that all tuples on the page are
 * visible to everyone, but another backend modifies the page before VACUUM
 * sets the bit in the visibility map.
 * When a bit is set, the LSN of the visibility map page is updated to make
 * sure that the visibility map update doesn't get written to disk before the
 * WAL record of the changes that made it possible to set the bit is flushed.
 * But when a bit is cleared, we don't have to do that because it's always
 * safe to clear a bit in the map from a correctness point of view.
 *
 *-------------------------------------------------------------------------
 */
/*#define TRACE_VISIBILITYMAP */
/*
 * Size of the bitmap on each visibility map page, in bytes. There are no
 * extra headers, so the whole page minus the standard page header is
 * used for the bitmap.
 */
#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
/* Number of heap blocks we can represent in one byte */
#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)

/* Number of heap blocks we can represent in one visibility map page. */
#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)

/* Mapping from heap block number to the right bit in the visibility map */
#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
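
/*
 * Worked example (assuming the default BLCKSZ of 8192 and the usual 24-byte
 * page header, so MAPSIZE = 8192 - 24 = 8168): with BITS_PER_HEAPBLOCK = 2,
 * each map page covers 8168 * 4 = 32672 heap pages.  For heap block 40000:
 *
 *		HEAPBLK_TO_MAPBLOCK(40000) = 40000 / 32672       = 1
 *		HEAPBLK_TO_MAPBYTE(40000)  = (40000 % 32672) / 4 = 1832
 *		HEAPBLK_TO_OFFSET(40000)   = (40000 % 4) * 2     = 0
 */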
/* Masks for counting subsets of bits in the visibility map. */
#define VISIBLE_MASK8	(0x55)	/* The lower bit of each bit pair */
#define FROZEN_MASK8	(0xaa)	/* The upper bit of each bit pair */
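
/*
 * Worked example (illustrative): a map byte of 0xcd (binary 1100 1101)
 * encodes, from the lowest bit pair up, one all-visible page, one
 * all-visible-and-frozen page, one page with neither bit, and another
 * all-visible-and-frozen page.  Masking and popcounting yields the tallies:
 *
 *		0xcd & VISIBLE_MASK8 = 0x45  ->  popcount 3  ->  3 pages all-visible
 *		0xcd & FROZEN_MASK8  = 0x88  ->  popcount 2  ->  2 pages all-frozen
 *
 * which is the per-byte arithmetic behind the pg_popcount_masked() calls
 * used to count whole map pages.
 */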
/* prototypes for internal routines */
/*
 * visibilitymap_clear - clear specified bits for one page in visibility map
 *
 * You must pass a buffer containing the correct map page to this function.
 * Call visibilitymap_pin first to pin the right one. This function doesn't do
 * any I/O. Returns true if any bits have been cleared and false otherwise.
 */
	uint8		mask = flags << mapOffset;
	bool		cleared = false;

	/* Must never clear all_visible bit while leaving all_frozen bit set */

#ifdef TRACE_VISIBILITYMAP

	elog(ERROR, "wrong buffer passed to visibilitymap_clear");

	if (map[mapByte] & mask)
		map[mapByte] &= ~mask;
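
	/*
	 * Worked example (illustrative, not from the original file): clearing
	 * only the all-frozen bit of the third heap page covered by this byte,
	 * i.e. flags = VISIBILITYMAP_ALL_FROZEN (0x02) and mapOffset = 4, gives
	 *
	 *		mask = 0x02 << 4 = 0x20
	 *		map[mapByte] &= ~0x20		(the other pages' bits are untouched)
	 *
	 * Clearing just the all-frozen bit is legitimate; clearing only the
	 * all-visible bit while leaving all-frozen set is what the assertion
	 * above guards against.
	 */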
/*
 * visibilitymap_pin - pin a map page for setting a bit
 *
 * Setting a bit in the visibility map is a two-phase operation. First, call
 * visibilitymap_pin, to pin the visibility map page containing the bit for
 * the heap page. Because that can require I/O to read the map page, you
 * shouldn't hold a lock on the heap page while doing that. Then, call
 * visibilitymap_set to actually set the bit.
 *
 * On entry, *vmbuf should be InvalidBuffer or a valid buffer returned by
 * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
 * relation. On return, *vmbuf is a valid buffer with the map page containing
 * the bit for heapBlk.
 *
 * If the page doesn't exist in the map file yet, it is extended.
 */
	/* Reuse the old pinned buffer if possible */
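
/*
 * Hedged usage sketch (not part of the original file): the two-phase
 * pin-then-set protocol as a vacuum-like caller might follow it.  The
 * function name, variable names, and cutoff_xid choice are assumptions.
 */
#ifdef NOT_USED
static void
example_set_all_visible(Relation rel, Buffer heapbuf, BlockNumber blk,
						TransactionId cutoff_xid)
{
	Buffer		vmbuf = InvalidBuffer;

	/* Phase 1: pin the map page; this may do I/O, so no heap page lock yet */
	visibilitymap_pin(rel, blk, &vmbuf);

	/* Phase 2: set the bit while holding the heap page lock */
	LockBuffer(heapbuf, BUFFER_LOCK_EXCLUSIVE);

	/* visibilitymap_set expects PD_ALL_VISIBLE to be set by the caller */
	PageSetAllVisible(BufferGetPage(heapbuf));
	MarkBufferDirty(heapbuf);

	visibilitymap_set(rel, blk, heapbuf, InvalidXLogRecPtr, vmbuf,
					  cutoff_xid, VISIBILITYMAP_ALL_VISIBLE);

	LockBuffer(heapbuf, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(vmbuf);
}
#endif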
/*
 * visibilitymap_pin_ok - do we already have the correct page pinned?
 *
 * On entry, vmbuf should be InvalidBuffer or a valid buffer returned by
 * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
 * relation. The return value indicates whether the buffer covers the
 * given heapBlk.
 */
/*
 * visibilitymap_set - set bit(s) on a previously pinned page
 *
 * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
 * or InvalidXLogRecPtr in normal running. The VM page LSN is advanced to the
 * one provided; in normal running, we generate a new XLOG record and set the
 * page LSN to that value (though the heap page's LSN may *not* be updated;
 * see below). cutoff_xid is the largest xmin on the page being marked
 * all-visible; it is needed for Hot Standby, and can be InvalidTransactionId
 * if the page contains no tuples. It can also be set to InvalidTransactionId
 * when a page that is already all-visible is being marked all-frozen.
 *
 * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
 * this function. Except in recovery, caller should also pass the heap
 * buffer. When checksums are enabled and we're not in recovery, we must add
 * the heap buffer to the WAL chain to protect it from being torn.
 *
 * You must pass a buffer containing the correct map page to this function.
 * Call visibilitymap_pin first to pin the right one. This function doesn't do
 * any I/O.
 *
 * Returns the state of the page's VM bits before setting flags.
 */
#ifdef TRACE_VISIBILITYMAP

	/* Must never set all_frozen bit without also setting all_visible bit */

	/* Check that we have the right heap page pinned, if present */
	elog(ERROR, "wrong heap buffer passed to visibilitymap_set");

	/* Check that we have the right VM page pinned */
	elog(ERROR, "wrong VM buffer passed to visibilitymap_set");

	map[mapByte] |= (flags << mapOffset);
	/*
	 * If data checksums are enabled (or wal_log_hints=on), we
	 * need to protect the heap page from being torn.
	 *
	 * If not, then we must *not* update the heap page's LSN. In
	 * this case, the FPI for the heap page was omitted from the
	 * WAL record inserted above, so it would be incorrect to
	 * update the heap page's LSN.
	 */
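
	/*
	 * Hedged sketch of the surrounding WAL/LSN handling (abbreviated; this is
	 * how the ordering rule from the file header plays out in normal running,
	 * not a verbatim copy of the elided code):
	 *
	 *		if (RelationNeedsWAL(rel))
	 *		{
	 *			if (XLogRecPtrIsInvalid(recptr))
	 *			{
	 *				recptr = log_heap_visible(rel, heapBuf, vmBuf,
	 *										  cutoff_xid, flags);
	 *				if (XLogHintBitIsNeeded())
	 *					PageSetLSN(BufferGetPage(heapBuf), recptr);
	 *			}
	 *			PageSetLSN(BufferGetPage(vmBuf), recptr);
	 *		}
	 */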
/*
 * visibilitymap_get_status - get status of bits
 *
 * Are all tuples on heapBlk visible to all transactions, or all frozen,
 * according to the visibility map?
 *
 * On entry, *vmbuf should be InvalidBuffer or a valid buffer returned by an
 * earlier call to visibilitymap_pin or visibilitymap_get_status on the same
 * relation. On return, *vmbuf is a valid buffer with the map page containing
 * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
 * releasing *vmbuf after it's done testing and setting bits.
 *
 * NOTE: This function is typically called without a lock on the heap page,
 * so somebody else could change the bit just after we look at it. In fact,
 * since we don't lock the visibility map page either, it's even possible that
 * someone else could have changed the bit just before we look at it, but yet
 * we might see the old value. It is the caller's responsibility to deal with
 * all concurrency issues!
 */
#ifdef TRACE_VISIBILITYMAP

	/* Reuse the old pinned buffer if possible */

	/*
	 * A single byte read is atomic. There could be memory-ordering effects
	 * here, but for performance reasons we make it the caller's job to worry
	 * about that.
	 */
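
/*
 * Hedged usage sketch (not part of the original file): roughly how an
 * index-only-scan style caller consults the map.  The function name and
 * variable names are assumptions.
 */
#ifdef NOT_USED
static bool
example_page_is_all_visible(Relation rel, BlockNumber heapBlk)
{
	Buffer		vmbuf = InvalidBuffer;
	uint8		status;

	/* No heap page lock is needed; the result may be immediately stale */
	status = visibilitymap_get_status(rel, heapBlk, &vmbuf);

	if (BufferIsValid(vmbuf))
		ReleaseBuffer(vmbuf);

	return (status & VISIBILITYMAP_ALL_VISIBLE) != 0;
}
#endif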
/*
 * visibilitymap_count - count number of bits set in visibility map
 *
 * Note: we ignore the possibility of race conditions when the table is being
 * extended concurrently with the call. New pages added to the table aren't
 * going to be marked all-visible or all-frozen, so they won't affect the
 * result.
 */
	/* all_visible must be specified */

	for (mapBlock = 0;; mapBlock++)
	{
		/*
		 * Read till we fall off the end of the map. We assume that any extra
		 * bytes in the last page are zeroed, so we don't bother excluding
		 * them from the count.
		 */

		/*
		 * We choose not to lock the page, since the result is going to be
		 * immediately stale anyway if anyone is concurrently setting or
		 * clearing bits, and we only really need an approximate value.
		 */

	*all_visible = nvisible;
	*all_frozen = nfrozen;
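
/*
 * Hedged usage sketch (illustrative): callers that maintain per-relation
 * statistics typically just want the two totals.  The function name and the
 * DEBUG message are assumptions.
 */
#ifdef NOT_USED
static void
example_report_vm_counts(Relation rel)
{
	BlockNumber nvisible;
	BlockNumber nfrozen;

	visibilitymap_count(rel, &nvisible, &nfrozen);
	elog(DEBUG1, "\"%s\": %u all-visible, %u all-frozen pages in the VM",
		 RelationGetRelationName(rel), nvisible, nfrozen);
}
#endif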
/*
 * visibilitymap_prepare_truncate -
 *		prepare for truncation of the visibility map
 *
 * nheapblocks is the new size of the heap.
 *
 * Return the number of blocks of new visibility map.
 * If it's InvalidBlockNumber, there is nothing to truncate;
 * otherwise the caller is responsible for calling smgrtruncate()
 * to truncate the visibility map pages.
 */
	/* last remaining block, byte, and bit */

#ifdef TRACE_VISIBILITYMAP

	/*
	 * If no visibility map has been created yet for this relation, there's
	 * nothing to truncate.
	 */

	/*
	 * Unless the new size is exactly at a visibility map page boundary, the
	 * tail bits in the last remaining map page, representing truncated heap
	 * blocks, need to be cleared. This is not only tidy, but also necessary
	 * because we don't get a chance to clear the bits if the heap is extended
	 * again.
	 */
	if (truncByte != 0 || truncOffset != 0)
	{
		newnblocks = truncBlock + 1;

		mapBuffer = vm_readbuf(rel, truncBlock, false);
		/* nothing to do, the file was already smaller */

		/* NO EREPORT(ERROR) from here till changes are logged */

		/* Clear out the unwanted bytes. */

		/*
		 * Mask out the unwanted bits of the last remaining byte.
		 *
		 * ((1 << 0) - 1) = 00000000
		 * ((1 << 1) - 1) = 00000001
		 * ...
		 * ((1 << 6) - 1) = 00111111
		 * ((1 << 7) - 1) = 01111111
		 */
		map[truncByte] &= (1 << truncOffset) - 1;
		/*
		 * Truncation of a relation is WAL-logged at a higher level, and we
		 * will be called at WAL replay. But if checksums are enabled, we need
		 * to still write a WAL record to protect against a torn page, if the
		 * page is flushed to disk before the truncation WAL record. We cannot
		 * use MarkBufferDirtyHint here, because that will not dirty the page
		 * during recovery.
		 */
		newnblocks = truncBlock;

	/* nothing to do, the file was already smaller than requested size */
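
/*
 * Worked example (assuming the default 8 kB block size, so 32672 heap blocks
 * per map page): truncating the heap to nheapblocks = 1000 gives
 *
 *		truncBlock  = HEAPBLK_TO_MAPBLOCK(1000) = 0
 *		truncByte   = HEAPBLK_TO_MAPBYTE(1000)  = 250
 *		truncOffset = HEAPBLK_TO_OFFSET(1000)   = 0
 *
 * so map block 0 is kept (newnblocks = 1), the bytes after byte 250 are
 * cleared outright in the "Clear out the unwanted bytes" step, and byte 250
 * itself is masked with (1 << 0) - 1 = 0, i.e. fully cleared, since the heap
 * blocks it covers (1000..1003) are all beyond the new end of the heap.
 */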
/*
 * Read a visibility map page.
 *
 * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
 * true, the visibility map file is extended.
 */

	/*
	 * Caution: re-using this smgr pointer could fail if the relcache entry
	 * gets closed. It's safe as long as we only do smgr-level operations
	 * between here and the last use of the pointer.
	 */

	/*
	 * If we haven't cached the size of the visibility map fork yet, check it
	 * first.
	 */
	/*
	 * For reading we use ZERO_ON_ERROR mode, and initialize the page if
	 * necessary. It's always safe to clear bits, so it's better to clear
	 * corrupt pages than error out.
	 *
	 * We use the same path below to initialize pages when extending the
	 * relation, as a concurrent extension can end up with vm_extend()
	 * returning an already-initialized page.
	 */

	/*
	 * Initializing the page when needed is trickier than it looks, because of
	 * the possibility of multiple backends doing this concurrently, and our
	 * desire to not uselessly take the buffer lock in the normal path where
	 * the page is OK. We must take the lock to initialize the page, so
	 * recheck page newness after we have the lock, in case someone else
	 * already did it. Also, because we initially check PageIsNew with no
	 * lock, it's possible to fall through and return the buffer while someone
	 * else is still initializing the page (i.e., we might see pd_upper as set
	 * but other page header fields are still zeroes). This is harmless for
	 * callers that will take a buffer lock themselves, but some callers
	 * inspect the page without any lock at all. The latter is OK only so
	 * long as it doesn't depend on the page header having correct contents.
	 * Current usage is safe because PageGetContents() does not require that.
	 */
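
/*
 * Hedged sketch of the read-and-maybe-initialize pattern described above
 * (abbreviated; the real function also handles the cached-size check and the
 * extension path, and the function name here is an assumption).
 */
#ifdef NOT_USED
static Buffer
example_vm_readbuf(Relation rel, BlockNumber blkno)
{
	Buffer		buf;

	/* Zero (rather than error) on a corrupt page: clearing bits is safe */
	buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, blkno,
							 RBM_ZERO_ON_ERROR, NULL);

	if (PageIsNew(BufferGetPage(buf)))
	{
		/* Take the lock and recheck: someone may have initialized it */
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
		if (PageIsNew(BufferGetPage(buf)))
			PageInit(BufferGetPage(buf), BLCKSZ, 0);
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	}
	return buf;
}
#endif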
/*
 * Ensure that the visibility map fork is at least vm_nblocks long, extending
 * it if necessary with zeroed pages.
 */

	/*
	 * Send a shared-inval message to force other backends to close any smgr
	 * references they may have for this rel, which we are about to change.
	 * This is a useful optimization because it means that backends don't have
	 * to keep checking for creation or extension of the file, which happens
	 * infrequently.
	 */
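
/*
 * Hedged sketch of the extension step described above (abbreviated; the
 * function name is an assumption, and the exact ExtendBufferedRelTo flag
 * combination shown here is a guess rather than a statement about the real
 * vm_extend()).
 */
#ifdef NOT_USED
static Buffer
example_vm_extend(Relation rel, BlockNumber vm_nblocks)
{
	Buffer		buf;

	/* Extend the VM fork to vm_nblocks, creating it if it doesn't exist */
	buf = ExtendBufferedRelTo(BMR_REL(rel), VISIBILITYMAP_FORKNUM, NULL,
							  EB_CREATE_FORK_IF_NEEDED | EB_CLEAR_SIZE_CACHE,
							  vm_nblocks, RBM_ZERO_ON_ERROR);

	/* Tell other backends to drop their smgr references to this relation */
	CacheInvalidateSmgr(RelationGetSmgr(rel)->smgr_rlocator);

	return buf;
}
#endif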