/*
 * brin_revmap.c
 *	  Range map for BRIN indexes
 *
 * The range map (revmap) is a translation structure for BRIN indexes: for each
 * page range there is one summary tuple, and its location is tracked by the
 * revmap.  Whenever a new tuple is inserted into a table that violates the
 * previously recorded summary values, a new tuple is inserted into the index
 * and the revmap is updated to point to it.
 *
 * The revmap is stored in the first pages of the index, immediately following
 * the metapage.  When the revmap needs to be expanded, all tuples on the
 * regular BRIN page at that block (if any) are moved out of the way.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/brin/brin_revmap.c
 */
/*
 * In revmap pages, each item stores an ItemPointerData.  These defines let one
 * find the logical revmap page number and index number of the revmap item for
 * the given heap block number.
 */
#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
	((heapBlk / pagesPerRange) / REVMAP_PAGE_MAXITEMS)
#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
	((heapBlk / pagesPerRange) % REVMAP_PAGE_MAXITEMS)
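/*
 * Worked example (an illustration, not part of the original source): with the
 * default 8kB block size, REVMAP_PAGE_MAXITEMS works out to 1360.  Assuming
 * the default pagesPerRange of 128, heap block 1000000 belongs to range
 * 1000000 / 128 = 7812, so its revmap item lives on logical revmap page
 * 7812 / 1360 = 5 (physical block 5 + 1 = 6, accounting for the metapage) at
 * item index 7812 % 1360 = 1012.
 */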
/* typedef appears in brin_revmap.h */
/*
 * Initialize an access object for a range map.  This must be freed by
 * brinRevmapTerminate when caller is done with it.
 */
/*
 * Release resources associated with a revmap access object.
 */
/*
 * Extend the revmap to cover the given heap block number.
 */

	/* Ensure the buffer we got is in the expected range */
	Assert(mapBlk != InvalidBlockNumber &&
		   mapBlk != BRIN_METAPAGE_BLKNO &&
		   mapBlk <= revmap->rm_lastRevmapPage);
/*
 * Prepare to insert an entry into the revmap; the revmap buffer in which the
 * entry is to reside is locked and returned.  Most callers should call
 * brinRevmapExtend beforehand, as this routine does not extend the revmap if
 * it's not long enough.
 *
 * The returned buffer is also recorded in the revmap struct; finishing that
 * releases the buffer, therefore the caller needn't do it explicitly.
 */
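/*
 * A minimal insertion-flow sketch (illustration only; the critical section
 * and WAL logging that regular operation requires are omitted, and newtid is
 * a hypothetical variable holding the TID of the newly inserted index tuple):
 */
	brinRevmapExtend(revmap, heapBlk);
	buf = brinLockRevmapPageForUpdate(revmap, heapBlk);
	brinSetHeapBlockItemptr(buf, pagesPerRange, heapBlk, newtid);
	MarkBufferDirty(buf);
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);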
/*
 * In the given revmap buffer (locked appropriately by caller), which is used
 * in a BRIN index of pagesPerRange pages per range, set the element
 * corresponding to heap block number heapBlk to the given TID.
 *
 * Once the operation is complete, the caller must update the LSN on the
 * returned buffer.
 *
 * This is used both in regular operation and during WAL replay.
 */
	/* The correct page should already be pinned and locked */
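	/*
	 * A sketch of the core of this operation (illustration only; assertions
	 * omitted): locate the range's slot in the page's TID array and overwrite
	 * it with the new value.
	 */
	RevmapContents *contents;
	ItemPointerData *iptr;

	contents = (RevmapContents *) PageGetContents(BufferGetPage(buf));
	iptr = contents->rm_tids + HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);
	if (ItemPointerIsValid(&tid))
		ItemPointerSet(iptr,
					   ItemPointerGetBlockNumber(&tid),
					   ItemPointerGetOffsetNumber(&tid));
	else
		ItemPointerSetInvalid(iptr);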
/*
 * Fetch the BrinTuple for a given heap block.
 *
 * The buffer containing the tuple is locked, and returned in *buf.  The
 * returned tuple points to the shared buffer and must not be freed; if caller
 * wants to use it after releasing the buffer lock, it must create its own
 * palloc'ed copy.  As an optimization, the caller can pass a pinned buffer
 * *buf on entry, which will avoid a pin-unpin cycle when the next tuple is on
 * the same page as a previous one.
 *
 * If no tuple is found for the given heap range, returns NULL.  In that case,
 * *buf might still be updated (and pin must be released by caller), but it's
 * not locked.
 *
 * The output tuple offset within the buffer is returned in *off, and its size
 * is returned in *size.
 */
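/*
 * A hypothetical caller sketch (illustration only; fetch_summary_copy is not
 * part of this file): fetch the summary tuple for one heap block and copy it
 * with palloc before the buffer lock is released, per the contract above.
 */
static BrinTuple *
fetch_summary_copy(Relation idxrel, BlockNumber heapBlk)
{
	BlockNumber pagesPerRange;
	BrinRevmap *revmap;
	Buffer		buf = InvalidBuffer;
	OffsetNumber off;
	Size		size;
	BrinTuple  *tup;
	BrinTuple  *copy = NULL;

	revmap = brinRevmapInitialize(idxrel, &pagesPerRange);
	tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, &size,
								   BUFFER_LOCK_SHARE);
	if (tup != NULL)
	{
		/* the tuple points into the shared buffer: copy before unlocking */
		copy = palloc(size);
		memcpy(copy, tup, size);
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	}

	/* the pin must be dropped even when no tuple was found */
	if (BufferIsValid(buf))
		ReleaseBuffer(buf);
	brinRevmapTerminate(revmap);

	return copy;
}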
	/* normalize the heap block number to be the first page in the range */
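	/* e.g. (a sketch of the normalization the comment above describes): */
	heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;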
	/*
	 * Compute the revmap page number we need.  If Invalid is returned (i.e.,
	 * the revmap page hasn't been created yet), the requested page range is
	 * not summarized.
	 */
		/*
		 * Check the TID we got in a previous iteration, if any, and save the
		 * current TID we got from the revmap; if we loop, we can sanity-check
		 * that the next one we get is different.  Otherwise we might be stuck
		 * looping forever if the revmap is somehow badly broken.
		 */
		if (ItemPointerIsValid(&previptr) && ItemPointerEquals(&previptr, iptr))
			ereport(ERROR,
					(errcode(ERRCODE_INDEX_CORRUPTED),
					 errmsg_internal("corrupted BRIN index: inconsistent range map")));
		previptr = *iptr;
		/* Ok, got a pointer to where the BrinTuple should be.  Fetch it. */

		/* If we land on a revmap page, start over */
		/*
		 * If the offset number is greater than what's in the page, it's
		 * possible that the range was desummarized concurrently.  Just
		 * return NULL to handle that case.
		 */
		/* No luck.  Assume that the revmap was updated concurrently. */

	/* not reached, but keep compiler quiet */
	return NULL;
/*
 * Delete an index tuple, marking a page range as unsummarized.
 *
 * Index must be locked in ShareUpdateExclusiveLock mode.
 *
 * Return false if caller should retry.
 */
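/*
 * A caller sketch (illustration only; idxrel, heapBlk, and done are
 * hypothetical variables): since a false return means "retry", this function
 * is normally driven from a loop.
 */
	do
	{
		CHECK_FOR_INTERRUPTS();
		done = brinRevmapDesummarizeRange(idxrel, heapBlk);
	} while (!done);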
	/* revmap page doesn't exist: range not summarized, we're done */
	/* Lock the revmap page, obtain the index tuple pointer from it */
	revmapBuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
	contents = (RevmapContents *) PageGetContents(BufferGetPage(revmapBuf));
	iptr = contents->rm_tids + revmapOffset;
	/* no index tuple: range not summarized, we're done */

	/* if this is no longer a regular page, tell caller to start over */
	if (regOffset > PageGetMaxOffsetNumber(regPg))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("corrupted BRIN index: inconsistent range map")));

	lp = PageGetItemId(regPg, regOffset);
	if (!ItemIdIsUsed(lp))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("corrupted BRIN index: inconsistent range map")));
	/*
	 * Placeholder tuples only appear during unfinished summarization, and we
	 * hold ShareUpdateExclusiveLock, so this function cannot run concurrently
	 * with that.  So any placeholder tuples that exist are leftovers from a
	 * crashed or aborted summarization; remove them silently.
	 */
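	/*
	 * A sketch of the removal sequence this describes (illustration only;
	 * the critical section and WAL logging are omitted): clear the range's
	 * revmap entry, then delete the leftover index tuple in place.
	 */
	ItemPointerSetInvalid(&invalidIptr);
	brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
							invalidIptr);
	PageIndexTupleDeleteNoCompact(regPg, regOffset);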
	/* XXX record free space in FSM? */
/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it.  If the revmap page hasn't been allocated yet, return
 * InvalidBlockNumber.
 */
	/* obtain revmap block number, skip 1 for metapage block */
	targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

	/* Normal case: the revmap page is already allocated */
	if (targetblk <= revmap->rm_lastRevmapPage)
		return targetblk;

	return InvalidBlockNumber;
/*
 * Obtain and return a buffer containing the revmap page for the given heap
 * page.  The revmap must have been previously extended to cover that page.
 * The returned buffer is also recorded in the revmap struct; finishing that
 * releases the buffer, therefore the caller needn't do it explicitly.
 */
	/* Translate the heap block number to physical index location. */
	mapBlk = revmap_get_blkno(revmap, heapBlk);

	if (mapBlk == InvalidBlockNumber)
		elog(ERROR, "revmap does not cover heap block %u", heapBlk);

	/* Ensure the buffer we got is in the expected range */
	Assert(mapBlk != BRIN_METAPAGE_BLKNO &&
		   mapBlk <= revmap->rm_lastRevmapPage);
	/*
	 * Obtain the buffer from which we need to read.  If we already have the
	 * correct buffer in our access struct, use that; otherwise, release that
	 * buffer (if valid) and read the one we need.
	 */
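	/*
	 * A sketch of that logic (illustration only; assumes the access struct
	 * caches the current revmap buffer and the index relation as rm_currBuf
	 * and rm_irel):
	 */
	if (revmap->rm_currBuf == InvalidBuffer ||
		mapBlk != BufferGetBlockNumber(revmap->rm_currBuf))
	{
		if (revmap->rm_currBuf != InvalidBuffer)
			ReleaseBuffer(revmap->rm_currBuf);

		revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
	}

	return revmap->rm_currBuf;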
/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it.  If the revmap page hasn't been allocated yet, extend
 * the revmap until it is.
 */
	/* obtain revmap block number, skip 1 for metapage block */
	targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

	/* Extend the revmap, if necessary */
	while (targetblk > revmap->rm_lastRevmapPage)
		revmap_physical_extend(revmap);
/*
 * Try to extend the revmap by one page.  This might not happen for a number of
 * reasons; caller is expected to retry until the expected outcome is obtained.
 */
	/*
	 * Lock the metapage.  This locks out concurrent extensions of the revmap,
	 * but note that we still need to grab the relation extension lock because
	 * another backend can extend the index with regular BRIN pages.
	 */
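	/*
	 * A sketch of this step (illustration only; assumes the access struct
	 * caches the metapage buffer as rm_metaBuf):
	 */
	LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_EXCLUSIVE);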
	/*
	 * Check that our cached lastRevmapPage value was up-to-date; if it
	 * wasn't, update the cached copy and have caller start over.
	 */
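	/*
	 * A sketch of that check (illustration only; metadata points to the
	 * BrinMetaPageData stored in the locked metapage):
	 */
	if (metadata->lastRevmapPage != revmap->rm_lastRevmapPage)
	{
		revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
		return;
	}

	mapBlk = metadata->lastRevmapPage + 1;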
	if (mapBlk < nblocks)
		/*
		 * Very rare corner case: somebody extended the relation
		 * concurrently after we read its length.  If this happens, give
		 * up and have caller start over.  We will have to evacuate that
		 * page from under whoever is using it.
		 */
	/* Check that it's a regular block (or an empty page) */
	if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u",
						BrinPageType(page),
						RelationGetRelationName(irel),
						BufferGetBlockNumber(buf))));
	/* If the page is in use, evacuate it and restart */
	if (brin_start_evacuating_page(irel, buf))
	{
		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
		brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf);

		/* have caller start over */
		return;
	}
	/*
	 * Ok, we have now locked the metapage and the target block.  Re-initialize
	 * the target block as a revmap page, and update the metapage.
	 */

	/* the rm_tids array is initialized to all invalid by PageInit */
	brin_page_init(page, BRIN_PAGETYPE_REVMAP);
	/*
	 * Set pd_lower just past the end of the metadata.  This is essential,
	 * because without doing so, metadata will be lost if xlog.c compresses
	 * the page.  (We must do this here because pre-v11 versions of PG did not
	 * set the metapage's pd_lower correctly, so a pg_upgraded index might
	 * contain the wrong value.)
	 */
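	/*
	 * A sketch of the computation this describes (illustration only; metadata
	 * points to the BrinMetaPageData stored in the metapage):
	 */
	((PageHeader) metapage)->pd_lower =
		((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapage;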