1/*-------------------------------------------------------------------------
4 * page utilities routines for the postgres inverted index access method.
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gin/ginbtree.c
12 *-------------------------------------------------------------------------
36 * Lock buffer by needed method for search.
48 if (searchMode ==
false)
50 /* we should relock our page */
54 /* But root can become non-leaf during relock */
57 /* restore old lock type (very rare) */
70 * Descend the tree to the leaf page that contains or would contain the key
71 * we're searching for. The key should already be filled in 'btree', in
72 * tree-type specific manner. If btree->fullScan is true, descends to the
75 * If 'searchmode' is false, on return stack->buffer is exclusively locked,
76 * and the stack represents the full path to the root. Otherwise stack->buffer
77 * is share-locked, and stack->parent is NULL.
79 * If 'rootConflictCheck' is true, tree root is checked for serialization
84 bool rootConflictCheck)
94 if (rootConflictCheck)
110 * If we're going to modify the tree, finish any incomplete splits we
111 * encounter on the way.
117 * ok, page is correctly locked, we should check to move right ..,
118 * root never has a right link, so small optimization
130 stack->
blkno = rightlink;
140 /* now we have correct buffer, try to find child */
149 /* in search mode we may forget path to leaf */
150 stack->
blkno = child;
159 stack->
blkno = child;
167 * Step right from current page.
169 * The next page is locked first, before releasing the current page. This is
170 * crucial to prevent concurrent VACUUM from deleting a page that we are about
171 * to step to. (The lock-coupling isn't strictly necessary when we are
172 * traversing the tree to find an insert location, because page deletion grabs
173 * a cleanup lock on the root to prevent any concurrent inserts. See Page
174 * deletion section in the README. But there's no harm in doing it always.)
189 /* Sanity check that the page we stepped to is of similar kind. */
192 elog(
ERROR,
"right sibling of GIN page is of different type");
213 * Try to find parent for current stack position. Returns correct parent and
214 * child's offset in stack->parent. The root page is never released, to
215 * prevent conflict with vacuum process.
229 * Unwind the stack all the way up to the root, leaving only the root
232 * Be careful not to release the pin on the root page! The pin on root
233 * page is required to lock out concurrent vacuums on the tree.
247 buffer =
root->buffer;
265 * parent may be wrong, but if so, the ginFinishSplit call will
266 * recurse to call ginFindParents again to fix it.
281 /* Link not present in this level */
283 /* Do not release pin on the root buffer */
284 if (buffer !=
root->buffer)
291 /* finish any incomplete splits, as above */
308 ptr->
parent =
root;
/* it may be wrong, but in next call we will
315 /* Descend down to next level */
316 blkno = leftmostBlkno;
322 * Insert a new item to a page.
324 * Returns true if the insertion was finished. On false, the page was split and
325 * the parent needs to be updated. (A root split returns true as it doesn't
326 * need any further action by the caller to complete.)
328 * When inserting a downlink to an internal page, 'childbuf' contains the
329 * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
330 * atomically with the insert. Also, the existing item at offset stack->off
331 * in the target page is updated to point to updateblkno.
333 * stack->buffer is locked on entry, and is kept locked.
334 * Likewise for childbuf, if given.
345 Page childpage = NULL;
346 Page newlpage = NULL,
348 void *ptp_workspace = NULL;
353 * We do all the work of this function and its subfunctions in a temporary
354 * memory context. This avoids leakages and simplifies APIs, since some
355 * subfunctions allocate storage that has to survive until we've finished
359 "ginPlaceToPage temporary context",
379 * See if the incoming tuple will fit on the page. beginPlaceToPage will
380 * decide if the page needs to be split, and will compute the split
381 * contents if so. See comments for beginPlaceToPage and execPlaceToPage
382 * functions for more details of the API here.
385 insertdata, updateblkno,
387 &newlpage, &newrpage);
396 /* It will fit, perform the insertion */
403 * Perform the page update, dirty and register stack->buffer, and
404 * register any extra WAL data.
407 insertdata, updateblkno, ptp_workspace);
409 /* An insert to an internal page finishes the split of the child. */
424 xlrec.
flags = xlflags;
429 * Log information about child if this was an insertion of a
448 /* Insertion is complete. */
454 * Didn't fit, need to split. The split has been computed in newlpage
455 * and newrpage, which are pointers to palloc'd pages, not associated
456 * with buffers. stack->buffer is not touched yet.
462 Page newrootpg = NULL;
464 /* Get a new index page to become the right page */
467 /* During index build, count the new page */
478 /* Begin setting up WAL record */
480 data.flags = xlflags;
489 if (stack->
parent == NULL)
492 * splitting the root, so we need to allocate new left page and
493 * place pointers to left and right page on root page.
497 /* During index build, count the new left page */
513 * Construct a new root page containing downlinks to the new left
514 * and right pages. (Do this in a temporary copy rather than
515 * overwriting the original page directly, since we're not in the
516 * critical section yet.)
539 /* splitting a non-root page */
540 data.rrlink = savedRightLink;
556 * OK, we have the new contents of the left page in a temporary copy
557 * now (newlpage), and likewise for the new contents of the
558 * newly-allocated right block. The original page is still unchanged.
560 * If this is a root split, we also have a temporary page containing
561 * the new contents of the root.
570 * Restore the temporary copies over the real buffers.
572 if (stack->
parent == NULL)
574 /* Splitting the root, three pages to update */
576 memcpy(page, newrootpg, BLCKSZ);
582 /* Normal split, only two pages to update */
583 memcpy(page, newlpage, BLCKSZ);
587 /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
594 /* write WAL record */
602 * We just take full page images of all the split pages. Splits
603 * are uncommon enough that it's not worth complicating the code
604 * to be more efficient.
606 if (stack->
parent == NULL)
626 if (stack->
parent == NULL)
634 * We can release the locks/pins on the new pages now, but keep
635 * stack->buffer locked. childbuf doesn't get unlocked either.
638 if (stack->
parent == NULL)
642 * If we split the root, we're done. Otherwise the split is not
643 * complete until the downlink for the new page has been inserted to
646 result = (stack->
parent == NULL);
650 elog(
ERROR,
"invalid return code from GIN beginPlaceToPage method: %d", rc);
651 result =
false;
/* keep compiler quiet */
654 /* Clean up temp context */
662 * Finish a split by inserting the downlink for the new page to parent.
664 * On entry, stack->buffer is exclusively locked.
666 * If freestack is true, all the buffers are released and unlocked as we
667 * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
668 * locked, and stack is unmodified, except for possibly moving right to find
669 * the correct parent of page.
679 /* this loop crawls up the stack until the insertion is complete */
686#ifdef USE_INJECTION_POINTS
693 /* search parent to lock */
697 * If the parent page was incompletely split, finish that split first,
698 * then continue with the current one.
700 * Note: we have to finish *all* incomplete splits we encounter, even
701 * if we have to move right. Otherwise we might choose as the target a
702 * page that has no downlink in the parent, and splitting it further
708 /* move right if it's needed */
715 * rightmost page, but we don't find parent, we should use
733 /* insert the downlink */
737 insertdata, updateblkno,
738 stack->
buffer, buildStats);
742 * If the caller requested to free the stack, unlock and release the
743 * child buffer now. Otherwise keep it pinned and locked, but if we
744 * have to recurse up the tree, we can unlock the upper pages, only
745 * keeping the page at the bottom of the stack locked.
747 if (!first || freestack)
759 /* unlock the parent */
767 * An entry point to ginFinishSplit() that is used when we stumble upon an
768 * existing incompletely split page in the tree, as opposed to completing a
769 * split that we just made ourselves. The difference is that stack->buffer may
770 * be merely share-locked on entry, and will be upgraded to exclusive mode.
772 * Note: Upgrading the lock momentarily releases it. Doing that in a scan
773 * would not be OK, because a concurrent VACUUM might delete the page while
774 * we're not holding the lock. It's OK in an insert, though, because VACUUM
775 * has a different mechanism that prevents it from running concurrently with
776 * inserts. (Namely, it holds a cleanup lock on the root.)
782 elog(
DEBUG1,
"finishing incomplete split of block %u in gin index \"%s\"",
793 * Someone else already completed the split while we were not
804 * Insert a value to tree described by stack.
806 * The value to be inserted is given in 'insertdata'. Its format depends
807 * on whether this is an entry or data tree, ginInsertValue just passes it
808 * through to the tree-specific callback function.
810 * During an index build, buildStats is non-null and the counters it contains
811 * are incremented as needed.
813 * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
821 /* If the leaf page was incompletely split, finish the split first */
#define InvalidBlockNumber
static void BlockIdSet(BlockIdData *blockId, BlockNumber blockNumber)
BlockNumber BufferGetBlockNumber(Buffer buffer)
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
void ReleaseBuffer(Buffer buffer)
void UnlockReleaseBuffer(Buffer buffer)
void MarkBufferDirty(Buffer buffer)
void LockBuffer(Buffer buffer, int mode)
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
static Page BufferGetPage(Buffer buffer)
static bool BufferIsValid(Buffer bufnum)
Page PageGetTempPage(const PageData *page)
static void PageSetLSN(Page page, XLogRecPtr lsn)
#define GinPageGetOpaque(page)
#define GinPageIsData(page)
#define GinPageRightMost(page)
#define GIN_INCOMPLETE_SPLIT
#define GinPageIsLeaf(page)
#define GinPageIsIncompleteSplit(page)
int ginTraverseLock(Buffer buffer, bool searchMode)
void freeGinBtreeStack(GinBtreeStack *stack)
void ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata, GinStatsData *buildStats)
static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, void *insertdata, BlockNumber updateblkno, Buffer childbuf, GinStatsData *buildStats)
static void ginFindParents(GinBtree btree, GinBtreeStack *stack)
static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, GinStatsData *buildStats)
GinBtreeStack * ginFindLeafPage(GinBtree btree, bool searchMode, bool rootConflictCheck)
static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats, int access)
Buffer ginStepRight(Buffer buffer, Relation index, int lockmode)
void GinInitPage(Page page, uint32 f, Size pageSize)
Buffer GinNewBuffer(Relation index)
#define GIN_INSERT_ISDATA
#define GIN_INSERT_ISLEAF
Assert(PointerIsAligned(start, uint64))
#define INJECTION_POINT(name, arg)
void pfree(void *pointer)
MemoryContext CurrentMemoryContext
void MemoryContextDelete(MemoryContext context)
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
#define START_CRIT_SECTION()
#define END_CRIT_SECTION()
#define InvalidOffsetNumber
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
#define RelationGetRelationName(relation)
#define RelationNeedsWAL(relation)
BlockNumber(* findChildPage)(GinBtree, GinBtreeStack *)
void(* execPlaceToPage)(GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void *)
BlockNumber(* getLeftMostChild)(GinBtree, Page)
void *(* prepareDownlink)(GinBtree, Buffer)
bool(* isMoveRight)(GinBtree, Page)
GinPlaceToPageRC(* beginPlaceToPage)(GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void **, Page *, Page *)
OffsetNumber(* findChildPtr)(GinBtree, Page, BlockNumber, OffsetNumber)
void(* fillRoot)(GinBtree, Page, BlockNumber, Page, BlockNumber, Page)
struct GinBtreeStack * parent
RelFileLocator rd_locator
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
void XLogRegisterData(const void *data, uint32 len)
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
void XLogBeginInsert(void)
#define REGBUF_FORCE_IMAGE