1/*-------------------------------------------------------------------------
4 * routines for handling GIN entry tree pages.
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gin/ginentrypage.c
12 *-------------------------------------------------------------------------
29 * Form a tuple for entry tree.
31 * If the tuple would be too big to be stored, function throws a suitable
32 * error if errorTooBig is true, or returns NULL if errorTooBig is false.
34 * See src/backend/access/gin/README for a description of the index tuple
35 * format that is being built here. We build on the assumption that we
36 * are making a leaf-level key entry containing a posting list of nipd items.
37 * If the caller is actually trying to make a posting-tree entry, non-leaf
38 * entry, or pending-list entry, it should pass dataSize = 0 and then overwrite
39 * the t_tid fields as necessary. In any case, 'data' can be NULL to skip
40 * filling in the posting list; the caller is responsible for filling it
41 * afterwards if data = NULL and nipd > 0.
54 /* Build the basic tuple: optional column number, plus key datum */
71 * Determine and store offset to the posting list, making sure there is
72 * room for the category byte if needed.
74 * Note: because index_form_tuple MAXALIGNs the tuple size, there may well
75 * be some wasted pad space. Is it worth recomputing the data length to
76 * prevent that? That would also allow us to Assert that the real data
77 * doesn't overlap the GinNullCategory byte, which this code currently
88 newsize =
Max(newsize, minsize);
97 * Add space needed for posting list, if any. Then check that the tuple
98 * won't be too big to store.
108 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
109 errmsg(
"index row size %zu exceeds maximum %zu for index \"%s\"",
117 * Resize tuple if needed
124 * PostgreSQL 9.3 and earlier did not clear this new space, so we
125 * might find uninitialized padding when reading tuples from disk.
129 /* set new size in tuple header */
130 itup->
t_info &= ~INDEX_SIZE_MASK;
135 * Copy in the posting list, if provided
141 memcpy(ptr,
data, dataSize);
145 * Insert category byte, if needed
156 * Read item pointers from leaf entry tuple.
158 * Returns a palloc'd array of ItemPointers. The number of items is returned
175 if (nipd != ndecoded)
176 elog(
ERROR,
"number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded",
194 * Form a non-leaf entry tuple by copying the key data from the given tuple,
195 * which can be either a leaf or non-leaf entry tuple.
197 * Any posting list in the source tuple is not copied. The specified child
198 * block number is inserted into t_tid.
207 /* Tuple contains a posting list, just copy stuff before that */
212 memcpy(nitup, itup, origsize);
213 /* ... be sure to fix the size header field ... */
214 nitup->
t_info &= ~INDEX_SIZE_MASK;
215 nitup->
t_info |= origsize;
219 /* Copy the tuple as-is */
224 /* Now insert the correct downlink */
231 * Entry tree is a "static", ie tuple never deletes from it,
232 * so we don't use right bound, we use rightmost key instead.
266 * Find correct tuple in non-leaf page. It supposed that
267 * page correctly chosen and searching value SHOULD be on page
341 * Searches correct position for value on leaf page.
342 * Page should be correctly chosen.
343 * Returns true if value found on page.
414 /* if page isn't changed, we returns storedOff */
422 * we hope, that needed pointer goes to right. It's true if there
425 for (
i = storedOff + 1;
i <= maxoff;
i++)
431 maxoff = storedOff - 1;
485 * Delete tuple on leaf page if tuples existed and we
486 * should update it, update old child blkno to new right page
487 * if child split occurred
511 * Prepare to insert data on an entry page.
513 * If it will fit, return GPTP_INSERT after doing whatever setup is needed
514 * before we enter the insertion critical section. *ptp_workspace can be
515 * set to pass information along to the execPlaceToPage function.
517 * If it won't fit, perform a page split and return two temporary page
518 * images into *newlpage and *newrpage, with result GPTP_SPLIT.
520 * In neither case should the given page buffer be modified here.
522 * Note: on insertion to an internal node, in addition to inserting the given
523 * item, the downlink of the existing item at stack->off will be updated to
524 * point to updateblkno.
529 void **ptp_workspace,
535 /* If it doesn't fit, deal with split case */
543 /* Else, we're ready to proceed with insertion */
548 * Perform data insertion after beginPlaceToPage has decided it will fit.
550 * This is invoked within a critical section, and XLOG record creation (if
551 * needed) is already started. The target buffer is registered in slot 0.
570 elog(
ERROR,
"failed to add item to index page in \"%s\"",
578 * This must be static, because it has to survive until XLogInsert,
579 * and we can't palloc here. Ugly, but the XLogInsert infrastructure
580 * isn't reentrant anyway.
596 * Split entry page and insert new data.
598 * Returns new temp pages to *newlpage and *newrpage.
599 * The original buffer is left untouched.
621 PGAlignedBlock tupstore[2];
/* could need 2 pages' worth of tuples */
626 * First, append all the existing tuples and the new tuple we're inserting
627 * one after another in a temporary workspace.
630 ptr = tupstore[0].
data;
636 memcpy(ptr, insertData->
entry, size);
643 memcpy(ptr, itup, size);
648 if (off == maxoff + 1)
651 memcpy(ptr, insertData->
entry, size);
657 * Initialize the left and right pages, and copy all the tuples back to
663 ptr = tupstore[0].
data;
673 * Decide where to split. We try to equalize the pages' total data
674 * size, not number of tuples.
676 if (lsize > totalsize / 2)
688 elog(
ERROR,
"failed to add item to index page in \"%s\"",
693 /* return temp pages to caller */
699 * Construct insertion payload for inserting the downlink for given buffer.
719 * Fills new root by rightest values from child.
720 * Also called from ginxlog, should not use btree
731 elog(
ERROR,
"failed to add item to index root page");
736 elog(
ERROR,
"failed to add item to index root page");
741 * Set up GinBtree for entry page access
743 * Note: during WAL recovery, there may be no valid data in ginstate
744 * other than a faked-up Relation pointer; the key datum is bogus too.
#define InvalidBlockNumber
BlockNumber BufferGetBlockNumber(Buffer buffer)
void MarkBufferDirty(Buffer buffer)
static Page BufferGetPage(Buffer buffer)
Size PageGetFreeSpace(const PageData *page)
Page PageGetTempPageCopy(const PageData *page)
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
static Size PageGetPageSize(const PageData *page)
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define GinIsPostingTree(itup)
#define GinPageGetOpaque(page)
#define GinCategoryOffset(itup, ginstate)
#define GinGetPosting(itup)
#define GinGetDownlink(itup)
#define GinSetNPosting(itup, n)
#define GinItupIsCompressed(itup)
#define GinSetPostingOffset(itup, n)
#define GinSetDownlink(itup, blkno)
#define GinGetNPosting(itup)
#define GinGetPostingOffset(itup)
#define GinPageIsData(page)
signed char GinNullCategory
#define GinPageRightMost(page)
#define GinSetNullCategory(itup, ginstate, c)
#define GinPageIsLeaf(page)
void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
static void entryPreparePage(GinBtree btree, Page page, OffsetNumber off, GinBtreeEntryInsertData *insertData, BlockNumber updateblkno)
static bool entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
static bool entryIsMoveRight(GinBtree btree, Page page)
static BlockNumber entryGetLeftMostPage(GinBtree btree, Page page)
static void entrySplitPage(GinBtree btree, Buffer origbuf, GinBtreeStack *stack, GinBtreeEntryInsertData *insertData, BlockNumber updateblkno, Page *newlpage, Page *newrpage)
static IndexTuple getRightMostTuple(Page page)
static IndexTuple GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk)
static OffsetNumber entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
static bool entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off, GinBtreeEntryInsertData *insertData)
static GinPlaceToPageRC entryBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, void *insertPayload, BlockNumber updateblkno, void **ptp_workspace, Page *newlpage, Page *newrpage)
ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, int *nitems)
static void entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, void *insertPayload, BlockNumber updateblkno, void *ptp_workspace)
static BlockNumber entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, Pointer data, Size dataSize, int nipd, bool errorTooBig)
void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, Datum key, GinNullCategory category, GinState *ginstate)
static void * entryPrepareDownlink(GinBtree btree, Buffer lbuf)
ItemPointer ginPostingListDecode(GinPostingList *plist, int *ndecoded_out)
void GinInitPage(Page page, uint32 f, Size pageSize)
OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
int ginCompareAttEntries(GinState *ginstate, OffsetNumber attnuma, Datum a, GinNullCategory categorya, OffsetNumber attnumb, Datum b, GinNullCategory categoryb)
Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, GinNullCategory *category)
Assert(PointerIsAligned(start, uint64))
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
struct ItemIdData ItemIdData
ItemPointerData * ItemPointer
IndexTupleData * IndexTuple
static bool IndexTupleHasNulls(const IndexTupleData *itup)
static Size IndexTupleSize(const IndexTupleData *itup)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
#define InvalidOffsetNumber
#define FirstOffsetNumber
static Datum UInt16GetDatum(uint16 X)
#define RelationGetRelationName(relation)
#define RelationNeedsWAL(relation)
BlockNumber(* findChildPage)(GinBtree, GinBtreeStack *)
void(* execPlaceToPage)(GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void *)
BlockNumber(* getLeftMostChild)(GinBtree, Page)
bool(* findItem)(GinBtree, GinBtreeStack *)
void *(* prepareDownlink)(GinBtree, Buffer)
bool(* isMoveRight)(GinBtree, Page)
GinPlaceToPageRC(* beginPlaceToPage)(GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void **, Page *, Page *)
GinNullCategory entryCategory
OffsetNumber(* findChildPtr)(GinBtree, Page, BlockNumber, OffsetNumber)
void(* fillRoot)(GinBtree, Page, BlockNumber, Page, BlockNumber, Page)
TupleDesc tupdesc[INDEX_MAX_KEYS]
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)