1/*-------------------------------------------------------------------------
4 * Verifies the integrity of GIN indexes based on invariants.
7 * GIN index verification checks a number of invariants:
9 * - consistency: Paths in GIN graph have to contain consistent keys: tuples
10 * on parent pages consistently include tuples from children pages.
12 * - graph invariants: Each internal page must have at least one downlink, and
13 * can reference either only leaf pages or only internal pages.
16 * Copyright (c) 2016-2025, PostgreSQL Global Development Group
19 * contrib/amcheck/verify_gin.c
21 *-------------------------------------------------------------------------
34 * GinScanItem represents one item of depth-first scan of the index.
46 * GinPostingTreeScanItem represents one item of a depth-first posting tree scan.
62 void *callback_state,
bool readonly);
72 * gin_index_check(index regclass)
74 * Verify integrity of GIN index.
76 * Acquires AccessShareLock on heap & index relations.
93 * Read item pointers from leaf entry tuple.
95 * Returns a palloc'd array of ItemPointers. The number of items is returned
111 if (nipd != ndecoded)
112 elog(
ERROR,
"number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded",
128 * Scans through a posting tree (given by the root), and verifies that the keys
129 * on a child keys are consistent with the parent.
131 * Allocates a separate memory context and scans through posting tree graph.
144 "posting tree check context",
149 * We don't know the height of the tree yet, but as soon as we encounter a
150 * leaf page, we will set 'leafdepth' to its depth.
154 /* Start the scan at the root page */
159 stack->
blkno = posting_tree_root;
161 elog(
DEBUG3,
"processing posting tree at blk %u", posting_tree_root);
181 /* Check that the tree has the same height in all branches */
194 leafdepth = stack->
depth;
195 else if (stack->
depth != leafdepth)
197 (
errcode(ERRCODE_INDEX_CORRUPTED),
198 errmsg(
"index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
203 snprintf(tidrange_buf,
sizeof(tidrange_buf),
204 "%d tids (%u, %u) - (%u, %u)",
211 snprintf(tidrange_buf,
sizeof(tidrange_buf),
"0 tids");
214 elog(
DEBUG3,
"blk %u: parent %u highkey (%u, %u), %s",
229 (
errcode(ERRCODE_INDEX_CORRUPTED),
230 errmsg(
"index \"%s\": tid exceeds parent's high key in postingTree leaf on block %u",
240 * Check that tuples in each page are properly ordered and
241 * consistent with parent high key
246 elog(
DEBUG1,
"page blk: %u, type data, maxoff %d", stack->
blkno, maxoff);
249 elog(
DEBUG3,
"blk %u: internal posting tree page with %u items, parent %u highkey (%u, %u)",
254 elog(
DEBUG3,
"blk %u: root internal posting tree page with %u items",
255 stack->
blkno, maxoff);
258 * A GIN posting tree internal page stores PostingItems in the
259 * 'lower' part of the page. The 'upper' part is unused. The
260 * number of elements is stored in the opaque area (maxoff). Make
261 * sure the size of the 'lower' part agrees with 'maxoff'
263 * We didn't set pd_lower until PostgreSQL version 9.4, so if this
264 * check fails, it could also be because the index was
265 * binary-upgraded from an earlier version. That was a long time
266 * ago, though, so let's warn if it doesn't match.
272 (
errcode(ERRCODE_INDEX_CORRUPTED),
273 errmsg(
"index \"%s\" has unexpected pd_lower %u in posting tree block %u with maxoff %u)",
277 * Before the PostingItems, there's one ItemPointerData in the
278 * 'lower' part that stores the page's high key.
283 * Gin page right bound has a sane value only when not a highkey
284 * on the rightmost page (at a given level). For the rightmost
285 * page does not store the highkey explicitly, and the value is
292 (
errcode(ERRCODE_INDEX_CORRUPTED),
293 errmsg(
"index \"%s\": posting tree page's high key (%u, %u) doesn't match the downlink on block %u (parent blk %u, key (%u, %u))",
306 /* ItemPointerGetOffsetNumber expects a valid pointer */
320 * The rightmost item in the tree level has (0, 0) as the
326 (
errcode(ERRCODE_INDEX_CORRUPTED),
327 errmsg(
"index \"%s\": rightmost posting tree page (blk %u) has unexpected last key (%u, %u)",
339 (
errcode(ERRCODE_INDEX_CORRUPTED),
340 errmsg(
"index \"%s\" has wrong tuple order in posting tree, block %u, offset %u",
345 * Check if this tuple is consistent with the downlink in the
351 (
errcode(ERRCODE_INDEX_CORRUPTED),
352 errmsg(
"index \"%s\": posting item exceeds parent's high key in postingTree internal page on block %u offset %u",
356 /* This is an internal page, recurse into the child. */
361 * The rightmost parent key is always invalid item pointer.
362 * Its value is 'Infinity' and not explicitly stored.
374 /* Step to next item in the queue */
375 stack_next = stack->
next;
385 * Main entry point for GIN checks.
387 * Allocates memory context and scans through the whole GIN graph.
392 void *callback_state,
403 "amcheck consistency check context",
409 * We don't know the height of the tree yet, but as soon as we encounter a
410 * leaf page, we will set 'leafdepth' to its depth.
414 /* Start the scan at the root page */
441 /* Do basic sanity checks on the page headers */
444 elog(
DEBUG3,
"processing entry tree page at blk %u, maxoff: %u", stack->
blkno, maxoff);
447 * It's possible that the page was split since we looked at the
448 * parent, so that we didn't missed the downlink of the right sibling
449 * when we scanned the parent. If so, add the right sibling to the
457 &parent_key_category);
468 page_max_key_category, parent_key_attnum,
469 parent_key, parent_key_category) < 0)
471 /* split page detected, install right link to the stack */
480 ptr->
blkno = rightlink;
486 /* Check that the tree has the same height in all branches */
490 leafdepth = stack->
depth;
491 else if (stack->
depth != leafdepth)
493 (
errcode(ERRCODE_INDEX_CORRUPTED),
494 errmsg(
"index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
499 * Check that tuples in each page are properly ordered and consistent
500 * with parent high key
514 (
errcode(ERRCODE_INDEX_CORRUPTED),
515 errmsg(
"index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
521 * Compare the entry to the preceding one.
523 * Don't check for high key on the rightmost inner page, as this
524 * key is not really stored explicitly.
526 * The entries may be for different attributes, so make sure to
527 * use ginCompareAttEntries for comparison.
537 prev_key_category, current_attnum,
538 current_key, current_key_category) >= 0)
540 (
errcode(ERRCODE_INDEX_CORRUPTED),
541 errmsg(
"index \"%s\" has wrong tuple order on entry tree page, block %u, offset %u, rightlink %u",
546 * Check if this tuple is consistent with the downlink in the
556 &parent_key_category);
559 current_key_category, parent_key_attnum,
560 parent_key, parent_key_category) > 0)
563 * There was a discrepancy between parent and child
564 * tuples. We need to verify it is not a result of
565 * concurrent call of gistplacetopage(). So, lock parent
566 * and try to find downlink for current page. It may be
567 * missing due to concurrent page split, this is OK.
571 stack->
blkno, strategy);
573 /* We found it - make a final check before failing */
575 elog(
NOTICE,
"Unable to find parent tuple for block %u on block %u due to concurrent split",
582 &parent_key_category);
585 * Check if it is properly adjusted. If succeed,
586 * proceed to the next key.
589 current_key_category, parent_key_attnum,
590 parent_key, parent_key_category) > 0)
592 (
errcode(ERRCODE_INDEX_CORRUPTED),
593 errmsg(
"index \"%s\" has inconsistent records on page %u offset %u",
599 /* If this is an internal page, recurse into the child */
606 /* last tuple in layer has no high key */
616 /* If this item is a pointer to a posting tree, recurse into it */
630 for (
int j = 0;
j < nipd;
j++)
634 (
errcode(ERRCODE_INDEX_CORRUPTED),
635 errmsg(
"index \"%s\": posting list contains invalid heap pointer on block %u",
642 prev_attnum = current_attnum;
648 /* Step to next item in the queue */
649 stack_next = stack->
next;
661 * Verify that a freshly-read page looks sane.
669 * ReadBuffer verifies that every newly-read page passes
670 * PageHeaderIsValid, which means it either contains a reasonably sane
671 * page header or is all-zero. We have to defend against the all-zero
676 (
errcode(ERRCODE_INDEX_CORRUPTED),
677 errmsg(
"index \"%s\" contains unexpected zero page at block %u",
680 errhint(
"Please REINDEX it.")));
683 * Additionally check that the special area looks sane.
687 (
errcode(ERRCODE_INDEX_CORRUPTED),
688 errmsg(
"index \"%s\" contains corrupted page at block %u",
691 errhint(
"Please REINDEX it.")));
697 (
errcode(ERRCODE_INDEX_CORRUPTED),
698 errmsg(
"index \"%s\" has deleted internal page %u",
702 (
errcode(ERRCODE_INDEX_CORRUPTED),
703 errmsg(
"index \"%s\" has deleted page %u with tuples",
708 (
errcode(ERRCODE_INDEX_CORRUPTED),
709 errmsg(
"index \"%s\" has page %u with exceeding count of tuples",
714 * Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
716 * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
749 /* Found it! Make copy and return it */
769 (
errcode(ERRCODE_INDEX_CORRUPTED),
770 errmsg(
"line pointer points past end of tuple space in index \"%s\"",
778 * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED or LP_DEAD,
779 * since GIN never uses all three. Verify that line pointer has storage,
785 (
errcode(ERRCODE_INDEX_CORRUPTED),
786 errmsg(
"invalid line pointer storage in index \"%s\"",
#define InvalidAttrNumber
#define InvalidBlockNumber
static BlockNumber BlockIdGetBlockNumber(const BlockIdData *blockId)
BlockNumber BufferGetBlockNumber(Buffer buffer)
void ReleaseBuffer(Buffer buffer)
void UnlockReleaseBuffer(Buffer buffer)
void LockBuffer(Buffer buffer, int mode)
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
static Page BufferGetPage(Buffer buffer)
PageHeaderData * PageHeader
static uint16 PageGetSpecialSize(const PageData *page)
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
static bool PageIsNew(const PageData *page)
#define SizeOfPageHeaderData
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
int errdetail_internal(const char *fmt,...)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
#define GinIsPostingTree(itup)
#define GinPageGetOpaque(page)
#define GinGetPosting(itup)
#define GinGetDownlink(itup)
#define GinItupIsCompressed(itup)
#define GinGetNPosting(itup)
#define GinDataPageGetRightBound(page)
#define GinGetPostingTree(itup)
#define GinPageIsData(page)
signed char GinNullCategory
#define ItemPointerSetMin(p)
#define GinDataPageGetPostingItem(page, i)
#define GinPageIsDeleted(page)
#define GinPageIsLeaf(page)
ItemPointer GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
ItemPointer ginPostingListDecode(GinPostingList *plist, int *ndecoded_out)
OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
int ginCompareAttEntries(GinState *ginstate, OffsetNumber attnuma, Datum a, GinNullCategory categorya, OffsetNumber attnumb, Datum b, GinNullCategory categoryb)
Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, GinNullCategory *category)
void initGinState(GinState *state, Relation index)
Assert(PointerIsAligned(start, uint64))
IndexTuple CopyIndexTuple(IndexTuple source)
#define ItemIdGetLength(itemId)
#define ItemIdGetOffset(itemId)
#define ItemIdIsDead(itemId)
#define ItemIdIsUsed(itemId)
#define ItemIdIsRedirected(itemId)
#define ItemIdGetFlags(itemId)
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
static void ItemPointerSetInvalid(ItemPointerData *pointer)
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
static OffsetNumber ItemPointerGetOffsetNumberNoCheck(const ItemPointerData *pointer)
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
ItemPointerData * ItemPointer
static bool ItemPointerIsValid(const ItemPointerData *pointer)
IndexTupleData * IndexTuple
static Size IndexTupleSize(const IndexTupleData *itup)
#define MaxIndexTuplesPerPage
void pfree(void *pointer)
void * palloc0(Size size)
MemoryContext CurrentMemoryContext
void MemoryContextDelete(MemoryContext context)
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
#define CHECK_FOR_INTERRUPTS()
#define InvalidOffsetNumber
#define OffsetNumberIsValid(offsetNumber)
#define OffsetNumberNext(offsetNumber)
#define FirstOffsetNumber
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
#define RelationGetRelationName(relation)
ItemPointerData parentkey
struct GinPostingTreeScanItem * next
struct GinScanItem * next
void amcheck_lock_relation_and_check(Oid indrelid, Oid am_id, IndexDoCheckCallback check, LOCKMODE lockmode, void *state)
Datum gin_index_check(PG_FUNCTION_ARGS)
static IndexTuple gin_refind_parent(Relation rel, BlockNumber parentblkno, BlockNumber childblkno, BufferAccessStrategy strategy)
static void gin_check_parent_keys_consistency(Relation rel, Relation heaprel, void *callback_state, bool readonly)
static ItemPointer ginReadTupleWithoutState(IndexTuple itup, int *nitems)
static void gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting_tree_root)
struct GinPostingTreeScanItem GinPostingTreeScanItem
static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block, Page page, OffsetNumber offset)
struct GinScanItem GinScanItem
PG_FUNCTION_INFO_V1(gin_index_check)