[フレーム]

gist.c

1/*-------------------------------------------------------------------------

2 *

3 * gist.c

4 * interface routines for the postgres GiST index access method.

5 *

6 *

9 *

10 * IDENTIFICATION

11 * src/backend/access/gist/gist.c

12 *

13 *-------------------------------------------------------------------------

14 */

15#include "postgres.h"

16

17#include "access/gist_private.h"

18#include "access/gistscan.h"

19#include "access/xloginsert.h"

20#include "catalog/pg_collation.h"

21#include "commands/vacuum.h"

22#include "miscadmin.h"

23#include "nodes/execnodes.h"

24#include "storage/predicate.h"

25#include "utils/fmgrprotos.h"

26#include "utils/index_selfuncs.h"

27#include "utils/memutils.h"

28#include "utils/rel.h"

29

30/* non-export function prototypes */

31static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);

32static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,

33 GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);

34static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,

35 GISTSTATE *giststate,

36 IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,

37 Buffer leftchild, Buffer rightchild,

38 bool unlockbuf, bool unlockleftchild);

39static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,

40 GISTSTATE *giststate, List *splitinfo, bool unlockbuf);

41static void gistprunepage(Relation rel, Page page, Buffer buffer,

42 Relation heapRel);

43

44

/*
 * ROTATEDIST(d) -- allocate a zero-filled SplitPageLayout, give it an
 * invalid block number and an invalid buffer, link the current list head
 * 'd' behind it, and make 'd' point at the new node.
 *
 * Note that 'd' is evaluated twice, so it must be a plain lvalue without
 * side effects.
 */
#define ROTATEDIST(d) \
	do { \
		SplitPageLayout *tmp = (SplitPageLayout *) palloc0(sizeof(SplitPageLayout)); \
		tmp->next = (d); \
		tmp->buffer = InvalidBuffer; \
		tmp->block.blkno = InvalidBlockNumber; \
		(d) = tmp; \
	} while (0)

52

53

54/*

55 * GiST handler function: return IndexAmRoutine with access method parameters

56 * and callbacks.

57 */

58Datum

59 gisthandler(PG_FUNCTION_ARGS)

60{

61 IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

62

63 amroutine->amstrategies = 0;

64 amroutine->amsupport = GISTNProcs;

65 amroutine->amoptsprocnum = GIST_OPTIONS_PROC;

66 amroutine->amcanorder = false;

67 amroutine->amcanorderbyop = true;

68 amroutine->amcanhash = false;

69 amroutine->amconsistentequality = false;

70 amroutine->amconsistentordering = false;

71 amroutine->amcanbackward = false;

72 amroutine->amcanunique = false;

73 amroutine->amcanmulticol = true;

74 amroutine->amoptionalkey = true;

75 amroutine->amsearcharray = false;

76 amroutine->amsearchnulls = true;

77 amroutine->amstorage = true;

78 amroutine->amclusterable = true;

79 amroutine->ampredlocks = true;

80 amroutine->amcanparallel = false;

81 amroutine->amcanbuildparallel = false;

82 amroutine->amcaninclude = true;

83 amroutine->amusemaintenanceworkmem = false;

84 amroutine->amsummarizing = false;

85 amroutine->amparallelvacuumoptions =

86 VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;

87 amroutine->amkeytype = InvalidOid;

88

89 amroutine->ambuild = gistbuild;

90 amroutine->ambuildempty = gistbuildempty;

91 amroutine->aminsert = gistinsert;

92 amroutine->aminsertcleanup = NULL;

93 amroutine->ambulkdelete = gistbulkdelete;

94 amroutine->amvacuumcleanup = gistvacuumcleanup;

95 amroutine->amcanreturn = gistcanreturn;

96 amroutine->amcostestimate = gistcostestimate;

97 amroutine->amgettreeheight = NULL;

98 amroutine->amoptions = gistoptions;

99 amroutine->amproperty = gistproperty;

100 amroutine->ambuildphasename = NULL;

101 amroutine->amvalidate = gistvalidate;

102 amroutine->amadjustmembers = gistadjustmembers;

103 amroutine->ambeginscan = gistbeginscan;

104 amroutine->amrescan = gistrescan;

105 amroutine->amgettuple = gistgettuple;

106 amroutine->amgetbitmap = gistgetbitmap;

107 amroutine->amendscan = gistendscan;

108 amroutine->ammarkpos = NULL;

109 amroutine->amrestrpos = NULL;

110 amroutine->amestimateparallelscan = NULL;

111 amroutine->aminitparallelscan = NULL;

112 amroutine->amparallelrescan = NULL;

113 amroutine->amtranslatestrategy = NULL;

114 amroutine->amtranslatecmptype = gisttranslatecmptype;

115

116 PG_RETURN_POINTER(amroutine);

117}

118

119/*

120 * Create and return a temporary memory context for use by GiST. We

121 * _always_ invoke user-provided methods in a temporary memory

122 * context, so that memory leaks in those functions cannot cause

123 * problems. Also, we use some additional temporary contexts in the

124 * GiST code itself, to avoid the need to do some awkward manual

125 * memory management.

126 */

127MemoryContext

128 createTempGistContext(void)

129{

130 return AllocSetContextCreate(CurrentMemoryContext,

131 "GiST temporary context",

132 ALLOCSET_DEFAULT_SIZES);

133}

134

135/*

136 * gistbuildempty() -- build an empty gist index in the initialization fork

137 */

138void

139 gistbuildempty(Relation index)

140{

141 Buffer buffer;

142

143 /* Initialize the root page */

144 buffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,

145 EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST);

146

147 /* Initialize and xlog buffer */

148 START_CRIT_SECTION();

149 GISTInitBuffer(buffer, F_LEAF);

150 MarkBufferDirty(buffer);

151 log_newpage_buffer(buffer, true);

152 END_CRIT_SECTION();

153

154 /* Unlock and release the buffer */

155 UnlockReleaseBuffer(buffer);

156}

157

158/*

159 * gistinsert -- wrapper for GiST tuple insertion.

160 *

161 * This is the public interface routine for tuple insertion in GiSTs.

162 * It doesn't do any work; just locks the relation and passes the buck.

163 */

164bool

165 gistinsert(Relation r, Datum *values, bool *isnull,

166 ItemPointer ht_ctid, Relation heapRel,

167 IndexUniqueCheck checkUnique,

168 bool indexUnchanged,

169 IndexInfo *indexInfo)

170{

171 GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache;

172 IndexTuple itup;

173 MemoryContext oldCxt;

174

175 /* Initialize GISTSTATE cache if first call in this statement */

176 if (giststate == NULL)

177 {

178 oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);

179 giststate = initGISTstate(r);

180 giststate->tempCxt = createTempGistContext();

181 indexInfo->ii_AmCache = giststate;

182 MemoryContextSwitchTo(oldCxt);

183 }

184

185 oldCxt = MemoryContextSwitchTo(giststate->tempCxt);

186

187 itup = gistFormTuple(giststate, r, values, isnull, true);

188 itup->t_tid = *ht_ctid;

189

190 gistdoinsert(r, itup, 0, giststate, heapRel, false);

191

192 /* cleanup */

193 MemoryContextSwitchTo(oldCxt);

194 MemoryContextReset(giststate->tempCxt);

195

196 return false;

197}

198

199

200/*

201 * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple

202 * at that offset is atomically removed along with inserting the new tuples.

203 * This is used to replace a tuple with a new one.

204 *

205 * If 'leftchildbuf' is valid, we're inserting the downlink for the page

206 * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.

207 * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.

208 *

209 * If 'markfollowright' is true and the page is split, the left child is

210 * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered

211 * index build, however, there is no concurrent access and the page splitting

212 * is done in a slightly simpler fashion, and false is passed.

213 *

214 * If there is not enough room on the page, it is split. All the split

215 * pages are kept pinned and locked and returned in *splitinfo, the caller

216 * is responsible for inserting the downlinks for them. However, if

217 * 'buffer' is the root page and it needs to be split, gistplacetopage()

218 * performs the split as one atomic operation, and *splitinfo is set to NIL.

219 * In that case, we continue to hold the root page locked, and the child

220 * pages are released; note that new tuple(s) are *not* on the root page

221 * but in one of the new child pages.

222 *

223 * If 'newblkno' is not NULL, returns the block number of page the first

224 * new/updated tuple was inserted to. Usually it's the given page, but could

225 * be its right sibling if the page was split.

226 *

227 * Returns 'true' if the page was split, 'false' otherwise.

 *
 * Remaining arguments, as they are used in the body:
 *   rel        - the index relation being modified
 *   freespace  - handed to gistnospace() when deciding whether the new
 *                tuples fit on the page
 *   giststate  - passed through to gistSplit()
 *   ntup       - number of entries in the 'itup' array
 *   heapRel    - passed to gistprunepage() and gistNewBuffer()
 *   is_build   - when true, no WAL record is written and the pages are
 *                stamped with GistBuildLSN instead

228 */

229bool

230 gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,

231 Buffer buffer,

232 IndexTuple *itup, int ntup, OffsetNumber oldoffnum,

233 BlockNumber *newblkno,

234 Buffer leftchildbuf,

235 List **splitinfo,

236 bool markfollowright,

237 Relation heapRel,

238 bool is_build)

239{

240 BlockNumber blkno = BufferGetBlockNumber(buffer);

241 Page page = BufferGetPage(buffer);

242 bool is_leaf = (GistPageIsLeaf(page)) ? true : false;

243 XLogRecPtr recptr;

244 bool is_split;

245

246 /*

247 * Refuse to modify a page that's incompletely split. This should not

248 * happen because we finish any incomplete splits while we walk down the

249 * tree. However, it's remotely possible that another concurrent inserter

250 * splits a parent page, and errors out before completing the split. We

251 * will just throw an error in that case, and leave any split we had in

252 * progress unfinished too. The next insert that comes along will clean up

253 * the mess.

254 */

255 if (GistFollowRight(page))

256 elog(ERROR, "concurrent GiST page split was incomplete");

257

258 /* should never try to insert to a deleted page */

259 Assert(!GistPageIsDeleted(page));

260

/* start with an empty result; only the non-root split path below appends to it */

261 *splitinfo = NIL;

262

263 /*

264 * if isupdate, remove old key: This node's key has been modified, either

265 * because a child split occurred or because we needed to adjust our key

266 * for an insert in a child node. Therefore, remove the old version of

267 * this node's key.

268 *

269 * for WAL replay, in the non-split case we handle this by setting up a

270 * one-element todelete array; in the split case, it's handled implicitly

271 * because the tuple vector passed to gistSplit won't include this tuple.

272 */

273 is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);

274

275 /*

276 * If leaf page is full, try at first to delete dead tuples. And then

277 * check again.

278 */

279 if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))

280 {

281 gistprunepage(rel, page, buffer, heapRel);

282 is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);

283 }

284

285 if (is_split)

286 {

287 /* no space for insertion */

288 IndexTuple *itvec;

289 int tlen;

290 SplitPageLayout *dist = NULL,

291 *ptr;

292 BlockNumber oldrlink = InvalidBlockNumber;

293 GistNSN oldnsn = 0;

294 SplitPageLayout rootpg;

295 bool is_rootsplit;

296 int npage;

297

298 is_rootsplit = (blkno == GIST_ROOT_BLKNO);

299

300 /*

301 * Form index tuples vector to split. If we're replacing an old tuple,

302 * remove the old version from the vector.

303 */

304 itvec = gistextractpage(page, &tlen);

305 if (OffsetNumberIsValid(oldoffnum))

306 {

307 /* on inner page we should remove old tuple */

308 int pos = oldoffnum - FirstOffsetNumber;

309

310 tlen--;

/* pos == tlen (after the decrement) means the removed entry was the last one, so nothing needs shifting */

311 if (pos != tlen)

312 memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));

313 }

314 itvec = gistjoinvector(itvec, &tlen, itup, ntup);

315 dist = gistSplit(rel, page, itvec, tlen, giststate);

316

317 /*

318 * Check that split didn't produce too many pages.

319 */

320 npage = 0;

321 for (ptr = dist; ptr; ptr = ptr->next)

322 npage++;

323 /* in a root split, we'll add one more page to the list below */

324 if (is_rootsplit)

325 npage++;

326 if (npage > GIST_MAX_SPLIT_PAGES)

327 elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",

328 npage, GIST_MAX_SPLIT_PAGES);

329

330 /*

331 * Set up pages to work with. Allocate new buffers for all but the

332 * leftmost page. The original page becomes the new leftmost page, and

333 * is just replaced with the new contents.

334 *

335 * For a root-split, allocate new buffers for all child pages, the

336 * original page is overwritten with new root page containing

337 * downlinks to the new child pages.

338 */

339 ptr = dist;

340 if (!is_rootsplit)

341 {

342 /* save old rightlink and NSN */

343 oldrlink = GistPageGetOpaque(page)->rightlink;

344 oldnsn = GistPageGetNSN(page);

345

346 dist->buffer = buffer;

347 dist->block.blkno = BufferGetBlockNumber(buffer);

348 dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));

349

350 /* clean all flags except F_LEAF */

351 GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;

352

/* the head entry reuses 'buffer'; only the entries after it get new pages in the loop below */

353 ptr = ptr->next;

354 }

355 for (; ptr; ptr = ptr->next)

356 {

357 /* Allocate new page */

358 ptr->buffer = gistNewBuffer(rel, heapRel);

359 GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);

360 ptr->page = BufferGetPage(ptr->buffer);

361 ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);

362 PredicateLockPageSplit(rel,

363 BufferGetBlockNumber(buffer),

364 BufferGetBlockNumber(ptr->buffer));

365 }

366

367 /*

368 * Now that we know which blocks the new pages go to, set up downlink

369 * tuples to point to them.

370 */

371 for (ptr = dist; ptr; ptr = ptr->next)

372 {

373 ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);

374 GistTupleSetValid(ptr->itup);

375 }

376

377 /*

378 * If this is a root split, we construct the new root page with the

379 * downlinks here directly, instead of requiring the caller to insert

380 * them. Add the new root page to the list along with the child pages.

381 */

382 if (is_rootsplit)

383 {

384 IndexTuple *downlinks;

385 int ndownlinks = 0;

386 int i;

387

388 rootpg.buffer = buffer;

389 rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));

390 GistPageGetOpaque(rootpg.page)->flags = 0;

391

392 /* Prepare a vector of all the downlinks */

393 for (ptr = dist; ptr; ptr = ptr->next)

394 ndownlinks++;

395 downlinks = palloc(sizeof(IndexTuple) * ndownlinks);

396 for (i = 0, ptr = dist; ptr; ptr = ptr->next)

397 downlinks[i++] = ptr->itup;

398

399 rootpg.block.blkno = GIST_ROOT_BLKNO;

400 rootpg.block.num = ndownlinks;

401 rootpg.list = gistfillitupvec(downlinks, ndownlinks,

402 &(rootpg.lenlist));

403 rootpg.itup = NULL;

404

/* rootpg is a local of this block; it becomes the list head, and 'dist' is likewise not used outside the block */

405 rootpg.next = dist;

406 dist = &rootpg;

407 }

408 else

409 {

410 /* Prepare split-info to be returned to caller */

411 for (ptr = dist; ptr; ptr = ptr->next)

412 {

413 GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));

414

415 si->buf = ptr->buffer;

416 si->downlink = ptr->itup;

417 *splitinfo = lappend(*splitinfo, si);

418 }

419 }

420

421 /*

422 * Fill all pages. All the pages are new, ie. freshly allocated empty

423 * pages, or a temporary copy of the old page.

424 */

425 for (ptr = dist; ptr; ptr = ptr->next)

426 {

/* ptr->list is walked as a packed byte sequence of tuples, advancing by each tuple's size */

427 char *data = (char *) (ptr->list);

428

429 for (int i = 0; i < ptr->block.num; i++)

430 {

431 IndexTuple thistup = (IndexTuple) data;

432

433 if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)

434 elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));

435

436 /*

437 * If this is the first inserted/updated tuple, let the caller

438 * know which page it landed on.

439 */

440 if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))

441 *newblkno = ptr->block.blkno;

442

443 data += IndexTupleSize(thistup);

444 }

445

446 /* Set up rightlinks */

447 if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)

448 GistPageGetOpaque(ptr->page)->rightlink =

449 ptr->next->block.blkno;

450 else

451 GistPageGetOpaque(ptr->page)->rightlink = oldrlink;

452

453 /*

454 * Mark the all but the right-most page with the follow-right

455 * flag. It will be cleared as soon as the downlink is inserted

456 * into the parent, but this ensures that if we error out before

457 * that, the index is still consistent. (in buffering build mode,

458 * any error will abort the index build anyway, so this is not

459 * needed.)

460 */

461 if (ptr->next && !is_rootsplit && markfollowright)

462 GistMarkFollowRight(ptr->page);

463 else

464 GistClearFollowRight(ptr->page);

465

466 /*

467 * Copy the NSN of the original page to all pages. The

468 * F_FOLLOW_RIGHT flags ensure that scans will follow the

469 * rightlinks until the downlinks are inserted.

470 */

471 GistPageSetNSN(ptr->page, oldnsn);

472 }

473

474 /*

475 * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL

476 * insertion for that. NB: The number of pages and data segments

477 * specified here must match the calculations in gistXLogSplit()!

478 */

479 if (!is_build && RelationNeedsWAL(rel))

480 XLogEnsureRecordSpace(npage, 1 + npage * 2);

481

/* this critical section is closed by the END_CRIT_SECTION() shared with the no-split path at the end of the function */

482 START_CRIT_SECTION();

483

484 /*

485 * Must mark buffers dirty before XLogInsert, even though we'll still

486 * be changing their opaque fields below.

487 */

488 for (ptr = dist; ptr; ptr = ptr->next)

489 MarkBufferDirty(ptr->buffer);

490 if (BufferIsValid(leftchildbuf))

491 MarkBufferDirty(leftchildbuf);

492

493 /*

494 * The first page in the chain was a temporary working copy meant to

495 * replace the old page. Copy it over the old page.

496 */

497 PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));

498 dist->page = BufferGetPage(dist->buffer);

499

500 /*

501 * Write the WAL record.

502 *

503 * If we're building a new index, however, we don't WAL-log changes

504 * yet. The LSN-NSN interlock between parent and child requires that

505 * LSNs never move backwards, so set the LSNs to a value that's

506 * smaller than any real or fake unlogged LSN that might be generated

507 * later. (There can't be any concurrent scans during index build, so

508 * we don't need to be able to detect concurrent splits yet.)

509 */

510 if (is_build)

511 recptr = GistBuildLSN;

512 else

513 {

514 if (RelationNeedsWAL(rel))

515 recptr = gistXLogSplit(is_leaf,

516 dist, oldrlink, oldnsn, leftchildbuf,

517 markfollowright);

518 else

519 recptr = gistGetFakeLSN(rel);

520 }

521

522 for (ptr = dist; ptr; ptr = ptr->next)

523 PageSetLSN(ptr->page, recptr);

524

525 /*

526 * Return the new child buffers to the caller.

527 *

528 * If this was a root split, we've already inserted the downlink

529 * pointers, in the form of a new root page. Therefore we can release

530 * all the new buffers, and keep just the root page locked.

531 */

532 if (is_rootsplit)

533 {

534 for (ptr = dist->next; ptr; ptr = ptr->next)

535 UnlockReleaseBuffer(ptr->buffer);

536 }

537 }

538 else

539 {

540 /*

541 * Enough space. We always get here if ntup==0.

542 */

543 START_CRIT_SECTION();

544

545 /*

546 * Delete old tuple if any, then insert new tuple(s) if any. If

547 * possible, use the fast path of PageIndexTupleOverwrite.

548 */

549 if (OffsetNumberIsValid(oldoffnum))

550 {

551 if (ntup == 1)

552 {

553 /* One-for-one replacement, so use PageIndexTupleOverwrite */

554 if (!PageIndexTupleOverwrite(page, oldoffnum, (Item) *itup,

555 IndexTupleSize(*itup)))

556 elog(ERROR, "failed to add item to index page in \"%s\"",

557 RelationGetRelationName(rel));

558 }

559 else

560 {

561 /* Delete old, then append new tuple(s) to page */

562 PageIndexTupleDelete(page, oldoffnum);

563 gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);

564 }

565 }

566 else

567 {

568 /* Just append new tuples at the end of the page */

569 gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);

570 }

571

572 MarkBufferDirty(buffer);

573

574 if (BufferIsValid(leftchildbuf))

575 MarkBufferDirty(leftchildbuf);

576

577 if (is_build)

578 recptr = GistBuildLSN;

579 else

580 {

581 if (RelationNeedsWAL(rel))

582 {

/* at most one offset (oldoffnum) is ever reported as deleted on this path */

583 OffsetNumber ndeloffs = 0,

584 deloffs[1];

585

586 if (OffsetNumberIsValid(oldoffnum))

587 {

588 deloffs[0] = oldoffnum;

589 ndeloffs = 1;

590 }

591

592 recptr = gistXLogUpdate(buffer,

593 deloffs, ndeloffs, itup, ntup,

594 leftchildbuf);

595 }

596 else

597 recptr = gistGetFakeLSN(rel);

598 }

599 PageSetLSN(page, recptr);

600

601 if (newblkno)

602 *newblkno = blkno;

603 }

604

605 /*

606 * If we inserted the downlink for a child page, set NSN and clear

607 * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to

608 * follow the rightlink if and only if they looked at the parent page

609 * before we inserted the downlink.

610 *

611 * Note that we do this *after* writing the WAL record. That means that

612 * the possible full page image in the WAL record does not include these

613 * changes, and they must be replayed even if the page is restored from

614 * the full page image. There's a chicken-and-egg problem: if we updated

615 * the child pages first, we wouldn't know the recptr of the WAL record

616 * we're about to write.

617 */

618 if (BufferIsValid(leftchildbuf))

619 {

620 Page leftpg = BufferGetPage(leftchildbuf);

621

622 GistPageSetNSN(leftpg, recptr);

623 GistClearFollowRight(leftpg);

624

625 PageSetLSN(leftpg, recptr);

626 }

627

/* closes the critical section opened in whichever branch (split or no-split) ran above */

628 END_CRIT_SECTION();

629

630 return is_split;

631}

632

633/*

634 * Workhorse routine for doing insertion into a GiST index. Note that

635 * this routine assumes it is invoked in a short-lived memory context,

636 * so it does not bother releasing palloc'd allocations.

 *
 * Arguments, as they are used in the body:
 *   r         - the index relation; stored in the insert state and used
 *               for all buffer reads
 *   itup      - the tuple to insert
 *   freespace - stored in the insert state
 *   giststate - passed to gistchoose(), gistgetadjusted(), gistfixsplit()
 *               and gistinserttuple()
 *   heapRel   - stored in the insert state
 *   is_build  - stored in the insert state
 *
 * The function walks from the root to a leaf, keeping a linked stack of
 * GISTInsertStack entries (child -> parent), and releases every pin still
 * held on that stack before returning.

637 */

638void

639 gistdoinsert(Relation r, IndexTuple itup, Size freespace,

640 GISTSTATE *giststate, Relation heapRel, bool is_build)

641{

642 ItemId iid;

643 IndexTuple idxtuple;

644 GISTInsertStack firststack;

645 GISTInsertStack *stack;

646 GISTInsertState state;

/* tracks whether stack->buffer is currently held with GIST_EXCLUSIVE */

647 bool xlocked = false;

648

649 memset(&state, 0, sizeof(GISTInsertState));

650 state.freespace = freespace;

651 state.r = r;

652 state.heapRel = heapRel;

653 state.is_build = is_build;

654

655 /* Start from the root */

656 firststack.blkno = GIST_ROOT_BLKNO;

/* lsn == 0 makes the XLogRecPtrIsInvalid() test in the loop read the buffer on first visit (assuming 0 is the invalid LSN value) */

657 firststack.lsn = 0;

658 firststack.retry_from_parent = false;

659 firststack.parent = NULL;

660 firststack.downlinkoffnum = InvalidOffsetNumber;

661 state.stack = stack = &firststack;

662

663 /*

664 * Walk down along the path of smallest penalty, updating the parent

665 * pointers with the key we're inserting as we go. If we crash in the

666 * middle, the tree is consistent, although the possible parent updates

667 * were a waste.

668 */

669 for (;;)

670 {

671 /*

672 * If we split an internal page while descending the tree, we have to

673 * retry at the parent. (Normally, the LSN-NSN interlock below would

674 * also catch this and cause us to retry. But LSNs are not updated

675 * during index build.)

676 */

677 while (stack->retry_from_parent)

678 {

679 if (xlocked)

680 LockBuffer(stack->buffer, GIST_UNLOCK);

681 xlocked = false;

682 ReleaseBuffer(stack->buffer);

683 state.stack = stack = stack->parent;

684 }

685

/* an entry whose lsn is still invalid has not been visited yet, so its buffer must be read (and thereby pinned) now */

686 if (XLogRecPtrIsInvalid(stack->lsn))

687 stack->buffer = ReadBuffer(state.r, stack->blkno);

688

689 /*

690 * Be optimistic and grab shared lock first. Swap it for an exclusive

691 * lock later if we need to update the page.

692 */

693 if (!xlocked)

694 {

695 LockBuffer(stack->buffer, GIST_SHARE);

696 gistcheckpage(state.r, stack->buffer);

697 }

698

699 stack->page = BufferGetPage(stack->buffer);

700 stack->lsn = xlocked ?

701 PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);

702 Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));

703

704 /*

705 * If this page was split but the downlink was never inserted to the

706 * parent because the inserting backend crashed before doing that, fix

707 * that now.

708 */

709 if (GistFollowRight(stack->page))

710 {

711 if (!xlocked)

712 {

713 LockBuffer(stack->buffer, GIST_UNLOCK);

714 LockBuffer(stack->buffer, GIST_EXCLUSIVE);

715 xlocked = true;

716 /* someone might've completed the split when we unlocked */

717 if (!GistFollowRight(stack->page))

718 continue;

719 }

720 gistfixsplit(&state, giststate);

721

/* after fixing the split, drop this page and restart the descent from its parent */

722 UnlockReleaseBuffer(stack->buffer);

723 xlocked = false;

724 state.stack = stack = stack->parent;

725 continue;

726 }

727

728 if ((stack->blkno != GIST_ROOT_BLKNO &&

729 stack->parent->lsn < GistPageGetNSN(stack->page)) ||

730 GistPageIsDeleted(stack->page))

731 {

732 /*

733 * Concurrent split or page deletion detected. There's no

734 * guarantee that the downlink for this page is consistent with

735 * the tuple we're inserting anymore, so go back to parent and

736 * rechoose the best child.

737 */

738 UnlockReleaseBuffer(stack->buffer);

739 xlocked = false;

740 state.stack = stack = stack->parent;

741 continue;

742 }

743

744 if (!GistPageIsLeaf(stack->page))

745 {

746 /*

747 * This is an internal page so continue to walk down the tree.

748 * Find the child node that has the minimum insertion penalty.

749 */

750 BlockNumber childblkno;

751 IndexTuple newtup;

752 GISTInsertStack *item;

753 OffsetNumber downlinkoffnum;

754

755 downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);

756 iid = PageGetItemId(stack->page, downlinkoffnum);

757 idxtuple = (IndexTuple) PageGetItem(stack->page, iid);

758 childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));

759

760 /*

761 * Check that it's not a leftover invalid tuple from pre-9.1

762 */

763 if (GistTupleIsInvalid(idxtuple))

764 ereport(ERROR,

765 (errmsg("index \"%s\" contains an inner tuple marked as invalid",

766 RelationGetRelationName(r)),

767 errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),

768 errhint("Please REINDEX it.")));

769

770 /*

771 * Check that the key representing the target child node is

772 * consistent with the key we're inserting. Update it if it's not.

773 */

774 newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);

/* a non-NULL result is the replacement downlink tuple; NULL means the existing key needs no change */

775 if (newtup)

776 {

777 /*

778 * Swap shared lock for an exclusive one. Beware, the page may

779 * change while we unlock/lock the page...

780 */

781 if (!xlocked)

782 {

783 LockBuffer(stack->buffer, GIST_UNLOCK);

784 LockBuffer(stack->buffer, GIST_EXCLUSIVE);

785 xlocked = true;

786 stack->page = BufferGetPage(stack->buffer);

787

788 if (PageGetLSN(stack->page) != stack->lsn)

789 {

790 /* the page was changed while we unlocked it, retry */

791 continue;

792 }

793 }

794

795 /*

796 * Update the tuple.

797 *

798 * We still hold the lock after gistinserttuple(), but it

799 * might have to split the page to make the updated tuple fit.

800 * In that case the updated tuple might migrate to the other

801 * half of the split, so we have to go back to the parent and

802 * descend back to the half that's a better fit for the new

803 * tuple.

804 */

805 if (gistinserttuple(&state, stack, giststate, newtup,

806 downlinkoffnum))

807 {

808 /*

809 * If this was a root split, the root page continues to be

810 * the parent and the updated tuple went to one of the

811 * child pages, so we just need to retry from the root

812 * page.

813 */

814 if (stack->blkno != GIST_ROOT_BLKNO)

815 {

816 UnlockReleaseBuffer(stack->buffer);

817 xlocked = false;

818 state.stack = stack = stack->parent;

819 }

820 continue;

821 }

822 }

/* only the lock is dropped here; the pin on this page stays until the stack is unwound */

823 LockBuffer(stack->buffer, GIST_UNLOCK);

824 xlocked = false;

825

826 /* descend to the chosen child */

/* palloc0 zero-fills the new entry, so its lsn and retry_from_parent start out as 0/false */

827 item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));

828 item->blkno = childblkno;

829 item->parent = stack;

830 item->downlinkoffnum = downlinkoffnum;

831 state.stack = stack = item;

832 }

833 else

834 {

835 /*

836 * Leaf page. Insert the new key. We've already updated all the

837 * parents on the way down, but we might have to split the page if

838 * it doesn't fit. gistinserttuple() will take care of that.

839 */

840

841 /*

842 * Swap shared lock for an exclusive one. Be careful, the page may

843 * change while we unlock/lock the page...

844 */

845 if (!xlocked)

846 {

847 LockBuffer(stack->buffer, GIST_UNLOCK);

848 LockBuffer(stack->buffer, GIST_EXCLUSIVE);

849 xlocked = true;

850 stack->page = BufferGetPage(stack->buffer);

851 stack->lsn = PageGetLSN(stack->page);

852

853 if (stack->blkno == GIST_ROOT_BLKNO)

854 {

855 /*

856 * the only page that can become inner instead of leaf is

857 * the root page, so for root we should recheck it

858 */

859 if (!GistPageIsLeaf(stack->page))

860 {

861 /*

862 * very rare situation: during unlock/lock index with

863 * number of pages = 1 was increased

864 */

865 LockBuffer(stack->buffer, GIST_UNLOCK);

866 xlocked = false;

867 continue;

868 }

869

870 /*

871 * we don't need to check root split, because checking

872 * leaf/inner is enough to recognize split for root

873 */

874 }

875 else if ((GistFollowRight(stack->page) ||

876 stack->parent->lsn < GistPageGetNSN(stack->page)) ||

877 GistPageIsDeleted(stack->page))

878 {

879 /*

880 * The page was split or deleted while we momentarily

881 * unlocked the page. Go back to parent.

882 */

883 UnlockReleaseBuffer(stack->buffer);

884 xlocked = false;

885 state.stack = stack = stack->parent;

886 continue;

887 }

888 }

889

890 /* now state.stack->(page, buffer and blkno) points to leaf page */

891

/* the return value (whether the leaf was split) is deliberately not examined here */

892 gistinserttuple(&state, stack, giststate, itup,

893 InvalidOffsetNumber);

894 LockBuffer(stack->buffer, GIST_UNLOCK);

895

896 /* Release any pins we might still hold before exiting */

897 for (; stack; stack = stack->parent)

898 ReleaseBuffer(stack->buffer);

899 break;

900 }

901 }

902}

903

904/*

905 * Traverse the tree to find path from root page to specified "child" block.

906 *

907 * returns a new insertion stack, starting from the parent of "child", up

908 * to the root. *downlinkoffnum is set to the offset of the downlink in the

909 * direct parent of child.

910 *

911 * To prevent deadlocks, this should lock only one page at a time.

912 */

913static GISTInsertStack *

914 gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)

915{

916 Page page;

917 Buffer buffer;

918 OffsetNumber i,

919 maxoff;

920 ItemId iid;

921 IndexTuple idxtuple;

922 List *fifo;

923 GISTInsertStack *top,

924 *ptr;

925 BlockNumber blkno;

926

927 top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));

928 top->blkno = GIST_ROOT_BLKNO;

929 top->downlinkoffnum = InvalidOffsetNumber;

930

931 fifo = list_make1(top);

932 while (fifo != NIL)

933 {

934 /* Get next page to visit */

935 top = linitial(fifo);

936 fifo = list_delete_first(fifo);

937

938 buffer = ReadBuffer(r, top->blkno);

939 LockBuffer(buffer, GIST_SHARE);

940 gistcheckpage(r, buffer);

941 page = BufferGetPage(buffer);

942

943 if (GistPageIsLeaf(page))

944 {

945 /*

946 * Because we scan the index top-down, all the rest of the pages

947 * in the queue must be leaf pages as well.

948 */

949 UnlockReleaseBuffer(buffer);

950 break;

951 }

952

953 /* currently, internal pages are never deleted */

954 Assert(!GistPageIsDeleted(page));

955

956 top->lsn = BufferGetLSNAtomic(buffer);

957

958 /*

959 * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a

960 * downlink. This should not normally happen..

961 */

962 if (GistFollowRight(page))

963 elog(ERROR, "concurrent GiST page split was incomplete");

964

965 if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&

966 GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )

967 {

968 /*

969 * Page was split while we looked elsewhere. We didn't see the

970 * downlink to the right page when we scanned the parent, so add

971 * it to the queue now.

972 *

973 * Put the right page ahead of the queue, so that we visit it

974 * next. That's important, because if this is the lowest internal

975 * level, just above leaves, we might already have queued up some

976 * leaf pages, and we assume that there can't be any non-leaf

977 * pages behind leaf pages.

978 */

979 ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));

980 ptr->blkno = GistPageGetOpaque(page)->rightlink;

981 ptr->downlinkoffnum = InvalidOffsetNumber;

982 ptr->parent = top->parent;

983

984 fifo = lcons(ptr, fifo);

985 }

986

987 maxoff = PageGetMaxOffsetNumber(page);

988

989 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))

990 {

991 iid = PageGetItemId(page, i);

992 idxtuple = (IndexTuple) PageGetItem(page, iid);

993 blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));

994 if (blkno == child)

995 {

996 /* Found it! */

997 UnlockReleaseBuffer(buffer);

998 *downlinkoffnum = i;

999 return top;

1000 }

1001 else

1002 {

1003 /* Append this child to the list of pages to visit later */

1004 ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));

1005 ptr->blkno = blkno;

1006 ptr->downlinkoffnum = i;

1007 ptr->parent = top;

1008

1009 fifo = lappend(fifo, ptr);

1010 }

1011 }

1012

1013 UnlockReleaseBuffer(buffer);

1014 }

1015

1016 elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",

1017 RelationGetRelationName(r), child);

1018 return NULL; /* keep compiler quiet */

1019}

1020

1021/*

1022 * Updates the stack so that child->parent is the correct parent of the

1023 * child. child->parent must be exclusively locked on entry, and will

1024 * remain so at exit, but it might not be the same page anymore.

1025 */

1026static void

1027 gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)

1028{

1029 GISTInsertStack *parent = child->parent;

1030 ItemId iid;

1031 IndexTuple idxtuple;

1032 OffsetNumber maxoff;

1033 GISTInsertStack *ptr;

1034

1035 gistcheckpage(r, parent->buffer);

1036 parent->page = BufferGetPage(parent->buffer);

1037 maxoff = PageGetMaxOffsetNumber(parent->page);

1038

1039 /* Check if the downlink is still where it was before */

1040 if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff)

1041 {

1042 iid = PageGetItemId(parent->page, child->downlinkoffnum);

1043 idxtuple = (IndexTuple) PageGetItem(parent->page, iid);

1044 if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)

1045 return; /* still there */

1046 }

1047

1048 /*

1049 * The page has changed since we looked. During normal operation, every

1050 * update of a page changes its LSN, so the LSN we memorized should have

1051 * changed too.

1052 *

1053 * During index build, however, we don't WAL-log the changes until we have

1054 * built the index, so the LSN doesn't change. There is no concurrent

1055 * activity during index build, but we might have changed the parent

1056 * ourselves.

1057 *

1058 * We will also get here if child->downlinkoffnum is invalid. That happens

1059 * if 'parent' had been updated by an earlier call to this function on its

1060 * grandchild, which had to move right.

1061 */

1062 Assert(parent->lsn != PageGetLSN(parent->page) || is_build ||

1063 child->downlinkoffnum == InvalidOffsetNumber);

1064

1065 /*

1066 * Scan the page to re-find the downlink. If the page was split, it might

1067 * have moved to a different page, so follow the right links until we find

1068 * it.

1069 */

1070 while (true)

1071 {

1072 OffsetNumber i;

1073

1074 maxoff = PageGetMaxOffsetNumber(parent->page);

1075 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))

1076 {

1077 iid = PageGetItemId(parent->page, i);

1078 idxtuple = (IndexTuple) PageGetItem(parent->page, iid);

1079 if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)

1080 {

1081 /* yes!!, found */

1082 child->downlinkoffnum = i;

1083 return;

1084 }

1085 }

1086

1087 parent->blkno = GistPageGetOpaque(parent->page)->rightlink;

1088 parent->downlinkoffnum = InvalidOffsetNumber;

1089 UnlockReleaseBuffer(parent->buffer);

1090 if (parent->blkno == InvalidBlockNumber)

1091 {

1092 /*

1093 * End of chain and still didn't find parent. It's a very-very

1094 * rare situation when the root was split.

1095 */

1096 break;

1097 }

1098 parent->buffer = ReadBuffer(r, parent->blkno);

1099 LockBuffer(parent->buffer, GIST_EXCLUSIVE);

1100 gistcheckpage(r, parent->buffer);

1101 parent->page = BufferGetPage(parent->buffer);

1102 }

1103

1104 /*

1105 * awful!!, we need search tree to find parent ... , but before we should

1106 * release all old parent

1107 */

1108

1109 ptr = child->parent->parent; /* child->parent already released above */

1110 while (ptr)

1111 {

1112 ReleaseBuffer(ptr->buffer);

1113 ptr = ptr->parent;

1114 }

1115

1116 /* ok, find new path */

1117 ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);

1118

1119 /* read all buffers as expected by caller */

1120 /* note we don't lock them or gistcheckpage them here! */

1121 while (ptr)

1122 {

1123 ptr->buffer = ReadBuffer(r, ptr->blkno);

1124 ptr->page = BufferGetPage(ptr->buffer);

1125 ptr = ptr->parent;

1126 }

1127

1128 /* install new chain of parents to stack */

1129 child->parent = parent;

1130

1131 /* make recursive call to normal processing */

1132 LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);

1133 gistFindCorrectParent(r, child, is_build);

1134}

1135

1136/*

1137 * Form a downlink pointer for the page in 'buf'.

1138 */

1139static IndexTuple

1140 gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,

1141 GISTInsertStack *stack, bool is_build)

1142{

1143 Page page = BufferGetPage(buf);

1144 OffsetNumber maxoff;

1145 OffsetNumber offset;

1146 IndexTuple downlink = NULL;

1147

1148 maxoff = PageGetMaxOffsetNumber(page);

1149 for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))

1150 {

1151 IndexTuple ituple = (IndexTuple)

1152 PageGetItem(page, PageGetItemId(page, offset));

1153

1154 if (downlink == NULL)

1155 downlink = CopyIndexTuple(ituple);

1156 else

1157 {

1158 IndexTuple newdownlink;

1159

1160 newdownlink = gistgetadjusted(rel, downlink, ituple,

1161 giststate);

1162 if (newdownlink)

1163 downlink = newdownlink;

1164 }

1165 }

1166

1167 /*

1168 * If the page is completely empty, we can't form a meaningful downlink

1169 * for it. But we have to insert a downlink for the page. Any key will do,

1170 * as long as its consistent with the downlink of parent page, so that we

1171 * can legally insert it to the parent. A minimal one that matches as few

1172 * scans as possible would be best, to keep scans from doing useless work,

1173 * but we don't know how to construct that. So we just use the downlink of

1174 * the original page that was split - that's as far from optimal as it can

1175 * get but will do..

1176 */

1177 if (!downlink)

1178 {

1179 ItemId iid;

1180

1181 LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);

1182 gistFindCorrectParent(rel, stack, is_build);

1183 iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);

1184 downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);

1185 downlink = CopyIndexTuple(downlink);

1186 LockBuffer(stack->parent->buffer, GIST_UNLOCK);

1187 }

1188

1189 ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf));

1190 GistTupleSetValid(downlink);

1191

1192 return downlink;

1193}

1194

1195

1196/*

1197 * Complete the incomplete split of state->stack->page.

1198 */

1199static void

1200 gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)

1201{

1202 GISTInsertStack *stack = state->stack;

1203 Buffer buf;

1204 Page page;

1205 List *splitinfo = NIL;

1206

1207 ereport(LOG,

1208 (errmsg("fixing incomplete split in index \"%s\", block %u",

1209 RelationGetRelationName(state->r), stack->blkno)));

1210

1211 Assert(GistFollowRight(stack->page));

1212 Assert(OffsetNumberIsValid(stack->downlinkoffnum));

1213

1214 buf = stack->buffer;

1215

1216 /*

1217 * Read the chain of split pages, following the rightlinks. Construct a

1218 * downlink tuple for each page.

1219 */

1220 for (;;)

1221 {

1222 GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));

1223 IndexTuple downlink;

1224

1225 page = BufferGetPage(buf);

1226

1227 /* Form the new downlink tuples to insert to parent */

1228 downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build);

1229

1230 si->buf = buf;

1231 si->downlink = downlink;

1232

1233 splitinfo = lappend(splitinfo, si);

1234

1235 if (GistFollowRight(page))

1236 {

1237 /* lock next page */

1238 buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);

1239 LockBuffer(buf, GIST_EXCLUSIVE);

1240 }

1241 else

1242 break;

1243 }

1244

1245 /* Insert the downlinks */

1246 gistfinishsplit(state, stack, giststate, splitinfo, false);

1247}

1248

1249/*

1250 * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the

1251 * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.

1252 * 'stack' represents the path from the root to the page being updated.

1253 *

1254 * The caller must hold an exclusive lock on stack->buffer. The lock is still

1255 * held on return, but the page might not contain the inserted tuple if the

1256 * page was split. The function returns true if the page was split, false

1257 * otherwise.

1258 */

1259static bool

1260 gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,

1261 GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)

1262{

1263 return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,

1264 InvalidBuffer, InvalidBuffer, false, false);

1265}

1266

1267/* ----------------

1268 * An extended workhorse version of gistinserttuple(). This version allows

1269 * inserting multiple tuples, or replacing a single tuple with multiple tuples.

1270 * This is used to recursively update the downlinks in the parent when a page

1271 * is split.

1272 *

1273 * If leftchild and rightchild are valid, we're inserting/replacing the

1274 * downlink for rightchild, and leftchild is its left sibling. We clear the

1275 * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the

1276 * insertion of the downlink.

1277 *

1278 * To avoid holding locks for longer than necessary, when recursing up the

1279 * tree to update the parents, the locking is a bit peculiar here. On entry,

1280 * the caller must hold an exclusive lock on stack->buffer, as well as

1281 * leftchild and rightchild if given. On return:

1282 *

1283 * - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is

1284 * always kept pinned, however.

1285 * - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page

1286 * is kept pinned.

1287 * - Lock and pin on 'rightchild' are always released.

1288 *

1289 * Returns 'true' if the page had to be split. Note that if the page was

1290 * split, the inserted/updated tuples might've been inserted to a right

1291 * sibling of stack->buffer instead of stack->buffer itself.

1292 */

1293static bool

1294 gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,

1295 GISTSTATE *giststate,

1296 IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,

1297 Buffer leftchild, Buffer rightchild,

1298 bool unlockbuf, bool unlockleftchild)

1299{

1300 List *splitinfo;

1301 bool is_split;

1302

1303 /*

1304 * Check for any rw conflicts (in serializable isolation level) just

1305 * before we intend to modify the page

1306 */

1307 CheckForSerializableConflictIn(state->r, NULL, BufferGetBlockNumber(stack->buffer));

1308

1309 /* Insert the tuple(s) to the page, splitting the page if necessary */

1310 is_split = gistplacetopage(state->r, state->freespace, giststate,

1311 stack->buffer,

1312 tuples, ntup,

1313 oldoffnum, NULL,

1314 leftchild,

1315 &splitinfo,

1316 true,

1317 state->heapRel,

1318 state->is_build);

1319

1320 /*

1321 * Before recursing up in case the page was split, release locks on the

1322 * child pages. We don't need to keep them locked when updating the

1323 * parent.

1324 */

1325 if (BufferIsValid(rightchild))

1326 UnlockReleaseBuffer(rightchild);

1327 if (BufferIsValid(leftchild) && unlockleftchild)

1328 LockBuffer(leftchild, GIST_UNLOCK);

1329

1330 /*

1331 * If we had to split, insert/update the downlinks in the parent. If the

1332 * caller requested us to release the lock on stack->buffer, tell

1333 * gistfinishsplit() to do that as soon as it's safe to do so. If we

1334 * didn't have to split, release it ourselves.

1335 */

1336 if (splitinfo)

1337 gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);

1338 else if (unlockbuf)

1339 LockBuffer(stack->buffer, GIST_UNLOCK);

1340

1341 return is_split;

1342}

1343

1344/*

1345 * Finish an incomplete split by inserting/updating the downlinks in parent

1346 * page. 'splitinfo' contains all the child pages involved in the split,

1347 * from left-to-right.

1348 *

1349 * On entry, the caller must hold a lock on stack->buffer and all the child

1350 * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is

1351 * released on return. The child pages are always unlocked and unpinned.

1352 */

1353static void

1354 gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,

1355 GISTSTATE *giststate, List *splitinfo, bool unlockbuf)

1356{

1357 GISTPageSplitInfo *right;

1358 GISTPageSplitInfo *left;

1359 IndexTuple tuples[2];

1360

1361 /* A split always contains at least two halves */

1362 Assert(list_length(splitinfo) >= 2);

1363

1364 /*

1365 * We need to insert downlinks for each new page, and update the downlink

1366 * for the original (leftmost) page in the split. Begin at the rightmost

1367 * page, inserting one downlink at a time until there's only two pages

1368 * left. Finally insert the downlink for the last new page and update the

1369 * downlink for the original page as one operation.

1370 */

1371 LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);

1372

1373 /*

1374 * Insert downlinks for the siblings from right to left, until there are

1375 * only two siblings left.

1376 */

1377 for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)

1378 {

1379 right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);

1380 left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);

1381

1382 gistFindCorrectParent(state->r, stack, state->is_build);

1383 if (gistinserttuples(state, stack->parent, giststate,

1384 &right->downlink, 1,

1385 InvalidOffsetNumber,

1386 left->buf, right->buf, false, false))

1387 {

1388 /*

1389 * If the parent page was split, the existing downlink might have

1390 * moved.

1391 */

1392 stack->downlinkoffnum = InvalidOffsetNumber;

1393 }

1394 /* gistinserttuples() released the lock on right->buf. */

1395 }

1396

1397 right = (GISTPageSplitInfo *) lsecond(splitinfo);

1398 left = (GISTPageSplitInfo *) linitial(splitinfo);

1399

1400 /*

1401 * Finally insert downlink for the remaining right page and update the

1402 * downlink for the original page to not contain the tuples that were

1403 * moved to the new pages.

1404 */

1405 tuples[0] = left->downlink;

1406 tuples[1] = right->downlink;

1407 gistFindCorrectParent(state->r, stack, state->is_build);

1408 (void) gistinserttuples(state, stack->parent, giststate,

1409 tuples, 2,

1410 stack->downlinkoffnum,

1411 left->buf, right->buf,

1412 true, /* Unlock parent */

1413 unlockbuf /* Unlock stack->buffer if caller

1414 * wants that */

1415 );

1416

1417 /*

1418 * The downlink might have moved when we updated it. Even if the page

1419 * wasn't split, because gistinserttuples() implements updating the old

1420 * tuple by removing and re-inserting it!

1421 */

1422 stack->downlinkoffnum = InvalidOffsetNumber;

1423

1424 Assert(left->buf == stack->buffer);

1425

1426 /*

1427 * If we split the page because we had to adjust the downlink on an

1428 * internal page, while descending the tree for inserting a new tuple,

1429 * then this might no longer be the correct page for the new tuple. The

1430 * downlink to this page might not cover the new tuple anymore, it might

1431 * need to go to the newly-created right sibling instead. Tell the caller

1432 * to walk back up the stack, to re-check at the parent which page to

1433 * insert to.

1434 *

1435 * Normally, the LSN-NSN interlock during the tree descend would also

1436 * detect that a concurrent split happened (by ourselves), and cause us to

1437 * retry at the parent. But that mechanism doesn't work during index

1438 * build, because we don't do WAL-logging, and don't update LSNs, during

1439 * index build.

1440 */

1441 stack->retry_from_parent = true;

1442}

1443

1444/*

1445 * gistSplit -- split a page in the tree and fill struct

1446 * used for XLOG and real writes buffers. Function is recursive, ie

1447 * it will split page until keys will fit in every page.

1448 */

1449SplitPageLayout *

1450 gistSplit(Relation r,

1451 Page page,

1452 IndexTuple *itup, /* contains compressed entry */

1453 int len,

1454 GISTSTATE *giststate)

1455{

1456 IndexTuple *lvectup,

1457 *rvectup;

1458 GistSplitVector v;

1459 int i;

1460 SplitPageLayout *res = NULL;

1461

1462 /* this should never recurse very deeply, but better safe than sorry */

1463 check_stack_depth();

1464

1465 /* there's no point in splitting an empty page */

1466 Assert(len > 0);

1467

1468 /*

1469 * If a single tuple doesn't fit on a page, no amount of splitting will

1470 * help.

1471 */

1472 if (len == 1)

1473 ereport(ERROR,

1474 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),

1475 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",

1476 IndexTupleSize(itup[0]), GiSTPageSize,

1477 RelationGetRelationName(r))));

1478

1479 memset(v.spl_lisnull, true,

1480 sizeof(bool) * giststate->nonLeafTupdesc->natts);

1481 memset(v.spl_risnull, true,

1482 sizeof(bool) * giststate->nonLeafTupdesc->natts);

1483 gistSplitByKey(r, page, itup, len, giststate, &v, 0);

1484

1485 /* form left and right vector */

1486 lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));

1487 rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));

1488

1489 for (i = 0; i < v.splitVector.spl_nleft; i++)

1490 lvectup[i] = itup[v.splitVector.spl_left[i] - 1];

1491

1492 for (i = 0; i < v.splitVector.spl_nright; i++)

1493 rvectup[i] = itup[v.splitVector.spl_right[i] - 1];

1494

1495 /* finalize splitting (may need another split) */

1496 if (!gistfitpage(rvectup, v.splitVector.spl_nright))

1497 {

1498 res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);

1499 }

1500 else

1501 {

1502 ROTATEDIST(res);

1503 res->block.num = v.splitVector.spl_nright;

1504 res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));

1505 res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);

1506 }

1507

1508 if (!gistfitpage(lvectup, v.splitVector.spl_nleft))

1509 {

1510 SplitPageLayout *resptr,

1511 *subres;

1512

1513 resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);

1514

1515 /* install on list's tail */

1516 while (resptr->next)

1517 resptr = resptr->next;

1518

1519 resptr->next = res;

1520 res = subres;

1521 }

1522 else

1523 {

1524 ROTATEDIST(res);

1525 res->block.num = v.splitVector.spl_nleft;

1526 res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));

1527 res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);

1528 }

1529

1530 return res;

1531}

1532

1533/*

1534 * Create a GISTSTATE and fill it with information about the index

1535 */

1536GISTSTATE *

1537 initGISTstate(Relation index)

1538{

1539 GISTSTATE *giststate;

1540 MemoryContext scanCxt;

1541 MemoryContext oldCxt;

1542 int i;

1543

1544 /* safety check to protect fixed-size arrays in GISTSTATE */

1545 if (index->rd_att->natts > INDEX_MAX_KEYS)

1546 elog(ERROR, "numberOfAttributes %d > %d",

1547 index->rd_att->natts, INDEX_MAX_KEYS);

1548

1549 /* Create the memory context that will hold the GISTSTATE */

1550 scanCxt = AllocSetContextCreate(CurrentMemoryContext,

1551 "GiST scan context",

1552 ALLOCSET_DEFAULT_SIZES);

1553 oldCxt = MemoryContextSwitchTo(scanCxt);

1554

1555 /* Create and fill in the GISTSTATE */

1556 giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));

1557

1558 giststate->scanCxt = scanCxt;

1559 giststate->tempCxt = scanCxt; /* caller must change this if needed */

1560 giststate->leafTupdesc = index->rd_att;

1561

1562 /*

1563 * The truncated tupdesc for non-leaf index tuples, which doesn't contain

1564 * the INCLUDE attributes.

1565 *

1566 * It is used to form tuples during tuple adjustment and page split.

1567 * B-tree creates shortened tuple descriptor for every truncated tuple,

1568 * because it is doing this less often: it does not have to form truncated

1569 * tuples during page split. Also, B-tree is not adjusting tuples on

1570 * internal pages the way GiST does.

1571 */

1572 giststate->nonLeafTupdesc = CreateTupleDescTruncatedCopy(index->rd_att,

1573 IndexRelationGetNumberOfKeyAttributes(index));

1574

1575 for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++)

1576 {

1577 fmgr_info_copy(&(giststate->consistentFn[i]),

1578 index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),

1579 scanCxt);

1580 fmgr_info_copy(&(giststate->unionFn[i]),

1581 index_getprocinfo(index, i + 1, GIST_UNION_PROC),

1582 scanCxt);

1583

1584 /* opclasses are not required to provide a Compress method */

1585 if (OidIsValid(index_getprocid(index, i + 1, GIST_COMPRESS_PROC)))

1586 fmgr_info_copy(&(giststate->compressFn[i]),

1587 index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),

1588 scanCxt);

1589 else

1590 giststate->compressFn[i].fn_oid = InvalidOid;

1591

1592 /* opclasses are not required to provide a Decompress method */

1593 if (OidIsValid(index_getprocid(index, i + 1, GIST_DECOMPRESS_PROC)))

1594 fmgr_info_copy(&(giststate->decompressFn[i]),

1595 index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),

1596 scanCxt);

1597 else

1598 giststate->decompressFn[i].fn_oid = InvalidOid;

1599

1600 fmgr_info_copy(&(giststate->penaltyFn[i]),

1601 index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),

1602 scanCxt);

1603 fmgr_info_copy(&(giststate->picksplitFn[i]),

1604 index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),

1605 scanCxt);

1606 fmgr_info_copy(&(giststate->equalFn[i]),

1607 index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),

1608 scanCxt);

1609

1610 /* opclasses are not required to provide a Distance method */

1611 if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))

1612 fmgr_info_copy(&(giststate->distanceFn[i]),

1613 index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),

1614 scanCxt);

1615 else

1616 giststate->distanceFn[i].fn_oid = InvalidOid;

1617

1618 /* opclasses are not required to provide a Fetch method */

1619 if (OidIsValid(index_getprocid(index, i + 1, GIST_FETCH_PROC)))

1620 fmgr_info_copy(&(giststate->fetchFn[i]),

1621 index_getprocinfo(index, i + 1, GIST_FETCH_PROC),

1622 scanCxt);

1623 else

1624 giststate->fetchFn[i].fn_oid = InvalidOid;

1625

1626 /*

1627 * If the index column has a specified collation, we should honor that

1628 * while doing comparisons. However, we may have a collatable storage

1629 * type for a noncollatable indexed data type. If there's no index

1630 * collation then specify default collation in case the support

1631 * functions need collation. This is harmless if the support

1632 * functions don't care about collation, so we just do it

1633 * unconditionally. (We could alternatively call get_typcollation,

1634 * but that seems like expensive overkill --- there aren't going to be

1635 * any cases where a GiST storage type has a nondefault collation.)

1636 */

1637 if (OidIsValid(index->rd_indcollation[i]))

1638 giststate->supportCollation[i] = index->rd_indcollation[i];

1639 else

1640 giststate->supportCollation[i] = DEFAULT_COLLATION_OID;

1641 }

1642

1643 /* No opclass information for INCLUDE attributes */

1644 for (; i < index->rd_att->natts; i++)

1645 {

1646 giststate->consistentFn[i].fn_oid = InvalidOid;

1647 giststate->unionFn[i].fn_oid = InvalidOid;

1648 giststate->compressFn[i].fn_oid = InvalidOid;

1649 giststate->decompressFn[i].fn_oid = InvalidOid;

1650 giststate->penaltyFn[i].fn_oid = InvalidOid;

1651 giststate->picksplitFn[i].fn_oid = InvalidOid;

1652 giststate->equalFn[i].fn_oid = InvalidOid;

1653 giststate->distanceFn[i].fn_oid = InvalidOid;

1654 giststate->fetchFn[i].fn_oid = InvalidOid;

1655 giststate->supportCollation[i] = InvalidOid;

1656 }

1657

1658 MemoryContextSwitchTo(oldCxt);

1659

1660 return giststate;

1661}

1662

1663void

1664 freeGISTstate(GISTSTATE *giststate)

1665{

1666 /* It's sufficient to delete the scanCxt */

1667 MemoryContextDelete(giststate->scanCxt);

1668}

1669

1670/*

1671 * gistprunepage() -- try to remove LP_DEAD items from the given page.

1672 * Function assumes that buffer is exclusively locked.

1673 */

1674static void

1675 gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)

1676{

1677 OffsetNumber deletable[MaxIndexTuplesPerPage];

1678 int ndeletable = 0;

1679 OffsetNumber offnum,

1680 maxoff;

1681

1682 Assert(GistPageIsLeaf(page));

1683

1684 /*

1685 * Scan over all items to see which ones need to be deleted according to

1686 * LP_DEAD flags.

1687 */

1688 maxoff = PageGetMaxOffsetNumber(page);

1689 for (offnum = FirstOffsetNumber;

1690 offnum <= maxoff;

1691 offnum = OffsetNumberNext(offnum))

1692 {

1693 ItemId itemId = PageGetItemId(page, offnum);

1694

1695 if (ItemIdIsDead(itemId))

1696 deletable[ndeletable++] = offnum;

1697 }

1698

1699 if (ndeletable > 0)

1700 {

1701 TransactionId snapshotConflictHorizon = InvalidTransactionId;

1702

1703 if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))

1704 snapshotConflictHorizon =

1705 index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,

1706 deletable, ndeletable);

1707

1708 START_CRIT_SECTION();

1709

1710 PageIndexMultiDelete(page, deletable, ndeletable);

1711

1712 /*

1713 * Mark the page as not containing any LP_DEAD items. This is not

1714 * certainly true (there might be some that have recently been marked,

1715 * but weren't included in our target-item list), but it will almost

1716 * always be true and it doesn't seem worth an additional page scan to

1717 * check it. Remember that F_HAS_GARBAGE is only a hint anyway.

1718 */

1719 GistClearPageHasGarbage(page);

1720

1721 MarkBufferDirty(buffer);

1722

1723 /* XLOG stuff */

1724 if (RelationNeedsWAL(rel))

1725 {

1726 XLogRecPtr recptr;

1727

1728 recptr = gistXLogDelete(buffer,

1729 deletable, ndeletable,

1730 snapshotConflictHorizon,

1731 heapRel);

1732

1733 PageSetLSN(page, recptr);

1734 }

1735 else

1736 PageSetLSN(page, gistGetFakeLSN(rel));

1737

1738 END_CRIT_SECTION();

1739 }

1740

1741 /*

1742 * Note: if we didn't find any LP_DEAD items, then the page's

1743 * F_HAS_GARBAGE hint bit is falsely set. We do not bother expending a

1744 * separate write to clear it, however. We will clear it when we split

1745 * the page.

1746 */

1747}

BlockNumber

uint32 BlockNumber

Definition: block.h:31

InvalidBlockNumber

#define InvalidBlockNumber

Definition: block.h:33

values

static Datum values[MAXATTR]

Definition: bootstrap.c:153

Buffer

int Buffer

Definition: buf.h:23

InvalidBuffer

#define InvalidBuffer

Definition: buf.h:25

BufferGetBlockNumber

BlockNumber BufferGetBlockNumber(Buffer buffer)

Definition: bufmgr.c:4198

ExtendBufferedRel

Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)

Definition: bufmgr.c:858

ReleaseBuffer

void ReleaseBuffer(Buffer buffer)

Definition: bufmgr.c:5338

BufferGetLSNAtomic

XLogRecPtr BufferGetLSNAtomic(Buffer buffer)

Definition: bufmgr.c:4460

UnlockReleaseBuffer

void UnlockReleaseBuffer(Buffer buffer)

Definition: bufmgr.c:5355

MarkBufferDirty

void MarkBufferDirty(Buffer buffer)

Definition: bufmgr.c:2921

LockBuffer

void LockBuffer(Buffer buffer, int mode)

Definition: bufmgr.c:5572

ReadBuffer

Buffer ReadBuffer(Relation reln, BlockNumber blockNum)

Definition: bufmgr.c:758

BufferGetPage

static Page BufferGetPage(Buffer buffer)

Definition: bufmgr.h:417

EB_SKIP_EXTENSION_LOCK

@ EB_SKIP_EXTENSION_LOCK

Definition: bufmgr.h:75

EB_LOCK_FIRST

@ EB_LOCK_FIRST

Definition: bufmgr.h:87

BMR_REL

#define BMR_REL(p_rel)

Definition: bufmgr.h:111

BufferIsValid

static bool BufferIsValid(Buffer bufnum)

Definition: bufmgr.h:368

PageRestoreTempPage

void PageRestoreTempPage(Page tempPage, Page oldPage)

Definition: bufpage.c:423

PageIndexMultiDelete

void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)

Definition: bufpage.c:1160

PageIndexTupleOverwrite

bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)

Definition: bufpage.c:1404

PageIndexTupleDelete

void PageIndexTupleDelete(Page page, OffsetNumber offnum)

Definition: bufpage.c:1051

PageGetTempPageCopySpecial

Page PageGetTempPageCopySpecial(const PageData *page)

Definition: bufpage.c:401

PageGetItem

static Item PageGetItem(const PageData *page, const ItemIdData *itemId)

Definition: bufpage.h:354

PageGetItemId

static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)

Definition: bufpage.h:244

PageSetLSN

static void PageSetLSN(Page page, XLogRecPtr lsn)

Definition: bufpage.h:391

Page

PageData * Page

Definition: bufpage.h:82

PageGetLSN

static XLogRecPtr PageGetLSN(const PageData *page)

Definition: bufpage.h:386

PageAddItem

#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)

Definition: bufpage.h:472

PageGetMaxOffsetNumber

static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)

Definition: bufpage.h:372

TransactionId

uint32 TransactionId

Definition: c.h:657

OidIsValid

#define OidIsValid(objectId)

Definition: c.h:774

Size

size_t Size

Definition: c.h:610

errdetail

int errdetail(const char *fmt,...)

Definition: elog.c:1207

errhint

int errhint(const char *fmt,...)

Definition: elog.c:1321

errcode

int errcode(int sqlerrcode)

Definition: elog.c:854

errmsg

int errmsg(const char *fmt,...)

Definition: elog.c:1071

LOG

#define LOG

Definition: elog.h:31

ERROR

#define ERROR

Definition: elog.h:39

elog

#define elog(elevel,...)

Definition: elog.h:226

ereport

#define ereport(elevel,...)

Definition: elog.h:150

execnodes.h

fmgr_info_copy

void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)

Definition: fmgr.c:581

PG_RETURN_POINTER

#define PG_RETURN_POINTER(x)

Definition: fmgr.h:361

PG_FUNCTION_ARGS

#define PG_FUNCTION_ARGS

Definition: fmgr.h:193

rightchild

#define rightchild(x)

Definition: fsmpage.c:30

leftchild

#define leftchild(x)

Definition: fsmpage.c:29

index_compute_xid_horizon_for_tuples

TransactionId index_compute_xid_horizon_for_tuples(Relation irel, Relation hrel, Buffer ibuf, OffsetNumber *itemnos, int nitems)

Definition: genam.c:295

IndexUniqueCheck

Definition: genam.h:143

gistSplit

SplitPageLayout * gistSplit(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate)

Definition: gist.c:1450

initGISTstate

GISTSTATE * initGISTstate(Relation index)

Definition: gist.c:1537

gistFindPath

static GISTInsertStack * gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)

Definition: gist.c:914

gistdoinsert

void gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate, Relation heapRel, bool is_build)

Definition: gist.c:639

gistfixsplit

static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)

Definition: gist.c:1200

gistprunepage

static void gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)

Definition: gist.c:1675

gistplacetopage

bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, Buffer buffer, IndexTuple *itup, int ntup, OffsetNumber oldoffnum, BlockNumber *newblkno, Buffer leftchildbuf, List **splitinfo, bool markfollowright, Relation heapRel, bool is_build)

Definition: gist.c:230

gistinsert

bool gistinsert(Relation r, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)

Definition: gist.c:165

gistbuildempty

void gistbuildempty(Relation index)

Definition: gist.c:139

gistinserttuples

static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, IndexTuple *tuples, int ntup, OffsetNumber oldoffnum, Buffer leftchild, Buffer rightchild, bool unlockbuf, bool unlockleftchild)

Definition: gist.c:1294

createTempGistContext

MemoryContext createTempGistContext(void)

Definition: gist.c:128

freeGISTstate

void freeGISTstate(GISTSTATE *giststate)

Definition: gist.c:1664

gistinserttuple

static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)

Definition: gist.c:1260

ROTATEDIST

#define ROTATEDIST(d)

Definition: gist.c:45

gistfinishsplit

static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, List *splitinfo, bool unlockbuf)

Definition: gist.c:1354

gistFindCorrectParent

static void gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)

Definition: gist.c:1027

gistformdownlink

static IndexTuple gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate, GISTInsertStack *stack, bool is_build)

Definition: gist.c:1140

gisthandler

Datum gisthandler(PG_FUNCTION_ARGS)

Definition: gist.c:59

GIST_DECOMPRESS_PROC

#define GIST_DECOMPRESS_PROC

Definition: gist.h:35

GIST_PICKSPLIT_PROC

#define GIST_PICKSPLIT_PROC

Definition: gist.h:37

GistMarkFollowRight

#define GistMarkFollowRight(page)

Definition: gist.h:184

F_LEAF

#define F_LEAF

Definition: gist.h:49

GIST_CONSISTENT_PROC

#define GIST_CONSISTENT_PROC

Definition: gist.h:32

GistClearFollowRight

#define GistClearFollowRight(page)

Definition: gist.h:185

GIST_UNION_PROC

#define GIST_UNION_PROC

Definition: gist.h:33

GIST_FETCH_PROC

#define GIST_FETCH_PROC

Definition: gist.h:40

GIST_COMPRESS_PROC

#define GIST_COMPRESS_PROC

Definition: gist.h:34

GISTNProcs

#define GISTNProcs

Definition: gist.h:44

GistClearPageHasGarbage

#define GistClearPageHasGarbage(page)

Definition: gist.h:181

GIST_PENALTY_PROC

#define GIST_PENALTY_PROC

Definition: gist.h:36

GistPageIsLeaf

#define GistPageIsLeaf(page)

Definition: gist.h:170

GistFollowRight

#define GistFollowRight(page)

Definition: gist.h:183

GIST_OPTIONS_PROC

#define GIST_OPTIONS_PROC

Definition: gist.h:41

GIST_DISTANCE_PROC

#define GIST_DISTANCE_PROC

Definition: gist.h:39

GistPageSetNSN

#define GistPageSetNSN(page, val)

Definition: gist.h:188

GistPageIsDeleted

#define GistPageIsDeleted(page)

Definition: gist.h:173

GistPageGetOpaque

#define GistPageGetOpaque(page)

Definition: gist.h:168

GIST_EQUAL_PROC

#define GIST_EQUAL_PROC

Definition: gist.h:38

GistPageHasGarbage

#define GistPageHasGarbage(page)

Definition: gist.h:179

GistPageGetNSN

#define GistPageGetNSN(page)

Definition: gist.h:187

GistNSN

XLogRecPtr GistNSN

Definition: gist.h:63

GistBuildLSN

#define GistBuildLSN

Definition: gist.h:70

gist_private.h

GIST_MAX_SPLIT_PAGES

#define GIST_MAX_SPLIT_PAGES

Definition: gist_private.h:39

GistTupleSetValid

#define GistTupleSetValid(itup)

Definition: gist_private.h:289

GIST_UNLOCK

#define GIST_UNLOCK

Definition: gist_private.h:44

GIST_ROOT_BLKNO

#define GIST_ROOT_BLKNO

Definition: gist_private.h:262

GIST_EXCLUSIVE

#define GIST_EXCLUSIVE

Definition: gist_private.h:43

GiSTPageSize

#define GiSTPageSize

Definition: gist_private.h:476

GistTupleIsInvalid

#define GistTupleIsInvalid(itup)

Definition: gist_private.h:288

GIST_SHARE

#define GIST_SHARE

Definition: gist_private.h:42

gistbuild

IndexBuildResult * gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)

Definition: gistbuild.c:179

gistgettuple

bool gistgettuple(IndexScanDesc scan, ScanDirection dir)

Definition: gistget.c:612

gistgetbitmap

int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)

Definition: gistget.c:745

gistcanreturn

bool gistcanreturn(Relation index, int attno)

Definition: gistget.c:797

gistbeginscan

IndexScanDesc gistbeginscan(Relation r, int nkeys, int norderbys)

Definition: gistscan.c:74

gistendscan

void gistendscan(IndexScanDesc scan)

Definition: gistscan.c:347

gistrescan

void gistrescan(IndexScanDesc scan, ScanKey key, int nkeys, ScanKey orderbys, int norderbys)

Definition: gistscan.c:127

gistscan.h

gistSplitByKey

void gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate, GistSplitVector *v, int attno)

Definition: gistsplit.c:623

gistoptions

bytea * gistoptions(Datum reloptions, bool validate)

Definition: gistutil.c:912

gistNewBuffer

Buffer gistNewBuffer(Relation r, Relation heaprel)

Definition: gistutil.c:824

gistproperty

bool gistproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)

Definition: gistutil.c:933

gistnospace

bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)

Definition: gistutil.c:59

gistfillbuffer

void gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)

Definition: gistutil.c:34

gistFormTuple

IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, const Datum *attdata, const bool *isnull, bool isleaf)

Definition: gistutil.c:575

gistextractpage

IndexTuple * gistextractpage(Page page, int *len)

Definition: gistutil.c:95

gistfitpage

bool gistfitpage(IndexTuple *itvec, int len)

Definition: gistutil.c:79

gistgetadjusted

IndexTuple gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)

Definition: gistutil.c:316

gistchoose

OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate)

Definition: gistutil.c:374

gistGetFakeLSN

XLogRecPtr gistGetFakeLSN(Relation rel)

Definition: gistutil.c:1016

gistjoinvector

IndexTuple * gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)

Definition: gistutil.c:114

GISTInitBuffer

void GISTInitBuffer(Buffer b, uint32 f)

Definition: gistutil.c:773

gisttranslatecmptype

StrategyNumber gisttranslatecmptype(CompareType cmptype, Oid opfamily)

Definition: gistutil.c:1098

gistcheckpage

void gistcheckpage(Relation rel, Buffer buf)

Definition: gistutil.c:785

gistfillitupvec

IndexTupleData * gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)

Definition: gistutil.c:127

gistvacuumcleanup

IndexBulkDeleteResult * gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)

Definition: gistvacuum.c:75

gistbulkdelete

IndexBulkDeleteResult * gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)

Definition: gistvacuum.c:59

gistadjustmembers

void gistadjustmembers(Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)

Definition: gistvalidate.c:288

gistvalidate

bool gistvalidate(Oid opclassoid)

Definition: gistvalidate.c:32

gistXLogSplit

XLogRecPtr gistXLogSplit(bool page_is_leaf, SplitPageLayout *dist, BlockNumber origrlink, GistNSN orignsn, Buffer leftchildbuf, bool markfollowright)

Definition: gistxlog.c:495

gistXLogDelete

XLogRecPtr gistXLogDelete(Buffer buffer, OffsetNumber *todelete, int ntodelete, TransactionId snapshotConflictHorizon, Relation heaprel)

Definition: gistxlog.c:670

gistXLogUpdate

XLogRecPtr gistXLogUpdate(Buffer buffer, OffsetNumber *todelete, int ntodelete, IndexTuple *itup, int ituplen, Buffer leftchildbuf)

Definition: gistxlog.c:629

Assert

Assert(PointerIsAligned(start, uint64))

for

for(;;)

Definition: hashfn_unstable.h:265

index_selfuncs.h

index_getprocinfo

FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)

Definition: indexam.c:917

index_getprocid

RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum)

Definition: indexam.c:883

CopyIndexTuple

IndexTuple CopyIndexTuple(IndexTuple source)

Definition: indextuple.c:547

i

int i

Definition: isn.c:77

if

if(TABLE==NULL||TABLE_index==NULL)

Definition: isn.c:81

Item

Pointer Item

Definition: item.h:17

ItemIdIsDead

#define ItemIdIsDead(itemId)

Definition: itemid.h:113

ItemPointerEquals

bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)

Definition: itemptr.c:35

ItemPointerSetBlockNumber

static void ItemPointerSetBlockNumber(ItemPointerData *pointer, BlockNumber blockNumber)

Definition: itemptr.h:147

ItemPointerGetBlockNumber

static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)

Definition: itemptr.h:103

IndexTuple

IndexTupleData * IndexTuple

Definition: itup.h:53

IndexTupleSize

static Size IndexTupleSize(const IndexTupleData *itup)

Definition: itup.h:71

MaxIndexTuplesPerPage

#define MaxIndexTuplesPerPage

Definition: itup.h:181

lappend

List * lappend(List *list, void *datum)

Definition: list.c:339

list_delete_first

List * list_delete_first(List *list)

Definition: list.c:943

lcons

List * lcons(void *datum, List *list)

Definition: list.c:495

MemoryContextReset

void MemoryContextReset(MemoryContext context)

Definition: mcxt.c:400

palloc0

void * palloc0(Size size)

Definition: mcxt.c:1395

palloc

void * palloc(Size size)

Definition: mcxt.c:1365

CurrentMemoryContext

MemoryContext CurrentMemoryContext

Definition: mcxt.c:160

MemoryContextDelete

void MemoryContextDelete(MemoryContext context)

Definition: mcxt.c:469

memutils.h

AllocSetContextCreate

#define AllocSetContextCreate

Definition: memutils.h:129

ALLOCSET_DEFAULT_SIZES

#define ALLOCSET_DEFAULT_SIZES

Definition: memutils.h:160

miscadmin.h

START_CRIT_SECTION

#define START_CRIT_SECTION()

Definition: miscadmin.h:149

END_CRIT_SECTION

#define END_CRIT_SECTION()

Definition: miscadmin.h:151

makeNode

#define makeNode(_type_)

Definition: nodes.h:161

InvalidOffsetNumber

#define InvalidOffsetNumber

Definition: off.h:26

OffsetNumberIsValid

#define OffsetNumberIsValid(offsetNumber)

Definition: off.h:39

OffsetNumberNext

#define OffsetNumberNext(offsetNumber)

Definition: off.h:52

OffsetNumber

uint16 OffsetNumber

Definition: off.h:24

FirstOffsetNumber

#define FirstOffsetNumber

Definition: off.h:27

MemoryContextSwitchTo

static MemoryContext MemoryContextSwitchTo(MemoryContext context)

Definition: palloc.h:124

pg_collation.h

INDEX_MAX_KEYS

#define INDEX_MAX_KEYS

Definition: pg_config_manual.h:69

len

const void size_t len

Definition: pg_crc32c_sse42.c:28

data

const void * data

Definition: pg_crc32c_sse42.c:27

list_length

static int list_length(const List *l)

Definition: pg_list.h:152

NIL

#define NIL

Definition: pg_list.h:68

list_make1

#define list_make1(x1)

Definition: pg_list.h:212

list_nth

static void * list_nth(const List *list, int n)

Definition: pg_list.h:299

linitial

#define linitial(l)

Definition: pg_list.h:178

lsecond

#define lsecond(l)

Definition: pg_list.h:183

buf

static char * buf

Definition: pg_test_fsync.c:72

postgres.h

Datum

uint64_t Datum

Definition: postgres.h:70

InvalidOid

#define InvalidOid

Definition: postgres_ext.h:37

PredicateLockPageSplit

void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)

Definition: predicate.c:3144

CheckForSerializableConflictIn

void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)

Definition: predicate.c:4336

predicate.h

rel.h

RelationGetRelationName

#define RelationGetRelationName(relation)

Definition: rel.h:548

RelationNeedsWAL

#define RelationNeedsWAL(relation)

Definition: rel.h:637

IndexRelationGetNumberOfKeyAttributes

#define IndexRelationGetNumberOfKeyAttributes(relation)

Definition: rel.h:533

INIT_FORKNUM

@ INIT_FORKNUM

Definition: relpath.h:61

gistcostestimate

void gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)

Definition: selfuncs.c:7791

check_stack_depth

void check_stack_depth(void)

Definition: stack_depth.c:95

FmgrInfo::fn_oid

Oid fn_oid

Definition: fmgr.h:59

GISTInsertStack

Definition: gist_private.h:208

GISTInsertStack::buffer

Buffer buffer

Definition: gist_private.h:211

GISTInsertStack::blkno

BlockNumber blkno

Definition: gist_private.h:210

GISTInsertStack::page

Page page

Definition: gist_private.h:212

GISTInsertStack::downlinkoffnum

OffsetNumber downlinkoffnum

Definition: gist_private.h:228

GISTInsertStack::lsn

GistNSN lsn

Definition: gist_private.h:218

GISTInsertStack::parent

struct GISTInsertStack * parent

Definition: gist_private.h:231

GISTInsertStack::retry_from_parent

bool retry_from_parent

Definition: gist_private.h:225

GISTInsertState

Definition: gist_private.h:252

GISTPageSplitInfo

Definition: gist_private.h:420

GISTPageSplitInfo::downlink

IndexTuple downlink

Definition: gist_private.h:422

GISTPageSplitInfo::buf

Buffer buf

Definition: gist_private.h:421

GISTSTATE

Definition: gist_private.h:76

GISTSTATE::fetchFn

FmgrInfo fetchFn[INDEX_MAX_KEYS]

Definition: gist_private.h:94

GISTSTATE::leafTupdesc

TupleDesc leafTupdesc

Definition: gist_private.h:80

GISTSTATE::nonLeafTupdesc

TupleDesc nonLeafTupdesc

Definition: gist_private.h:81

GISTSTATE::penaltyFn

FmgrInfo penaltyFn[INDEX_MAX_KEYS]

Definition: gist_private.h:90

GISTSTATE::tempCxt

MemoryContext tempCxt

Definition: gist_private.h:78

GISTSTATE::supportCollation

Oid supportCollation[INDEX_MAX_KEYS]

Definition: gist_private.h:97

GISTSTATE::distanceFn

FmgrInfo distanceFn[INDEX_MAX_KEYS]

Definition: gist_private.h:93

GISTSTATE::consistentFn

FmgrInfo consistentFn[INDEX_MAX_KEYS]

Definition: gist_private.h:86

GISTSTATE::scanCxt

MemoryContext scanCxt

Definition: gist_private.h:77

GISTSTATE::decompressFn

FmgrInfo decompressFn[INDEX_MAX_KEYS]

Definition: gist_private.h:89

GISTSTATE::compressFn

FmgrInfo compressFn[INDEX_MAX_KEYS]

Definition: gist_private.h:88

GISTSTATE::equalFn

FmgrInfo equalFn[INDEX_MAX_KEYS]

Definition: gist_private.h:92

GISTSTATE::unionFn

FmgrInfo unionFn[INDEX_MAX_KEYS]

Definition: gist_private.h:87

GISTSTATE::picksplitFn

FmgrInfo picksplitFn[INDEX_MAX_KEYS]

Definition: gist_private.h:91

GIST_SPLITVEC::spl_nleft

int spl_nleft

Definition: gist.h:144

GIST_SPLITVEC::spl_right

OffsetNumber * spl_right

Definition: gist.h:148

GIST_SPLITVEC::spl_nright

int spl_nright

Definition: gist.h:149

GIST_SPLITVEC::spl_left

OffsetNumber * spl_left

Definition: gist.h:143

GistSplitVector

Definition: gist_private.h:236

GistSplitVector::splitVector

GIST_SPLITVEC splitVector

Definition: gist_private.h:237

GistSplitVector::spl_lattr

Datum spl_lattr[INDEX_MAX_KEYS]

Definition: gist_private.h:239

GistSplitVector::spl_lisnull

bool spl_lisnull[INDEX_MAX_KEYS]

Definition: gist_private.h:241

GistSplitVector::spl_rattr

Datum spl_rattr[INDEX_MAX_KEYS]

Definition: gist_private.h:243

GistSplitVector::spl_risnull

bool spl_risnull[INDEX_MAX_KEYS]

Definition: gist_private.h:245

IndexAmRoutine

Definition: amapi.h:233

IndexAmRoutine::ambuildphasename

ambuildphasename_function ambuildphasename

Definition: amapi.h:306

IndexAmRoutine::ambuildempty

ambuildempty_function ambuildempty

Definition: amapi.h:296

IndexAmRoutine::amvacuumcleanup

amvacuumcleanup_function amvacuumcleanup

Definition: amapi.h:300

IndexAmRoutine::amclusterable

bool amclusterable

Definition: amapi.h:270

IndexAmRoutine::amoptions

amoptions_function amoptions

Definition: amapi.h:304

IndexAmRoutine::amestimateparallelscan

amestimateparallelscan_function amestimateparallelscan

Definition: amapi.h:318

IndexAmRoutine::amrestrpos

amrestrpos_function amrestrpos

Definition: amapi.h:315

IndexAmRoutine::aminsert

aminsert_function aminsert

Definition: amapi.h:297

IndexAmRoutine::amendscan

amendscan_function amendscan

Definition: amapi.h:313

IndexAmRoutine::amtranslatestrategy

amtranslate_strategy_function amtranslatestrategy

Definition: amapi.h:323

IndexAmRoutine::amoptsprocnum

uint16 amoptsprocnum

Definition: amapi.h:244

IndexAmRoutine::amparallelrescan

amparallelrescan_function amparallelrescan

Definition: amapi.h:320

IndexAmRoutine::amkeytype

Oid amkeytype

Definition: amapi.h:286

IndexAmRoutine::amconsistentordering

bool amconsistentordering

Definition: amapi.h:254

IndexAmRoutine::ampredlocks

bool ampredlocks

Definition: amapi.h:272

IndexAmRoutine::amsupport

uint16 amsupport

Definition: amapi.h:242

IndexAmRoutine::amtranslatecmptype

amtranslate_cmptype_function amtranslatecmptype

Definition: amapi.h:324

IndexAmRoutine::amcostestimate

amcostestimate_function amcostestimate

Definition: amapi.h:302

IndexAmRoutine::amcanorderbyop

bool amcanorderbyop

Definition: amapi.h:248

IndexAmRoutine::amadjustmembers

amadjustmembers_function amadjustmembers

Definition: amapi.h:308

IndexAmRoutine::ambuild

ambuild_function ambuild

Definition: amapi.h:295

IndexAmRoutine::amstorage

bool amstorage

Definition: amapi.h:268

IndexAmRoutine::amstrategies

uint16 amstrategies

Definition: amapi.h:240

IndexAmRoutine::amoptionalkey

bool amoptionalkey

Definition: amapi.h:262

IndexAmRoutine::amgettuple

amgettuple_function amgettuple

Definition: amapi.h:311

IndexAmRoutine::amcanreturn

amcanreturn_function amcanreturn

Definition: amapi.h:301

IndexAmRoutine::amcanunique

bool amcanunique

Definition: amapi.h:258

IndexAmRoutine::amgetbitmap

amgetbitmap_function amgetbitmap

Definition: amapi.h:312

IndexAmRoutine::amproperty

amproperty_function amproperty

Definition: amapi.h:305

IndexAmRoutine::ambulkdelete

ambulkdelete_function ambulkdelete

Definition: amapi.h:299

IndexAmRoutine::amsearcharray

bool amsearcharray

Definition: amapi.h:264

IndexAmRoutine::amsummarizing

bool amsummarizing

Definition: amapi.h:282

IndexAmRoutine::amvalidate

amvalidate_function amvalidate

Definition: amapi.h:307

IndexAmRoutine::ammarkpos

ammarkpos_function ammarkpos

Definition: amapi.h:314

IndexAmRoutine::amcanmulticol

bool amcanmulticol

Definition: amapi.h:260

IndexAmRoutine::amusemaintenanceworkmem

bool amusemaintenanceworkmem

Definition: amapi.h:280

IndexAmRoutine::ambeginscan

ambeginscan_function ambeginscan

Definition: amapi.h:309

IndexAmRoutine::amcanparallel

bool amcanparallel

Definition: amapi.h:274

IndexAmRoutine::amrescan

amrescan_function amrescan

Definition: amapi.h:310

IndexAmRoutine::amcanorder

bool amcanorder

Definition: amapi.h:246

IndexAmRoutine::amcanbuildparallel

bool amcanbuildparallel

Definition: amapi.h:276

IndexAmRoutine::aminitparallelscan

aminitparallelscan_function aminitparallelscan

Definition: amapi.h:319

IndexAmRoutine::amparallelvacuumoptions

uint8 amparallelvacuumoptions

Definition: amapi.h:284

IndexAmRoutine::aminsertcleanup

aminsertcleanup_function aminsertcleanup

Definition: amapi.h:298

IndexAmRoutine::amcanbackward

bool amcanbackward

Definition: amapi.h:256

IndexAmRoutine::amgettreeheight

amgettreeheight_function amgettreeheight

Definition: amapi.h:303

IndexAmRoutine::amcaninclude

bool amcaninclude

Definition: amapi.h:278

IndexAmRoutine::amsearchnulls

bool amsearchnulls

Definition: amapi.h:266

IndexAmRoutine::amconsistentequality

bool amconsistentequality

Definition: amapi.h:252

IndexAmRoutine::amcanhash

bool amcanhash

Definition: amapi.h:250

IndexInfo

Definition: execnodes.h:163

IndexInfo::ii_AmCache

void * ii_AmCache

Definition: execnodes.h:223

IndexInfo::ii_Context

MemoryContext ii_Context

Definition: execnodes.h:226

IndexTupleData

Definition: itup.h:36

IndexTupleData::t_tid

ItemPointerData t_tid

Definition: itup.h:37

ItemIdData

Definition: itemid.h:26

ItemPointerData

Definition: itemptr.h:37

List

Definition: pg_list.h:54

MemoryContextData

Definition: memnodes.h:118

RelationData

Definition: rel.h:56

SplitPageLayout

Definition: gist_private.h:192

SplitPageLayout::page

Page page

Definition: gist_private.h:197

SplitPageLayout::lenlist

int lenlist

Definition: gist_private.h:195

SplitPageLayout::block

gistxlogPage block

Definition: gist_private.h:193

SplitPageLayout::next

struct SplitPageLayout * next

Definition: gist_private.h:200

SplitPageLayout::buffer

Buffer buffer

Definition: gist_private.h:198

SplitPageLayout::itup

IndexTuple itup

Definition: gist_private.h:196

SplitPageLayout::list

IndexTupleData * list

Definition: gist_private.h:194

TupleDescData::natts

int natts

Definition: tupdesc.h:137

gistxlogPage::num

int num

Definition: gist_private.h:187

gistxlogPage::blkno

BlockNumber blkno

Definition: gist_private.h:186

index

Definition: type.h:96

state

Definition: regguts.h:323

InvalidTransactionId

#define InvalidTransactionId

Definition: transam.h:31

CreateTupleDescTruncatedCopy

TupleDesc CreateTupleDescTruncatedCopy(TupleDesc tupdesc, int natts)

Definition: tupdesc.c:296

vacuum.h

VACUUM_OPTION_PARALLEL_BULKDEL

#define VACUUM_OPTION_PARALLEL_BULKDEL

Definition: vacuum.h:48

VACUUM_OPTION_PARALLEL_COND_CLEANUP

#define VACUUM_OPTION_PARALLEL_COND_CLEANUP

Definition: vacuum.h:55

XLogStandbyInfoActive

#define XLogStandbyInfoActive()

Definition: xlog.h:123

XLogRecPtrIsInvalid

#define XLogRecPtrIsInvalid(r)

Definition: xlogdefs.h:29

XLogRecPtr

uint64 XLogRecPtr

Definition: xlogdefs.h:21

log_newpage_buffer

XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)

Definition: xloginsert.c:1249

XLogEnsureRecordSpace

void XLogEnsureRecordSpace(int max_block_id, int ndatas)

Definition: xloginsert.c:175

xloginsert.h

PostgreSQL Source Code: src/backend/access/gist/gist.c Source File