/*-------------------------------------------------------------------------
 *
 * gistbuild.c
 *	  implementation of the build algorithm for GiST indexes.
 *
 * There are two different strategies:
 *
 * 1. Sort all input tuples, pack them into GiST leaf pages in the sorted
 *    order, and create downlinks and internal pages as we go. This builds
 *    the index from the bottom up, similar to how B-tree index build
 *    works.
 *
 * 2. Start with an empty index, and insert all tuples one by one.
 *
 * The sorted method is used if the operator classes for all columns have
 * a 'sortsupport' defined. Otherwise, we resort to the second strategy.
 *
 * The second strategy can optionally use buffers at different levels of
 * the tree to reduce I/O, see "Buffering build algorithm" in the README
 * for a more detailed explanation. It initially calls insert over and
 * over, but switches to the buffered algorithm after a certain number of
 * tuples (unless buffering mode is disabled).
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/gist/gistbuild.c
 *
 *-------------------------------------------------------------------------
 */
/* Interval, in index tuples, between checks whether to switch to buffering build mode */
#define BUFFERING_MODE_SWITCH_CHECK_STEP 256
/*
 * Number of tuples to process in the slow way before switching to buffering
 * mode, when buffering is explicitly turned on. Also, the number of tuples
 * to process between readjusting the buffer size parameter, while in
 * buffering mode.
 */
#define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
/*
 * Strategy used to build the index. It can change between the
 * GIST_BUFFERING_* modes on the fly, but if the Sorted method is used,
 * that needs to be decided up-front and cannot be changed afterwards.
 */
typedef enum
{
	GIST_SORTED_BUILD,			/* bottom-up build by sorting */
	GIST_BUFFERING_DISABLED,	/* in regular build mode and aren't going to
								 * switch */
	GIST_BUFFERING_AUTO,		/* in regular build mode, but will switch to
								 * buffering build mode if the index grows too
								 * big */
	GIST_BUFFERING_STATS,		/* gathering statistics of index tuple size
								 * before switching to the buffering build
								 * mode */
	GIST_BUFFERING_ACTIVE,		/* in buffering build mode */
} GistBuildMode;
/* Working state for gistbuild and its callback */
/*
 * Extra data structures used during a buffering build. 'gfbb' contains
 * information related to managing the build buffers. 'parentMap' is a
 * lookup table of the parent of each internal page.
 */
/*
 * Extra data structures used during a sorting build.
 */

#define GIST_SORTED_BUILD_PAGE_NUM 4
/*
 * In a sorted build, we use a stack of these structs, one for each level,
 * to hold an in-memory buffer of the last pages at that level.
 *
 * A sorted GiST build requires good linearization from the sort opclass,
 * which is not always achievable with multidimensional data. To tackle the
 * anomalies, we buffer index tuples and apply a picksplit that can be
 * multidimension-aware.
 */
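/*
 * A sketch of the per-level state, reconstructed from the fields referenced
 * elsewhere in this file; the exact member set in the real source may
 * differ.
 */
typedef struct GistSortedBuildLevelState
{
	int			current_page;	/* slot in 'pages' currently being filled */
	BlockNumber last_blkno;		/* last page written out at this level */
	struct GistSortedBuildLevelState *parent;	/* level above, or NULL */
	Page		pages[GIST_SORTED_BUILD_PAGE_NUM];
} GistSortedBuildLevelState;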
/* prototypes for private functions */

static void gistSortedBuildCallback(Relation index, ItemPointer tid,
									Datum *values, bool *isnull,
									bool tupleIsAlive, void *state);
/*
 * Main entry point to GiST index build.
 */
IndexBuildResult *
gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
{
	/*
	 * We expect to be called exactly once for any index relation. If that's
	 * not the case, big trouble's what we have.
	 */
	if (RelationGetNumberOfBlocks(index) != 0)
		elog(ERROR, "index \"%s\" already contains data",
			 RelationGetRelationName(index));
	/*
	 * Create a temporary memory context that is reset once for each tuple
	 * processed. (Note: we don't bother to make this a child of the
	 * giststate's scanCxt, so we have to delete it separately at the end.)
	 */
	buildstate.giststate->tempCxt = createTempGistContext();
	/*
	 * Choose build strategy. First check whether the user specified to use
	 * buffering mode. (The use-case for that in the field is somewhat
	 * questionable perhaps, but it's important for testing purposes.)
	 */
	if (options)
	{
		if (options->buffering_mode == GIST_OPTION_BUFFERING_ON)
			buildstate.buildMode = GIST_BUFFERING_STATS;
		else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF)
			buildstate.buildMode = GIST_BUFFERING_DISABLED;
		else					/* must be "auto" */
			buildstate.buildMode = GIST_BUFFERING_AUTO;
	}
	/*
	 * Unless buffering mode was forced, see if we can use sorting instead.
	 */
	if (buildstate.buildMode != GIST_BUFFERING_STATS)
	{
		bool		hasallsortsupports = true;
		int			keyscount = IndexRelationGetNumberOfKeyAttributes(index);

		for (int i = 0; i < keyscount; i++)
		{
			if (!OidIsValid(index_getprocid(index, i + 1,
											GIST_SORTSUPPORT_PROC)))
			{
				hasallsortsupports = false;
				break;
			}
		}
		if (hasallsortsupports)
			buildstate.buildMode = GIST_SORTED_BUILD;
	}
	/*
	 * Calculate target amount of free space to leave on pages.
	 */
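	/*
	 * A sketch of the computation this comment refers to, following the real
	 * source; 'options' is the parsed GiSTOptions for the index:
	 */
	fillfactor = options ? options->fillfactor : GIST_DEFAULT_FILLFACTOR;
	buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;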
	/*
	 * Build the index using the chosen strategy.
	 */

	/*
	 * Sort all data, build the index from bottom up.
	 */

	/* Scan the table, adding all tuples to the tuplesort */

	/*
	 * Perform the sort and build index pages.
	 */
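	/*
	 * A sketch of the sequence these comments describe; the argument lists
	 * are abbreviated from the real source, and TUPLESORT_NONE is assumed
	 * for the sort options:
	 */
	buildstate.sortstate = tuplesort_begin_index_gist(heap, index,
													  maintenance_work_mem,
													  NULL, TUPLESORT_NONE);
	reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
									   gistSortedBuildCallback,
									   (void *) &buildstate, NULL);
	tuplesort_performsort(buildstate.sortstate);
	gist_indexsortbuild(&buildstate);
	tuplesort_end(buildstate.sortstate);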
	/*
	 * Initialize an empty index and insert all tuples, possibly using
	 * buffers on intermediate levels.
	 */

	/* initialize the root page */

	/* Scan the table, inserting all the tuples to the index. */
	/*
	 * If buffering was used, flush out all the tuples that are still in
	 * the buffers.
	 */
	if (buildstate.buildMode == GIST_BUFFERING_ACTIVE)
	{
		elog(DEBUG1, "all tuples processed, emptying buffers");
		gistEmptyAllBuffers(&buildstate);
		gistFreeBuildBuffers(buildstate.gfbb);
	}
	/*
	 * We didn't write WAL records as we built the index, so if
	 * WAL-logging is required, write all pages to the WAL now.
	 */
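	/*
	 * A sketch of that WAL-logging step, following the real source:
	 */
	if (RelationNeedsWAL(index))
		log_newpage_range(index, MAIN_FORKNUM,
						  0, RelationGetNumberOfBlocks(index),
						  true);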
	/* okay, all heap tuples are indexed */
/*-------------------------------------------------------------------------
 * Routines for sorted build
 *-------------------------------------------------------------------------
 */
/*
 * Per-tuple callback for table_index_build_scan.
 */

	/* Form an index tuple and point it at the heap tuple */
	gistCompressValues(buildstate->giststate, index, values, isnull,
					   true, compressed_values);
	tuplesort_putindextuplevalues(buildstate->sortstate,
								  buildstate->indexrel, tid,
								  compressed_values, isnull);

	/* Update tuple count. */
	buildstate->indtuples += 1;
/*
 * Build GiST index from bottom up from pre-sorted tuples.
 */

	/* Reserve block 0 for the root page */
	state->pages_allocated = 1;
	/* Allocate a temporary buffer for the first leaf page batch. */
	levelstate = palloc0(sizeof(GistSortedBuildLevelState));
	levelstate->parent = NULL;
	/*
	 * Fill index pages with tuples in the sorted order.
	 */

	/*
	 * Write out the partially full non-root pages.
	 *
	 * Keep in mind that the flush can build a new root: if more than one
	 * page remains at a level, a new root is required.
	 */
	/* Write out the root */
	PageSetLSN(levelstate->pages[0], GistBuildLSN);
	rootbuf = smgr_bulk_get_buf(state->bulkstate);
	memcpy(rootbuf, levelstate->pages[0], BLCKSZ);
	smgr_bulk_write(state->bulkstate, GIST_ROOT_BLKNO, rootbuf, true);
/*
 * Add a tuple to a page. If the pages are full, write them out and
 * re-initialize a new page.
 */

	/* Check if the tuple can be added to the current page */
	/* Get index tuples from first page */

	/* Append tuples from each page */

	/* Apply picksplit to list of all collected tuples */

	/* Create split layout from single page */
	dist->itup = union_tuple;

	/* Reset page counter */

	/* Create pages for all partitions in split result */
	for (; dist != NULL; dist = dist->next)
	{
		/* check once per page */
		CHECK_FOR_INTERRUPTS();

		/* Create page and copy data */

		union_tuple = dist->itup;
		/*
		 * Set the right link to point to the previous page. This is just for
		 * debugging purposes: GiST only follows the right link if a page is
		 * split concurrently to a scan, and that cannot happen during index
		 * build.
		 *
		 * It's a bit counterintuitive that we set the right link on the new
		 * page to point to the previous page, not the other way around. But
		 * GiST pages are not ordered like B-tree pages are, so as long as the
		 * right-links form a chain through all the pages at the same level,
		 * the order doesn't matter.
		 */
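		/*
		 * A sketch of the assignment described above; 'target' names the
		 * page being completed and is assumed for illustration:
		 */
		if (levelstate->last_blkno)
			GistPageGetOpaque(target)->rightlink = levelstate->last_blkno;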
		/*
		 * The page is now complete. Assign a block number to it, and pass it
		 * to the bulk writer.
		 */
		blkno = state->pages_allocated++;
		/*
		 * Insert the downlink to the parent page. If this was the root,
		 * create a new page as the parent, which becomes the new root.
		 */
/*-------------------------------------------------------------------------
 * Routines for non-sorted build
 *-------------------------------------------------------------------------
 */
/*
 * Attempt to switch to buffering mode.
 *
 * If there is not enough memory for buffering build, sets bufferingMode
 * to GIST_BUFFERING_DISABLED, so that we don't bother to try the switch
 * anymore. Otherwise initializes the build buffers, and sets bufferingMode to
 * GIST_BUFFERING_ACTIVE.
 */
	double		avgIndexTuplesPerPage,
				maxIndexTuplesPerPage;
	/* Calculate the space on an index page that is available for index tuples */
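	/*
	 * A sketch of that computation, per the real source: usable space is the
	 * block size minus page header, GiST opaque area, one line pointer, and
	 * the fillfactor reservation.
	 */
	pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
		- sizeof(ItemIdData)
		- buildstate->freespace;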
	/*
	 * Calculate average size of already inserted index tuples using gathered
	 * statistics.
	 */
	itupAvgSize = (double) buildstate->indtuplesSize /
		(double) buildstate->indtuples;

	/*
	 * Calculate minimal possible size of index tuple by index metadata.
	 * Minimal possible size of varlena is VARHDRSZ.
	 *
	 * XXX: that's not actually true, as a short varlena can be just 2 bytes.
	 * And we should take padding into account here.
	 */
	itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData));
	for (i = 0; i < index->rd_att->natts; i++)
	{
		CompactAttribute *attr = TupleDescCompactAttr(index->rd_att, i);

		if (attr->attlen < 0)
			itupMinSize += VARHDRSZ;
		else
			itupMinSize += attr->attlen;
	}
	/* Calculate average and maximal number of index tuples that fit on a page */
	avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
	maxIndexTuplesPerPage = pageFreeSpace / itupMinSize;
	/*
	 * We need to calculate two parameters for the buffering algorithm:
	 * levelStep and pagesPerBuffer.
	 *
	 * levelStep determines the size of subtree that we operate on, while
	 * emptying a buffer. A higher value is better, as you need fewer buffer
	 * emptying steps to build the index. However, if you set it too high, the
	 * subtree doesn't fit in cache anymore, and you quickly lose the benefit
	 * of the buffers.
	 *
	 * In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is
	 * the number of tuples on page (i.e. fanout), and M is the amount of
	 * internal memory available. Curiously, they don't explain *why* that
	 * setting is optimal. We calculate it by taking the highest levelStep so
	 * that a subtree still fits in cache. For a small B, our way of
	 * calculating levelStep is very close to Arge et al's formula. For a
	 * large B, our formula gives a value that is 2x higher.
	 *
	 * The average size (in pages) of a subtree of depth n can be calculated
	 * as a geometric series:
	 *
	 * B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B)
	 *
	 * where B is the average number of index tuples on page. The subtree is
	 * cached in the shared buffer cache and the OS cache, so we choose
	 * levelStep so that the subtree size is comfortably smaller than
	 * effective_cache_size, with a safety factor of 4.
	 *
	 * The estimate on the average number of index tuples on page is based on
	 * average tuple sizes observed before switching to buffered build, so the
	 * real subtree size can be somewhat larger. Also, it would be selfish to
	 * gobble the whole cache for our index build. The safety factor of 4
	 * should account for those effects.
	 *
	 * The other limiting factor for setting levelStep is that while
	 * processing a subtree, we need to hold one page for each buffer at the
	 * next lower buffered level. The max. number of buffers needed for that
	 * is maxIndexTuplesPerPage^levelStep. This is very conservative, but
	 * hopefully maintenance_work_mem is set high enough that you're
	 * constrained by effective_cache_size rather than maintenance_work_mem.
	 *
	 * XXX: the buffer hash table consumes a fair amount of memory too per
	 * buffer, but that is not currently taken into account. That scales on
	 * the total number of buffers used, i.e. the index size and on levelStep.
	 * Note that a higher levelStep *reduces* the amount of memory needed for
	 * buffers.
	 */
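	/*
	 * A worked example of the subtree-size formula (numbers illustrative
	 * only): with B = 100 tuples per page and levelStep = 2, the average
	 * subtree size is (1 - 100^3) / (1 - 100) = 10101 pages, about 79 MB at
	 * 8 kB per page. With the safety factor of 4, choosing levelStep = 2
	 * therefore requires an effective_cache_size of roughly 316 MB or more.
	 */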
	levelStep = 1;
	for (;;)
	{
		double		subtreesize;
		double		maxlowestlevelpages;

		/* size of an average subtree at this levelStep (in pages). */
		subtreesize =
			(1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) /
			(1 - avgIndexTuplesPerPage);

		/* max number of pages at the lowest level of a subtree */
		maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep);

		/* subtree must fit in cache (with safety factor of 4) */
		if (subtreesize > effective_cache_size / 4)
			break;

		/* each node in the lowest level of a subtree has one page in memory */
		if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ)
			break;

		/* Good, we can handle this levelStep. See if we can go one higher. */
		levelStep++;
	}
	/*
	 * We just reached an unacceptable value of levelStep in the previous
	 * loop. So, decrease levelStep to get the last acceptable value.
	 */
	levelStep--;

	/*
	 * If there's not enough cache or maintenance_work_mem, fall back to plain
	 * inserts.
	 */
	if (levelStep <= 0)
	{
		elog(DEBUG1, "failed to switch to buffered GiST build");
		buildstate->buildMode = GIST_BUFFERING_DISABLED;
		return;
	}
	/*
	 * The second parameter to set is pagesPerBuffer, which determines the
	 * size of each buffer. We adjust pagesPerBuffer also during the build,
	 * which is why this calculation is in a separate function.
	 */
	pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep);

	/* Initialize GISTBuildBuffers with these parameters */
	buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep,
											gistGetMaxLevel(index));

	gistInitParentMap(buildstate);

	buildstate->buildMode = GIST_BUFFERING_ACTIVE;

	elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
		 levelStep, pagesPerBuffer);
/*
 * Calculate pagesPerBuffer parameter for the buffering algorithm.
 *
 * Buffer size is chosen so that, assuming that tuples are distributed
 * randomly, emptying half a buffer fills on average one page in every buffer
 * at the next lower level.
 */
static int
calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep)
{
	double		pagesPerBuffer;
	double		avgIndexTuplesPerPage;
	double		itupAvgSize;
	Size		pageFreeSpace;

	/* Calculate the space on an index page that is available for index tuples */
	pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
		- sizeof(ItemIdData)
		- buildstate->freespace;

	/*
	 * Calculate average size of already inserted index tuples using gathered
	 * statistics.
	 */
	itupAvgSize = (double) buildstate->indtuplesSize /
		(double) buildstate->indtuples;

	avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;

	/*
	 * Recalculate required size of buffers.
	 */
	pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep);

	return (int) rint(pagesPerBuffer);
}
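/*
 * Why 2 * B^levelStep: half a buffer holds pagesPerBuffer/2 pages of tuples,
 * i.e. (pagesPerBuffer/2) * B tuples, where B is the average number of
 * tuples per page. There are B^levelStep buffers at the next buffered level
 * below, so giving each of them one page's worth (B tuples) requires
 * (pagesPerBuffer/2) * B = B^levelStep * B, hence pagesPerBuffer =
 * 2 * B^levelStep. (This derivation is our gloss on the comment above, not
 * text from the original source.)
 */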
/*
 * Per-tuple callback for table_index_build_scan.
 */

	/* form an index tuple and point it at the heap tuple */

	/* Update tuple count and total size. */
	/*
	 * XXX In buffering builds, the tempCxt is also reset down inside
	 * gistProcessEmptyingQueue(). This is not great because it risks
	 * confusion and possible use of dangling pointers (for example, itup
	 * might be already freed when control returns here). It's generally
	 * better that a memory context be "owned" by only one function. However,
	 * currently this isn't causing issues so it doesn't seem worth the amount
	 * of refactoring that would be needed to avoid it.
	 */
	if (buildstate->buildMode == GIST_BUFFERING_ACTIVE)
	{
		/* We have buffers, so use them. */
		gistBufferingBuildInsert(buildstate, itup);
	}
	else
	{
		/*
		 * There are no buffers (yet). Since we already have the index
		 * relation locked, we call gistdoinsert directly.
		 */
		gistdoinsert(index, itup, buildstate->freespace,
					 buildstate->giststate, buildstate->heaprel, true);
	}
	/* Adjust the target buffer size now */
	/*
	 * In 'auto' mode, check if the index has grown too large to fit in cache,
	 * and switch to buffering mode if it has.
	 *
	 * To avoid excessive calls to smgrnblocks(), only check this every
	 * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples.
	 *
	 * In 'stats' state, switch as soon as we have seen enough tuples to have
	 * some idea of the average tuple size.
	 */
	if ((buildstate->buildMode == GIST_BUFFERING_AUTO &&
		 buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
		 effective_cache_size < smgrnblocks(RelationGetSmgr(index),
											MAIN_FORKNUM)) ||
		(buildstate->buildMode == GIST_BUFFERING_STATS &&
		 buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
	{
		/*
		 * Index doesn't fit in effective cache anymore. Try to switch to
		 * buffering build mode.
		 */
		gistInitBuffering(buildstate);
	}
/*
 * Insert function for buffering index build.
 */
static void
gistBufferingBuildInsert(GISTBuildState *buildstate, IndexTuple itup)
{
	/* Insert the tuple to buffers. */
	gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel);

	/* If we filled up (half of a) buffer, process buffer emptying. */
	gistProcessEmptyingQueue(buildstate);
}
/*
 * Process an index tuple. Runs the tuple down the tree until we reach a leaf
 * page or node buffer, and inserts the tuple there. Returns true if we have
 * to stop the buffer emptying process (because one of the child buffers
 * can't take index tuples anymore).
 */
	/*
	 * Loop until we reach a leaf page (level == 0) or a level with buffers
	 * (not including the level we start at, because we would otherwise make
	 * no progress).
	 */
		/* Have we reached a level with buffers? */

		/* Have we reached a leaf page? */

		/*
		 * Nope. Descend down to the next level then. Choose a child to
		 * descend down to.
		 */
		childoffnum = gistchoose(indexrel, page, itup, giststate);
		/*
		 * Check that the key representing the target child node is consistent
		 * with the key we're inserting. Update it if it's not.
		 */

			/* gistbufferinginserttuples() released the buffer */

		/* Descend to the child */
		parentblkno = blkno;
		blkno = childblkno;
		downlinkoffnum = childoffnum;
		/*
		 * We've reached a level with buffers. Place the index tuple in the
		 * buffer, and add the buffer to the emptying queue if it overflows.
		 */

		/* Find the buffer or create a new one */

		/* Add index tuple to it */
		/*
		 * We've reached a leaf page. Place the tuple here.
		 */
		gistbufferinginserttuples(buildstate, buffer, level,
								  &itup, 1, InvalidOffsetNumber,
								  parentblkno, downlinkoffnum);
		/* gistbufferinginserttuples() released the buffer */
/*
 * Insert tuples to a given page.
 *
 * This is analogous with gistinserttuples() in the regular insertion code.
 *
 * Returns the block number of the page where the (first) new or updated tuple
 * was inserted. Usually that's the original page, but might be a sibling page
 * if the original page was split.
 *
 * Caller should hold a lock on 'buffer' on entry. This function will unlock
 * and unpin it.
 */
	is_split = gistplacetopage(indexrel, buildstate->freespace, giststate,
							   buffer,
							   itup, ntup, oldoffnum, &placed_to_blk,
							   InvalidBuffer,
							   &splitinfo,
							   false,
							   buildstate->heaprel, true);
	/*
	 * If this is a root split, update the root path item kept in memory. This
	 * ensures that all path stacks are always complete, including all parent
	 * nodes up to the root. That simplifies the algorithm to re-find the
	 * correct parent.
	 */
		/*
		 * All the downlinks on the old root page are now on one of the child
		 * pages. Visit all the new child pages to memorize the parents of the
		 * grandchildren.
		 */

			/*
			 * Also remember that the parent of the new child page is the
			 * root block.
			 */
	/*
	 * Insert the downlinks to the parent. This is analogous with
	 * gistfinishsplit() in the regular insertion code, but the locking is
	 * simpler, and we have to maintain the buffers on internal nodes and
	 * the parent map.
	 */
		/* Parent may have changed since we memorized this path. */
		/*
		 * If there's a buffer associated with this page, that needs to be
		 * split too. gistRelocateBuildBuffersOnSplit() will also adjust the
		 * downlinks in 'splitinfo', to make sure they're consistent not only
		 * with the tuples already on the pages, but also the tuples in the
		 * buffers that will eventually be inserted to them.
		 */
		/* Create an array of all the downlink tuples */
		foreach(lc, splitinfo)
			/*
			 * Remember the parent of each new child page in our parent map.
			 * This assumes that the downlinks fit on the parent page. If the
			 * parent page is split, too, when we recurse up to insert the
			 * downlinks, the recursive gistbufferinginserttuples() call will
			 * update the map again.
			 */
			/*
			 * Also update the parent map for all the downlinks that got moved
			 * to a different page. (Actually this also loops through the
			 * downlinks that stayed on the original page, but it does no
			 * harm.)
			 */
		/*
		 * Since there's no concurrent access, we can release the lower
		 * level buffers immediately. This includes the original page.
		 */
		/* Insert them into parent. */
		gistbufferinginserttuples(buildstate, parentBuffer, level + 1,
								  downlinks, ndownlinks, downlinkoffnum,
								  InvalidBlockNumber, InvalidOffsetNumber);

	return placed_to_blk;
/*
 * Find the downlink pointing to a child page.
 *
 * 'childblkno' indicates the child page to find the parent for. 'level' is
 * the level of the child. On entry, *parentblkno and *downlinkoffnum can
 * point to a location where the downlink used to be - we will check that
 * location first, and save some cycles if it hasn't moved. The function
 * returns a buffer containing the downlink, exclusively-locked, and
 * *parentblkno and *downlinkoffnum are set to the real location of the
 * downlink.
 *
 * If the child page is a leaf (level == 0), the caller must supply a correct
 * parentblkno. Otherwise we use the parent map hash table to find the parent
 * block.
 *
 * This function serves the same purpose as gistFindCorrectParent() during
 * normal index inserts, but this is simpler because we don't need to deal
 * with concurrent inserts.
 */
		/*
		 * For a leaf page, the caller must supply a correct parent block
		 * number.
		 */
		if (*parentblkno == InvalidBlockNumber)
			elog(ERROR, "no parent buffer provided of child %u", childblkno);
	/* Check if it was not moved */
	/*
	 * Downlink was not at the offset where it used to be. Scan the page to
	 * find it. During normal gist insertions, it might've moved to another
	 * page, to the right, but during a buffering build, we keep track of the
	 * parent of each page in the lookup table so we should always know what
	 * page it's on.
	 */
			/* yes!!, found it */
			*downlinkoffnum = off;
	elog(ERROR, "failed to re-find parent for block %u", childblkno);
/*
 * Process the buffer emptying stack. Emptying of one buffer can cause
 * emptying of other buffers. This function iterates until this cascading
 * emptying process has finished, i.e. until the buffer emptying stack is
 * empty.
 */
	/* Iterate while we have elements in the buffer emptying stack. */

		/* Get a node buffer from the emptying stack. */

		/*
		 * We are going to load the last pages of the buffers that we will
		 * empty into. So let's unload any previously loaded buffers first.
		 */
		/*
		 * Pop tuples from the buffer and run them down to the buffers at
		 * lower level, or leaf pages. We continue until one of the lower
		 * level buffers fills up, or this buffer runs empty.
		 *
		 * In Arge et al's paper, the buffer emptying is stopped after
		 * processing 1/2 node buffer worth of tuples, to avoid overfilling
		 * any of the lower level buffers. However, it's more efficient to
		 * keep going until one of the lower level buffers actually fills up,
		 * so that's what we do. This doesn't need to be exact, if a buffer
		 * overfills by a few tuples, there's no harm done.
		 */
			/* Get next index tuple from the buffer */
			/*
			 * Run it down to the underlying node buffer or leaf page.
			 *
			 * Note: it's possible that the buffer we're emptying splits as a
			 * result of this call. If that happens, our emptyingNodeBuffer
			 * points to the left half of the split. After split, it's very
			 * likely that the new left buffer is no longer over the half-full
			 * threshold, but we might as well keep flushing tuples from it
			 * until we fill a lower-level buffer.
			 */
				/*
				 * A lower level buffer filled up. Stop emptying this buffer,
				 * to avoid overflowing the lower level buffer.
				 */

		/* Free all the memory allocated during index tuple processing */
/*
 * Empty all node buffers, from top to bottom. This is done at the end of
 * index build to flush all remaining tuples to the index.
 *
 * Note: This destroys the buffersOnLevels lists, so the buffers should not
 * be inserted to after this call.
 */
	/*
	 * Iterate through the levels from top to bottom.
	 */

		/*
		 * Empty all buffers on this level. Note that new buffers can pop up
		 * in the list during the processing, as a result of page splits, so a
		 * simple walk through the list won't work. We remove buffers from the
		 * list when we see them empty; a buffer can't become non-empty once
		 * it's been fully emptied.
		 */

			/*
			 * Add this buffer to the emptying queue, and proceed to empty
			 * the buffer.
			 */
		elog(DEBUG2, "emptied all buffers at level %d", i);
/*
 * Get the depth of the GiST index.
 */
	/*
	 * Traverse down the tree, starting from the root, until we hit the leaf
	 * level.
	 */

		/*
		 * There's no concurrent access during index build, so locking is just
		 * pro forma.
		 */

		/* We hit the bottom, so we're done. */
		/*
		 * Pick the first downlink on the page, and follow it. It doesn't
		 * matter which downlink we choose, the tree has the same depth
		 * everywhere, so we just pick the first one.
		 */
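		/*
		 * A sketch of that step, following the real source (variable
		 * declarations omitted):
		 */
		itup = (IndexTuple) PageGetItem(page,
										PageGetItemId(page, FirstOffsetNumber));
		blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
		UnlockReleaseBuffer(buffer);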
		/*
		 * We're going further down the tree, so there is at least one more
		 * level below.
		 */
		maxLevel++;
/*
 * Routines for managing the parent map.
 *
 * Whenever a page is split, we need to insert the downlinks into the parent.
 * We need to somehow find the parent page to do that. In normal insertions,
 * we keep a stack of nodes visited when we descend the tree. However, in
 * buffering build, we can start descending the tree from any internal node,
 * when we empty a buffer by cascading tuples to its children. So we don't
 * have a full stack up to the root available at that time.
 *
 * So instead, we maintain a hash table to track the parent of every internal
 * page. We don't need to track the parents of leaf nodes, however. Whenever
 * we insert to a leaf, we've just descended down from its parent, so we know
 * its immediate parent already. This helps a lot to limit the memory used
 * by this hash table.
 *
 * Whenever an internal node is split, the parent map needs to be updated.
 * The parent of the new child page needs to be recorded, and the entries
 * for all pages whose downlinks are moved to a new page at the split need
 * to be updated as well.
 *
 * We also update the parent map whenever we descend the tree. That might seem
 * unnecessary, because we maintain the map whenever a downlink is moved or
 * created, but it is needed because we switch to buffering mode after
 * creating a tree with regular index inserts. Any pages created before
 * switching to buffering mode will not be present in the parent map initially,
 * but will be added there the first time we visit them.
 */
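/*
 * A sketch of the hash table behind the parent map; the entry layout and
 * setup follow the real source, though details may differ:
 */
typedef struct
{
	BlockNumber childblkno;		/* hash key */
	BlockNumber parentblkno;
} ParentMapEntry;

static void
gistInitParentMap(GISTBuildState *buildstate)
{
	HASHCTL		hashCtl;

	hashCtl.keysize = sizeof(BlockNumber);
	hashCtl.entrysize = sizeof(ParentMapEntry);
	hashCtl.hcxt = CurrentMemoryContext;
	buildstate->parentMap = hash_create("gistbuild parent map",
										1024,
										&hashCtl,
										HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
}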
/*
 * Scan all downlinks on a page, and memorize their parent.
 */
	/* Find the child's entry in the hash table */
	entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
										   &child,
										   HASH_FIND,
										   &found);
	if (!found)
		elog(ERROR, "could not find parent of block %u in lookup table", child);

	return entry->parentblkno;