[フレーム]

nbtsplitloc.c

Go to the documentation of this file.

1/*-------------------------------------------------------------------------

2 *

3 * nbtsplitloc.c

4 * Choose split point code for Postgres btree implementation.

5 *

8 *

9 *

10 * IDENTIFICATION

11 * src/backend/access/nbtree/nbtsplitloc.c

12 *

13 *-------------------------------------------------------------------------

14 */

15#include "postgres.h"

16

17#include "access/nbtree.h"

18#include "access/tableam.h"

19#include "common/int.h"

20

21 typedef enum

22{

23 /* strategy for searching through materialized list of split points */

24 SPLIT_DEFAULT, /* give some weight to truncation */

25 SPLIT_MANY_DUPLICATES, /* find minimally distinguishing point */

26 SPLIT_SINGLE_VALUE, /* leave left page almost full */

27} FindSplitStrat;

28

29 typedef struct

30{

31 /* details of free space left by split */

32 int16 curdelta; /* current leftfree/rightfree delta */

33 int16 leftfree; /* space left on left page post-split */

34 int16 rightfree; /* space left on right page post-split */

35

36 /* split point identifying fields (returned by _bt_findsplitloc) */

37 OffsetNumber firstrightoff; /* first origpage item on rightpage */

38 bool newitemonleft; /* new item goes on left, or right? */

39} SplitPoint;

40

41 typedef struct

42{

43 /* context data for _bt_recsplitloc */

44 Relation rel; /* index relation */

45 Page origpage; /* page undergoing split */

46 IndexTuple newitem; /* new item (cause of page split) */

47 Size newitemsz; /* size of newitem (includes line pointer) */

48 bool is_leaf; /* T if splitting a leaf page */

49 bool is_rightmost; /* T if splitting rightmost page on level */

50 OffsetNumber newitemoff; /* where the new item is to be inserted */

51 int leftspace; /* space available for items on left page */

52 int rightspace; /* space available for items on right page */

53 int olddataitemstotal; /* space taken by old items */

54 Size minfirstrightsz; /* smallest firstright size */

55

56 /* candidate split point data */

57 int maxsplits; /* maximum number of splits */

58 int nsplits; /* current number of splits */

59 SplitPoint *splits; /* all candidate split points for page */

60 int interval; /* current range of acceptable split points */

61} FindSplitData;

62

63static void _bt_recsplitloc(FindSplitData *state,

64 OffsetNumber firstrightoff, bool newitemonleft,

65 int olddataitemstoleft,

66 Size firstrightofforigpagetuplesz);

67static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,

68 bool usemult);

69static int _bt_splitcmp(const void *arg1, const void *arg2);

70static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,

71 int leaffillfactor, bool *usemult);

72static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);

73static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,

74 bool *newitemonleft, FindSplitStrat strategy);

75static int _bt_defaultinterval(FindSplitData *state);

76static int _bt_strategy(FindSplitData *state, SplitPoint *leftpage,

77 SplitPoint *rightpage, FindSplitStrat *strategy);

78static void _bt_interval_edges(FindSplitData *state,

79 SplitPoint **leftinterval, SplitPoint **rightinterval);

80static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);

81static inline IndexTuple _bt_split_lastleft(FindSplitData *state,

82 SplitPoint *split);

83static inline IndexTuple _bt_split_firstright(FindSplitData *state,

84 SplitPoint *split);

85

86

87/*

88 * _bt_findsplitloc() -- find an appropriate place to split a page.

89 *

90 * The main goal here is to equalize the free space that will be on each

91 * split page, *after accounting for the inserted tuple*. (If we fail to

92 * account for it, we might find ourselves with too little room on the page

93 * that it needs to go into!)

94 *

95 * If the page is the rightmost page on its level, we instead try to arrange

96 * to leave the left split page fillfactor% full. In this way, when we are

97 * inserting successively increasing keys (consider sequences, timestamps,

98 * etc) we will end up with a tree whose pages are about fillfactor% full,

99 * instead of the 50% full result that we'd get without this special case.

100 * This is the same as nbtsort.c produces for a newly-created tree. Note

101 * that leaf and nonleaf pages use different fillfactors. Note also that

102 * there are a number of further special cases where fillfactor is not

103 * applied in the standard way.

104 *

105 * We are passed the intended insert position of the new tuple, expressed as

106 * the offsetnumber of the tuple it must go in front of (this could be

107 * maxoff+1 if the tuple is to go at the end). The new tuple itself is also

108 * passed, since it's needed to give some weight to how effective suffix

109 * truncation will be. The implementation picks the split point that

110 * maximizes the effectiveness of suffix truncation from a small list of

111 * alternative candidate split points that leave each side of the split with

112 * about the same share of free space. Suffix truncation is secondary to

113 * equalizing free space, except in cases with large numbers of duplicates.

114 * Note that it is always assumed that caller goes on to perform truncation,

115 * even with pg_upgrade'd indexes where that isn't actually the case

116 * (!heapkeyspace indexes). See nbtree/README for more information about

117 * suffix truncation.

118 *

119 * We return the index of the first existing tuple that should go on the

120 * righthand page (which is called firstrightoff), plus a boolean

121 * indicating whether the new tuple goes on the left or right page. You

122 * can think of the returned state as a point _between_ two adjacent data

123 * items (lastleft and firstright data items) on an imaginary version of

124 * origpage that already includes newitem. The bool is necessary to

125 * disambiguate the case where firstrightoff == newitemoff (i.e. it is

126 * sometimes needed to determine if the firstright tuple for the split is

127 * newitem rather than the tuple from origpage at offset firstrightoff).

128 */

129OffsetNumber

130 _bt_findsplitloc(Relation rel,

131 Page origpage,

132 OffsetNumber newitemoff,

133 Size newitemsz,

134 IndexTuple newitem,

135 bool *newitemonleft)

136{

137 BTPageOpaque opaque;

138 int leftspace,

139 rightspace,

140 olddataitemstotal,

141 olddataitemstoleft,

142 perfectpenalty,

143 leaffillfactor;

144 FindSplitData state;

145 FindSplitStrat strategy;

146 ItemId itemid;

147 OffsetNumber offnum,

148 maxoff,

149 firstrightoff;

150 double fillfactormult;

151 bool usemult;

152 SplitPoint leftpage,

153 rightpage;

154

155 opaque = BTPageGetOpaque(origpage);

156 maxoff = PageGetMaxOffsetNumber(origpage);

157

158 /* Total free space available on a btree page, after fixed overhead */

159 leftspace = rightspace =

160 PageGetPageSize(origpage) - SizeOfPageHeaderData -

161 MAXALIGN(sizeof(BTPageOpaqueData));

162

163 /* The right page will have the same high key as the old page */

164 if (!P_RIGHTMOST(opaque))

165 {

166 itemid = PageGetItemId(origpage, P_HIKEY);

167 rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +

168 sizeof(ItemIdData));

169 }

170

171 /* Count up total space in data items before actually scanning 'em */

172 olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);

173 leaffillfactor = BTGetFillFactor(rel);

174

175 /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */

176 newitemsz += sizeof(ItemIdData);

177 state.rel = rel;

178 state.origpage = origpage;

179 state.newitem = newitem;

180 state.newitemsz = newitemsz;

181 state.is_leaf = P_ISLEAF(opaque);

182 state.is_rightmost = P_RIGHTMOST(opaque);

183 state.leftspace = leftspace;

184 state.rightspace = rightspace;

185 state.olddataitemstotal = olddataitemstotal;

186 state.minfirstrightsz = SIZE_MAX;

187 state.newitemoff = newitemoff;

188

189 /* newitem cannot be a posting list item */

190 Assert(!BTreeTupleIsPosting(newitem));

191

192 /*

193 * nsplits should never exceed maxoff because there will be at most as

194 * many candidate split points as there are points _between_ tuples, once

195 * you imagine that the new item is already on the original page (the

196 * final number of splits may be slightly lower because not all points

197 * between tuples will be legal).

198 */

199 state.maxsplits = maxoff;

200 state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);

201 state.nsplits = 0;

202

203 /*

204 * Scan through the data items and calculate space usage for a split at

205 * each possible position

206 */

207 olddataitemstoleft = 0;

208

209 for (offnum = P_FIRSTDATAKEY(opaque);

210 offnum <= maxoff;

211 offnum = OffsetNumberNext(offnum))

212 {

213 Size itemsz;

214

215 itemid = PageGetItemId(origpage, offnum);

216 itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);

217

218 /*

219 * When item offset number is not newitemoff, neither side of the

220 * split can be newitem. Record a split after the previous data item

221 * from original page, but before the current data item from original

222 * page. (_bt_recsplitloc() will reject the split when there are no

223 * previous items, which we rely on.)

224 */

225 if (offnum < newitemoff)

226 _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);

227 else if (offnum > newitemoff)

228 _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);

229 else

230 {

231 /*

232 * Record a split after all "offnum < newitemoff" original page

233 * data items, but before newitem

234 */

235 _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);

236

237 /*

238 * Record a split after newitem, but before data item from

239 * original page at offset newitemoff/current offset

240 */

241 _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);

242 }

243

244 olddataitemstoleft += itemsz;

245 }

246

247 /*

248 * Record a split after all original page data items, but before newitem.

249 * (Though only when it's possible that newitem will end up alone on new

250 * right page.)

251 */

252 Assert(olddataitemstoleft == olddataitemstotal);

253 if (newitemoff > maxoff)

254 _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);

255

256 /*

257 * I believe it is not possible to fail to find a feasible split, but just

258 * in case ...

259 */

260 if (state.nsplits == 0)

261 elog(ERROR, "could not find a feasible split point for index \"%s\"",

262 RelationGetRelationName(rel));

263

264 /*

265 * Start search for a split point among list of legal split points. Give

266 * primary consideration to equalizing available free space in each half

267 * of the split initially (start with default strategy), while applying

268 * rightmost and split-after-new-item optimizations where appropriate.

269 * Either of the two other fallback strategies may be required for cases

270 * with a large number of duplicates around the original/space-optimal

271 * split point.

272 *

273 * Default strategy gives some weight to suffix truncation in deciding a

274 * split point on leaf pages. It attempts to select a split point where a

275 * distinguishing attribute appears earlier in the new high key for the

276 * left side of the split, in order to maximize the number of trailing

277 * attributes that can be truncated away. Only candidate split points

278 * that imply an acceptable balance of free space on each side are

279 * considered. See _bt_defaultinterval().

280 */

281 if (!state.is_leaf)

282 {

283 /* fillfactormult only used on rightmost page */

284 usemult = state.is_rightmost;

285 fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;

286 }

287 else if (state.is_rightmost)

288 {

289 /* Rightmost leaf page -- fillfactormult always used */

290 usemult = true;

291 fillfactormult = leaffillfactor / 100.0;

292 }

293 else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))

294 {

295 /*

296 * New item inserted at rightmost point among a localized grouping on

297 * a leaf page -- apply "split after new item" optimization, either by

298 * applying leaf fillfactor multiplier, or by choosing the exact split

299 * point that leaves newitem as lastleft. (usemult is set for us.)

300 */

301 if (usemult)

302 {

303 /* fillfactormult should be set based on leaf fillfactor */

304 fillfactormult = leaffillfactor / 100.0;

305 }

306 else

307 {

308 /* find precise split point after newitemoff */

309 for (int i = 0; i < state.nsplits; i++)

310 {

311 SplitPoint *split = state.splits + i;

312

313 if (split->newitemonleft &&

314 newitemoff == split->firstrightoff)

315 {

316 pfree(state.splits);

317 *newitemonleft = true;

318 return newitemoff;

319 }

320 }

321

322 /*

323 * Cannot legally split after newitemoff; proceed with split

324 * without using fillfactor multiplier. This is defensive, and

325 * should never be needed in practice.

326 */

327 fillfactormult = 0.50;

328 }

329 }

330 else

331 {

332 /* Other leaf page. 50:50 page split. */

333 usemult = false;

334 /* fillfactormult not used, but be tidy */

335 fillfactormult = 0.50;

336 }

337

338 /*

339 * Save leftmost and rightmost splits for page before original ordinal

340 * sort order is lost by delta/fillfactormult sort

341 */

342 leftpage = state.splits[0];

343 rightpage = state.splits[state.nsplits - 1];

344

345 /* Give split points a fillfactormult-wise delta, and sort on deltas */

346 _bt_deltasortsplits(&state, fillfactormult, usemult);

347

348 /* Determine split interval for default strategy */

349 state.interval = _bt_defaultinterval(&state);

350

351 /*

352 * Determine if default strategy/split interval will produce a

353 * sufficiently distinguishing split, or if we should change strategies.

354 * Alternative strategies change the range of split points that are

355 * considered acceptable (split interval), and possibly change

356 * fillfactormult, in order to deal with pages with a large number of

357 * duplicates gracefully.

358 *

359 * Pass low and high splits for the entire page (actually, they're for an

360 * imaginary version of the page that includes newitem). These are used

361 * when the initial split interval encloses split points that are full of

362 * duplicates, and we need to consider if it's even possible to avoid

363 * appending a heap TID.

364 */

365 perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);

366

367 if (strategy == SPLIT_DEFAULT)

368 {

369 /*

370 * Default strategy worked out (always works out with internal page).

371 * Original split interval still stands.

372 */

373 }

374

375 /*

376 * Many duplicates strategy is used when a heap TID would otherwise be

377 * appended, but the page isn't completely full of logical duplicates.

378 *

379 * The split interval is widened to include all legal candidate split

380 * points. There might be a few as two distinct values in the whole-page

381 * split interval, though it's also possible that most of the values on

382 * the page are unique. The final split point will either be to the

383 * immediate left or to the immediate right of the group of duplicate

384 * tuples that enclose the first/delta-optimal split point (perfect

385 * penalty was set so that the lowest delta split point that avoids

386 * appending a heap TID will be chosen). Maximizing the number of

387 * attributes that can be truncated away is not a goal of the many

388 * duplicates strategy.

389 *

390 * Single value strategy is used when it is impossible to avoid appending

391 * a heap TID. It arranges to leave the left page very full. This

392 * maximizes space utilization in cases where tuples with the same

393 * attribute values span many pages. Newly inserted duplicates will tend

394 * to have higher heap TID values, so we'll end up splitting to the right

395 * consistently. (Single value strategy is harmless though not

396 * particularly useful with !heapkeyspace indexes.)

397 */

398 else if (strategy == SPLIT_MANY_DUPLICATES)

399 {

400 Assert(state.is_leaf);

401 /* Shouldn't try to truncate away extra user attributes */

402 Assert(perfectpenalty ==

403 IndexRelationGetNumberOfKeyAttributes(state.rel));

404 /* No need to resort splits -- no change in fillfactormult/deltas */

405 state.interval = state.nsplits;

406 }

407 else if (strategy == SPLIT_SINGLE_VALUE)

408 {

409 Assert(state.is_leaf);

410 /* Split near the end of the page */

411 usemult = true;

412 fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;

413 /* Resort split points with new delta */

414 _bt_deltasortsplits(&state, fillfactormult, usemult);

415 /* Appending a heap TID is unavoidable, so interval of 1 is fine */

416 state.interval = 1;

417 }

418

419 /*

420 * Search among acceptable split points (using final split interval) for

421 * the entry that has the lowest penalty, and is therefore expected to

422 * maximize fan-out. Sets *newitemonleft for us.

423 */

424 firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,

425 strategy);

426 pfree(state.splits);

427

428 return firstrightoff;

429}

430

431/*

432 * Subroutine to record a particular point between two tuples (possibly the

433 * new item) on page (ie, combination of firstrightoff and newitemonleft

434 * settings) in *state for later analysis. This is also a convenient point to

435 * check if the split is legal (if it isn't, it won't be recorded).

436 *

437 * firstrightoff is the offset of the first item on the original page that

438 * goes to the right page, and firstrightofforigpagetuplesz is the size of

439 * that tuple. firstrightoff can be > max offset, which means that all the

440 * old items go to the left page and only the new item goes to the right page.

441 * We don't actually use firstrightofforigpagetuplesz in that case (actually,

442 * we don't use it for _any_ split where the firstright tuple happens to be

443 * newitem).

444 *

445 * olddataitemstoleft is the total size of all old items to the left of the

446 * split point that is recorded here when legal. Should not include

447 * newitemsz, since that is handled here.

448 */

449static void

450 _bt_recsplitloc(FindSplitData *state,

451 OffsetNumber firstrightoff,

452 bool newitemonleft,

453 int olddataitemstoleft,

454 Size firstrightofforigpagetuplesz)

455{

456 int16 leftfree,

457 rightfree;

458 Size firstrightsz;

459 Size postingsz = 0;

460 bool newitemisfirstright;

461

462 /* Is the new item going to be split point's firstright tuple? */

463 newitemisfirstright = (firstrightoff == state->newitemoff &&

464 !newitemonleft);

465

466 if (newitemisfirstright)

467 firstrightsz = state->newitemsz;

468 else

469 {

470 firstrightsz = firstrightofforigpagetuplesz;

471

472 /*

473 * Calculate suffix truncation space saving when firstright tuple is a

474 * posting list tuple, though only when the tuple is over 64 bytes

475 * including line pointer overhead (arbitrary). This avoids accessing

476 * the tuple in cases where its posting list must be very small (if

477 * tuple has one at all).

478 *

479 * Note: We don't do this in the case where firstright tuple is

480 * newitem, since newitem cannot have a posting list.

481 */

482 if (state->is_leaf && firstrightsz > 64)

483 {

484 ItemId itemid;

485 IndexTuple newhighkey;

486

487 itemid = PageGetItemId(state->origpage, firstrightoff);

488 newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);

489

490 if (BTreeTupleIsPosting(newhighkey))

491 postingsz = IndexTupleSize(newhighkey) -

492 BTreeTupleGetPostingOffset(newhighkey);

493 }

494 }

495

496 /* Account for all the old tuples */

497 leftfree = state->leftspace - olddataitemstoleft;

498 rightfree = state->rightspace -

499 (state->olddataitemstotal - olddataitemstoleft);

500

501 /*

502 * The first item on the right page becomes the high key of the left page;

503 * therefore it counts against left space as well as right space (we

504 * cannot assume that suffix truncation will make it any smaller). When

505 * index has included attributes, then those attributes of left page high

506 * key will be truncated leaving that page with slightly more free space.

507 * However, that shouldn't affect our ability to find valid split

508 * location, since we err in the direction of being pessimistic about free

509 * space on the left half. Besides, even when suffix truncation of

510 * non-TID attributes occurs, the new high key often won't even be a

511 * single MAXALIGN() quantum smaller than the firstright tuple it's based

512 * on.

513 *

514 * If we are on the leaf level, assume that suffix truncation cannot avoid

515 * adding a heap TID to the left half's new high key when splitting at the

516 * leaf level. In practice the new high key will often be smaller and

517 * will rarely be larger, but conservatively assume the worst case. We do

518 * go to the trouble of subtracting away posting list overhead, though

519 * only when it looks like it will make an appreciable difference.

520 * (Posting lists are the only case where truncation will typically make

521 * the final high key far smaller than firstright, so being a bit more

522 * precise there noticeably improves the balance of free space.)

523 */

524 if (state->is_leaf)

525 leftfree -= (int16) (firstrightsz +

526 MAXALIGN(sizeof(ItemPointerData)) -

527 postingsz);

528 else

529 leftfree -= (int16) firstrightsz;

530

531 /* account for the new item */

532 if (newitemonleft)

533 leftfree -= (int16) state->newitemsz;

534 else

535 rightfree -= (int16) state->newitemsz;

536

537 /*

538 * If we are not on the leaf level, we will be able to discard the key

539 * data from the first item that winds up on the right page.

540 */

541 if (!state->is_leaf)

542 rightfree += (int16) firstrightsz -

543 (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));

544

545 /* Record split if legal */

546 if (leftfree >= 0 && rightfree >= 0)

547 {

548 Assert(state->nsplits < state->maxsplits);

549

550 /* Determine smallest firstright tuple size among legal splits */

551 state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);

552

553 state->splits[state->nsplits].curdelta = 0;

554 state->splits[state->nsplits].leftfree = leftfree;

555 state->splits[state->nsplits].rightfree = rightfree;

556 state->splits[state->nsplits].firstrightoff = firstrightoff;

557 state->splits[state->nsplits].newitemonleft = newitemonleft;

558 state->nsplits++;

559 }

560}

561

562/*

563 * Subroutine to assign space deltas to materialized array of candidate split

564 * points based on current fillfactor, and to sort array using that fillfactor

565 */

566static void

567 _bt_deltasortsplits(FindSplitData *state, double fillfactormult,

568 bool usemult)

569{

570 for (int i = 0; i < state->nsplits; i++)

571 {

572 SplitPoint *split = state->splits + i;

573 int16 delta;

574

575 if (usemult)

576 delta = fillfactormult * split->leftfree -

577 (1.0 - fillfactormult) * split->rightfree;

578 else

579 delta = split->leftfree - split->rightfree;

580

581 if (delta < 0)

582 delta = -delta;

583

584 /* Save delta */

585 split->curdelta = delta;

586 }

587

588 qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);

589}

590

591/*

592 * qsort-style comparator used by _bt_deltasortsplits()

593 */

594static int

595 _bt_splitcmp(const void *arg1, const void *arg2)

596{

597 SplitPoint *split1 = (SplitPoint *) arg1;

598 SplitPoint *split2 = (SplitPoint *) arg2;

599

600 return pg_cmp_s16(split1->curdelta, split2->curdelta);

601}

602

603/*

604 * Subroutine to determine whether or not a non-rightmost leaf page should be

605 * split immediately after the would-be original page offset for the

606 * new/incoming tuple (or should have leaf fillfactor applied when new item is

607 * to the right on original page). This is appropriate when there is a

608 * pattern of localized monotonically increasing insertions into a composite

609 * index, where leading attribute values form local groupings, and we

610 * anticipate further insertions of the same/current grouping (new item's

611 * grouping) in the near future. This can be thought of as a variation on

612 * applying leaf fillfactor during rightmost leaf page splits, since cases

613 * that benefit will converge on packing leaf pages leaffillfactor% full over

614 * time.

615 *

616 * We may leave extra free space remaining on the rightmost page of a "most

617 * significant column" grouping of tuples if that grouping never ends up

618 * having future insertions that use the free space. That effect is

619 * self-limiting; a future grouping that becomes the "nearest on the right"

620 * grouping of the affected grouping usually puts the extra free space to good

621 * use.

622 *

623 * Caller uses optimization when routine returns true, though the exact action

624 * taken by caller varies. Caller uses original leaf page fillfactor in

625 * standard way rather than using the new item offset directly when *usemult

626 * was also set to true here. Otherwise, caller applies optimization by

627 * locating the legal split point that makes the new tuple the lastleft tuple

628 * for the split.

629 */

630static bool

631 _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,

632 int leaffillfactor, bool *usemult)

633{

634 int16 nkeyatts;

635 ItemId itemid;

636 IndexTuple tup;

637 int keepnatts;

638

639 Assert(state->is_leaf && !state->is_rightmost);

640

641 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);

642

643 /* Single key indexes not considered here */

644 if (nkeyatts == 1)

645 return false;

646

647 /* Ascending insertion pattern never inferred when new item is first */

648 if (state->newitemoff == P_FIRSTKEY)

649 return false;

650

651 /*

652 * Only apply optimization on pages with equisized tuples, since ordinal

653 * keys are likely to be fixed-width. Testing if the new tuple is

654 * variable width directly might also work, but that fails to apply the

655 * optimization to indexes with a numeric_ops attribute.

656 *

657 * Conclude that page has equisized tuples when the new item is the same

658 * width as the smallest item observed during pass over page, and other

659 * non-pivot tuples must be the same width as well. (Note that the

660 * possibly-truncated existing high key isn't counted in

661 * olddataitemstotal, and must be subtracted from maxoff.)

662 */

663 if (state->newitemsz != state->minfirstrightsz)

664 return false;

665 if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)

666 return false;

667

668 /*

669 * Avoid applying optimization when tuples are wider than a tuple

670 * consisting of two non-NULL int8/int64 attributes (or four non-NULL

671 * int4/int32 attributes)

672 */

673 if (state->newitemsz >

674 MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +

675 sizeof(ItemIdData))

676 return false;

677

678 /*

679 * At least the first attribute's value must be equal to the corresponding

680 * value in previous tuple to apply optimization. New item cannot be a

681 * duplicate, either.

682 *

683 * Handle case where new item is to the right of all items on the existing

684 * page. This is suggestive of monotonically increasing insertions in

685 * itself, so the "heap TID adjacency" test is not applied here.

686 */

687 if (state->newitemoff > maxoff)

688 {

689 itemid = PageGetItemId(state->origpage, maxoff);

690 tup = (IndexTuple) PageGetItem(state->origpage, itemid);

691 keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);

692

693 if (keepnatts > 1 && keepnatts <= nkeyatts)

694 {

695 *usemult = true;

696 return true;

697 }

698

699 return false;

700 }

701

702 /*

703 * "Low cardinality leading column, high cardinality suffix column"

704 * indexes with a random insertion pattern (e.g., an index with a boolean

705 * column, such as an index on '(book_is_in_print, book_isbn)') present us

706 * with a risk of consistently misapplying the optimization. We're

707 * willing to accept very occasional misapplication of the optimization,

708 * provided the cases where we get it wrong are rare and self-limiting.

709 *

710 * Heap TID adjacency strongly suggests that the item just to the left was

711 * inserted very recently, which limits overapplication of the

712 * optimization. Besides, all inappropriate cases triggered here will

713 * still split in the middle of the page on average.

714 */

715 itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));

716 tup = (IndexTuple) PageGetItem(state->origpage, itemid);

717 /* Do cheaper test first */

718 if (BTreeTupleIsPosting(tup) ||

719 !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))

720 return false;

721 /* Check same conditions as rightmost item case, too */

722 keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);

723

724 if (keepnatts > 1 && keepnatts <= nkeyatts)

725 {

726 double interp = (double) state->newitemoff / ((double) maxoff + 1);

727 double leaffillfactormult = (double) leaffillfactor / 100.0;

728

729 /*

730 * Don't allow caller to split after a new item when it will result in

731 * a split point to the right of the point that a leaf fillfactor

732 * split would use -- have caller apply leaf fillfactor instead

733 */

734 *usemult = interp > leaffillfactormult;

735

736 return true;

737 }

738

739 return false;

740}

741

742/*

743 * Subroutine for determining if two heap TIDS are "adjacent".

744 *

745 * Adjacent means that the high TID is very likely to have been inserted into

746 * heap relation immediately after the low TID, probably during the current

747 * transaction.

748 */

749static bool

750 _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)

751{

752 BlockNumber lowblk,

753 highblk;

754

755 lowblk = ItemPointerGetBlockNumber(lowhtid);

756 highblk = ItemPointerGetBlockNumber(highhtid);

757

758 /* Make optimistic assumption of adjacency when heap blocks match */

759 if (lowblk == highblk)

760 return true;

761

762 /* When heap block one up, second offset should be FirstOffsetNumber */

763 if (lowblk + 1 == highblk &&

764 ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)

765 return true;

766

767 return false;

768}

769

770/*

771 * Subroutine to find the "best" split point among candidate split points.

772 * The best split point is the split point with the lowest penalty among split

773 * points that fall within current/final split interval. Penalty is an

774 * abstract score, with a definition that varies depending on whether we're

775 * splitting a leaf page or an internal page. See _bt_split_penalty() for

776 * details.

777 *

778 * "perfectpenalty" is assumed to be the lowest possible penalty among

779 * candidate split points. This allows us to return early without wasting

780 * cycles on calculating the first differing attribute for all candidate

781 * splits when that clearly cannot improve our choice (or when we only want a

782 * minimally distinguishing split point, and don't want to make the split any

783 * more unbalanced than is necessary).

784 *

785 * We return the index of the first existing tuple that should go on the right

786 * page, plus a boolean indicating if new item is on left of split point.

787 */

788static OffsetNumber

789 _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,

790 bool *newitemonleft, FindSplitStrat strategy)

791{

792 int bestpenalty,

793 lowsplit;

794 int highsplit = Min(state->interval, state->nsplits);

795 SplitPoint *final;

796

797 bestpenalty = INT_MAX;

798 lowsplit = 0;

799 for (int i = lowsplit; i < highsplit; i++)

800 {

801 int penalty;

802

803 penalty = _bt_split_penalty(state, state->splits + i);

804

805 if (penalty < bestpenalty)

806 {

807 bestpenalty = penalty;

808 lowsplit = i;

809 }

810

811 if (penalty <= perfectpenalty)

812 break;

813 }

814

815 final = &state->splits[lowsplit];

816

817 /*

818 * There is a risk that the "many duplicates" strategy will repeatedly do

819 * the wrong thing when there are monotonically decreasing insertions to

820 * the right of a large group of duplicates. Repeated splits could leave

821 * a succession of right half pages with free space that can never be

822 * used. This must be avoided.

823 *

824 * Consider the example of the leftmost page in a single integer attribute

825 * NULLS FIRST index which is almost filled with NULLs. Monotonically

826 * decreasing integer insertions might cause the same leftmost page to

827 * split repeatedly at the same point. Each split derives its new high

828 * key from the lowest current value to the immediate right of the large

829 * group of NULLs, which will always be higher than all future integer

830 * insertions, directing all future integer insertions to the same

831 * leftmost page.

832 */

833 if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&

834 !final->newitemonleft && final->firstrightoff >= state->newitemoff &&

835 final->firstrightoff < state->newitemoff + 9)

836 {

837 /*

838 * Avoid the problem by performing a 50:50 split when the new item is

839 * just to the right of the would-be "many duplicates" split point.

840 * (Note that the test used for an insert that is "just to the right"

841 * of the split point is conservative.)

842 */

843 final = &state->splits[0];

844 }

845

846 *newitemonleft = final->newitemonleft;

847 return final->firstrightoff;

848}

849

850 #define LEAF_SPLIT_DISTANCE 0.050

851 #define INTERNAL_SPLIT_DISTANCE 0.075

852

853/*

854 * Return a split interval to use for the default strategy. This is a limit

855 * on the number of candidate split points to give further consideration to.

856 * Only a fraction of all candidate splits points (those located at the start

857 * of the now-sorted splits array) fall within the split interval. Split

858 * interval is applied within _bt_bestsplitloc().

859 *

860 * Split interval represents an acceptable range of split points -- those that

861 * have leftfree and rightfree values that are acceptably balanced. The final

862 * split point chosen is the split point with the lowest "penalty" among split

863 * points in this split interval (unless we change our entire strategy, in

864 * which case the interval also changes -- see _bt_strategy()).

865 *

866 * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,

867 * and sigma b for internal ("branch") splits. It's hard to provide a

868 * theoretical justification for the size of the split interval, though it's

869 * clear that a small split interval can make tuples on level L+1 much smaller

870 * on average, without noticeably affecting space utilization on level L.

871 * (Note that the way that we calculate split interval might need to change if

872 * suffix truncation is taught to truncate tuples "within" the last

873 * attribute/datum for data types like text, which is more or less how it is

874 * assumed to work in the paper.)

875 */

876static int

877 _bt_defaultinterval(FindSplitData *state)

878{

879 SplitPoint *spaceoptimal;

880 int16 tolerance,

881 lowleftfree,

882 lowrightfree,

883 highleftfree,

884 highrightfree;

885

886 /*

887 * Determine leftfree and rightfree values that are higher and lower than

888 * we're willing to tolerate. Note that the final split interval will be

889 * about 10% of nsplits in the common case where all non-pivot tuples

890 * (data items) from a leaf page are uniformly sized. We're a bit more

891 * aggressive when splitting internal pages.

892 */

893 if (state->is_leaf)

894 tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;

895 else

896 tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;

897

898 /* First candidate split point is the most evenly balanced */

899 spaceoptimal = state->splits;

900 lowleftfree = spaceoptimal->leftfree - tolerance;

901 lowrightfree = spaceoptimal->rightfree - tolerance;

902 highleftfree = spaceoptimal->leftfree + tolerance;

903 highrightfree = spaceoptimal->rightfree + tolerance;

904

905 /*

906 * Iterate through split points, starting from the split immediately after

907 * 'spaceoptimal'. Find the first split point that divides free space so

908 * unevenly that including it in the split interval would be unacceptable.

909 */

910 for (int i = 1; i < state->nsplits; i++)

911 {

912 SplitPoint *split = state->splits + i;

913

914 /* Cannot use curdelta here, since its value is often weighted */

915 if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||

916 split->leftfree > highleftfree || split->rightfree > highrightfree)

917 return i;

918 }

919

920 return state->nsplits;

921}

922

923/*

924 * Subroutine to decide whether split should use default strategy/initial

925 * split interval, or whether it should finish splitting the page using

926 * alternative strategies (this is only possible with leaf pages).

927 *

928 * Caller uses alternative strategy (or sticks with default strategy) based

929 * on how *strategy is set here. Return value is "perfect penalty", which is

930 * passed to _bt_bestsplitloc() as a final constraint on how far caller is

931 * willing to go to avoid appending a heap TID when using the many duplicates

932 * strategy (it also saves _bt_bestsplitloc() useless cycles).

933 */

934static int

935 _bt_strategy(FindSplitData *state, SplitPoint *leftpage,

936 SplitPoint *rightpage, FindSplitStrat *strategy)

937{

938 IndexTuple leftmost,

939 rightmost;

940 SplitPoint *leftinterval,

941 *rightinterval;

942 int perfectpenalty;

943 int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);

944

945 /* Assume that alternative strategy won't be used for now */

946 *strategy = SPLIT_DEFAULT;

947

948 /*

949 * Use smallest observed firstright item size for entire page (actually,

950 * entire imaginary version of page that includes newitem) as perfect

951 * penalty on internal pages. This can save cycles in the common case

952 * where most or all splits (not just splits within interval) have

953 * firstright tuples that are the same size.

954 */

955 if (!state->is_leaf)

956 return state->minfirstrightsz;

957

958 /*

959 * Use leftmost and rightmost tuples from leftmost and rightmost splits in

960 * current split interval

961 */

962 _bt_interval_edges(state, &leftinterval, &rightinterval);

963 leftmost = _bt_split_lastleft(state, leftinterval);

964 rightmost = _bt_split_firstright(state, rightinterval);

965

966 /*

967 * If initial split interval can produce a split point that will at least

968 * avoid appending a heap TID in new high key, we're done. Finish split

969 * with default strategy and initial split interval.

970 */

971 perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);

972 if (perfectpenalty <= indnkeyatts)

973 return perfectpenalty;

974

975 /*

976 * Work out how caller should finish split when even their "perfect"

977 * penalty for initial/default split interval indicates that the interval

978 * does not contain even a single split that avoids appending a heap TID.

979 *

980 * Use the leftmost split's lastleft tuple and the rightmost split's

981 * firstright tuple to assess every possible split.

982 */

983 leftmost = _bt_split_lastleft(state, leftpage);

984 rightmost = _bt_split_firstright(state, rightpage);

985

986 /*

987 * If page (including new item) has many duplicates but is not entirely

988 * full of duplicates, a many duplicates strategy split will be performed.

989 * If page is entirely full of duplicates, a single value strategy split

990 * will be performed.

991 */

992 perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);

993 if (perfectpenalty <= indnkeyatts)

994 {

995 *strategy = SPLIT_MANY_DUPLICATES;

996

997 /*

998 * Many duplicates strategy should split at either side the group of

999 * duplicates that enclose the delta-optimal split point. Return

1000 * indnkeyatts rather than the true perfect penalty to make that

1001 * happen. (If perfectpenalty was returned here then low cardinality

1002 * composite indexes could have continual unbalanced splits.)

1003 *

1004 * Note that caller won't go through with a many duplicates split in

1005 * rare cases where it looks like there are ever-decreasing insertions

1006 * to the immediate right of the split point. This must happen just

1007 * before a final decision is made, within _bt_bestsplitloc().

1008 */

1009 return indnkeyatts;

1010 }

1011

1012 /*

1013 * Single value strategy is only appropriate with ever-increasing heap

1014 * TIDs; otherwise, original default strategy split should proceed to

1015 * avoid pathological performance. Use page high key to infer if this is

1016 * the rightmost page among pages that store the same duplicate value.

1017 * This should not prevent insertions of heap TIDs that are slightly out

1018 * of order from using single value strategy, since that's expected with

1019 * concurrent inserters of the same duplicate value.

1020 */

1021 else if (state->is_rightmost)

1022 *strategy = SPLIT_SINGLE_VALUE;

1023 else

1024 {

1025 ItemId itemid;

1026 IndexTuple hikey;

1027

1028 itemid = PageGetItemId(state->origpage, P_HIKEY);

1029 hikey = (IndexTuple) PageGetItem(state->origpage, itemid);

1030 perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,

1031 state->newitem);

1032 if (perfectpenalty <= indnkeyatts)

1033 *strategy = SPLIT_SINGLE_VALUE;

1034 else

1035 {

1036 /*

1037 * Have caller finish split using default strategy, since page

1038 * does not appear to be the rightmost page for duplicates of the

1039 * value the page is filled with

1040 */

1041 }

1042 }

1043

1044 return perfectpenalty;

1045}

1046

1047/*

1048 * Subroutine to locate leftmost and rightmost splits for current/default

1049 * split interval. Note that it will be the same split iff there is only one

1050 * split in interval.

1051 */

1052static void

1053 _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,

1054 SplitPoint **rightinterval)

1055{

1056 int highsplit = Min(state->interval, state->nsplits);

1057 SplitPoint *deltaoptimal;

1058

1059 deltaoptimal = state->splits;

1060 *leftinterval = NULL;

1061 *rightinterval = NULL;

1062

1063 /*

1064 * Delta is an absolute distance to optimal split point, so both the

1065 * leftmost and rightmost split point will usually be at the end of the

1066 * array

1067 */

1068 for (int i = highsplit - 1; i >= 0; i--)

1069 {

1070 SplitPoint *distant = state->splits + i;

1071

1072 if (distant->firstrightoff < deltaoptimal->firstrightoff)

1073 {

1074 if (*leftinterval == NULL)

1075 *leftinterval = distant;

1076 }

1077 else if (distant->firstrightoff > deltaoptimal->firstrightoff)

1078 {

1079 if (*rightinterval == NULL)

1080 *rightinterval = distant;

1081 }

1082 else if (!distant->newitemonleft && deltaoptimal->newitemonleft)

1083 {

1084 /*

1085 * "incoming tuple will become firstright" (distant) is to the

1086 * left of "incoming tuple will become lastleft" (delta-optimal)

1087 */

1088 Assert(distant->firstrightoff == state->newitemoff);

1089 if (*leftinterval == NULL)

1090 *leftinterval = distant;

1091 }

1092 else if (distant->newitemonleft && !deltaoptimal->newitemonleft)

1093 {

1094 /*

1095 * "incoming tuple will become lastleft" (distant) is to the right

1096 * of "incoming tuple will become firstright" (delta-optimal)

1097 */

1098 Assert(distant->firstrightoff == state->newitemoff);

1099 if (*rightinterval == NULL)

1100 *rightinterval = distant;

1101 }

1102 else

1103 {

1104 /* There was only one or two splits in initial split interval */

1105 Assert(distant == deltaoptimal);

1106 if (*leftinterval == NULL)

1107 *leftinterval = distant;

1108 if (*rightinterval == NULL)

1109 *rightinterval = distant;

1110 }

1111

1112 if (*leftinterval && *rightinterval)

1113 return;

1114 }

1115

1116 Assert(false);

1117}

1118

1119/*

1120 * Subroutine to find penalty for caller's candidate split point.

1121 *

1122 * On leaf pages, penalty is the attribute number that distinguishes each side

1123 * of a split. It's the last attribute that needs to be included in new high

1124 * key for left page. It can be greater than the number of key attributes in

1125 * cases where a heap TID will need to be appended during truncation.

1126 *

1127 * On internal pages, penalty is simply the size of the firstright tuple for

1128 * the split (including line pointer overhead). This tuple will become the

1129 * new high key for the left page.

1130 */

1131static inline int

1132 _bt_split_penalty(FindSplitData *state, SplitPoint *split)

1133{

1134 IndexTuple lastleft;

1135 IndexTuple firstright;

1136

1137 if (!state->is_leaf)

1138 {

1139 ItemId itemid;

1140

1141 if (!split->newitemonleft &&

1142 split->firstrightoff == state->newitemoff)

1143 return state->newitemsz;

1144

1145 itemid = PageGetItemId(state->origpage, split->firstrightoff);

1146

1147 return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);

1148 }

1149

1150 lastleft = _bt_split_lastleft(state, split);

1151 firstright = _bt_split_firstright(state, split);

1152

1153 return _bt_keep_natts_fast(state->rel, lastleft, firstright);

1154}

1155

1156/*

1157 * Subroutine to get a lastleft IndexTuple for a split point

1158 */

1159static inline IndexTuple

1160 _bt_split_lastleft(FindSplitData *state, SplitPoint *split)

1161{

1162 ItemId itemid;

1163

1164 if (split->newitemonleft && split->firstrightoff == state->newitemoff)

1165 return state->newitem;

1166

1167 itemid = PageGetItemId(state->origpage,

1168 OffsetNumberPrev(split->firstrightoff));

1169 return (IndexTuple) PageGetItem(state->origpage, itemid);

1170}

1171

1172/*

1173 * Subroutine to get a firstright IndexTuple for a split point

1174 */

1175static inline IndexTuple

1176 _bt_split_firstright(FindSplitData *state, SplitPoint *split)

1177{

1178 ItemId itemid;

1179

1180 if (!split->newitemonleft && split->firstrightoff == state->newitemoff)

1181 return state->newitem;

1182

1183 itemid = PageGetItemId(state->origpage, split->firstrightoff);

1184 return (IndexTuple) PageGetItem(state->origpage, itemid);

1185}

BlockNumber

uint32 BlockNumber

Definition: block.h:31

PageGetExactFreeSpace

Size PageGetExactFreeSpace(const PageData *page)

Definition: bufpage.c:957

PageGetPageSize

static Size PageGetPageSize(const PageData *page)

Definition: bufpage.h:277

PageGetItem

static Item PageGetItem(const PageData *page, const ItemIdData *itemId)

Definition: bufpage.h:354

SizeOfPageHeaderData

#define SizeOfPageHeaderData

Definition: bufpage.h:217

PageGetItemId

static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)

Definition: bufpage.h:244

Page

PageData * Page

Definition: bufpage.h:82

PageGetMaxOffsetNumber

static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)

Definition: bufpage.h:372

Min

#define Min(x, y)

Definition: c.h:1003

MAXALIGN

#define MAXALIGN(LEN)

Definition: c.h:810

int64

int64_t int64

Definition: c.h:535

int16

int16_t int16

Definition: c.h:533

Size

size_t Size

Definition: c.h:610

ERROR

#define ERROR

Definition: elog.h:39

elog

#define elog(elevel,...)

Definition: elog.h:226

final

#define final(a, b, c)

Definition: hashfn.c:116

Assert

Assert(PointerIsAligned(start, uint64))

int.h

pg_cmp_s16

static int pg_cmp_s16(int16 a, int16 b)

Definition: int.h:634

i

int i

Definition: isn.c:77

if

if(TABLE==NULL||TABLE_index==NULL)

Definition: isn.c:81

ItemIdGetLength

#define ItemIdGetLength(itemId)

Definition: itemid.h:59

ItemIdData

struct ItemIdData ItemIdData

ItemPointerGetOffsetNumber

static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)

Definition: itemptr.h:124

ItemPointerGetBlockNumber

static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)

Definition: itemptr.h:103

IndexTuple

IndexTupleData * IndexTuple

Definition: itup.h:53

IndexTupleSize

static Size IndexTupleSize(const IndexTupleData *itup)

Definition: itup.h:71

pfree

void pfree(void *pointer)

Definition: mcxt.c:1594

palloc

void * palloc(Size size)

Definition: mcxt.c:1365

nbtree.h

P_ISLEAF

#define P_ISLEAF(opaque)

Definition: nbtree.h:221

BTREE_SINGLEVAL_FILLFACTOR

#define BTREE_SINGLEVAL_FILLFACTOR

Definition: nbtree.h:203

P_HIKEY

#define P_HIKEY

Definition: nbtree.h:368

BTPageGetOpaque

#define BTPageGetOpaque(page)

Definition: nbtree.h:74

P_FIRSTDATAKEY

#define P_FIRSTDATAKEY(opaque)

Definition: nbtree.h:370

BTreeTupleGetPostingOffset

static uint32 BTreeTupleGetPostingOffset(IndexTuple posting)

Definition: nbtree.h:530

P_RIGHTMOST

#define P_RIGHTMOST(opaque)

Definition: nbtree.h:220

BTreeTupleIsPosting

static bool BTreeTupleIsPosting(IndexTuple itup)

Definition: nbtree.h:493

BTGetFillFactor

#define BTGetFillFactor(relation)

Definition: nbtree.h:1158

P_FIRSTKEY

#define P_FIRSTKEY

Definition: nbtree.h:369

BTREE_NONLEAF_FILLFACTOR

#define BTREE_NONLEAF_FILLFACTOR

Definition: nbtree.h:202

_bt_findsplitloc

OffsetNumber _bt_findsplitloc(Relation rel, Page origpage, OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem, bool *newitemonleft)

Definition: nbtsplitloc.c:130

LEAF_SPLIT_DISTANCE

#define LEAF_SPLIT_DISTANCE

Definition: nbtsplitloc.c:850

_bt_split_penalty

static int _bt_split_penalty(FindSplitData *state, SplitPoint *split)

Definition: nbtsplitloc.c:1132

INTERNAL_SPLIT_DISTANCE

#define INTERNAL_SPLIT_DISTANCE

Definition: nbtsplitloc.c:851

_bt_deltasortsplits

static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult, bool usemult)

Definition: nbtsplitloc.c:567

_bt_strategy

static int _bt_strategy(FindSplitData *state, SplitPoint *leftpage, SplitPoint *rightpage, FindSplitStrat *strategy)

Definition: nbtsplitloc.c:935

_bt_afternewitemoff

static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff, int leaffillfactor, bool *usemult)

Definition: nbtsplitloc.c:631

_bt_splitcmp

static int _bt_splitcmp(const void *arg1, const void *arg2)

Definition: nbtsplitloc.c:595

_bt_split_lastleft

static IndexTuple _bt_split_lastleft(FindSplitData *state, SplitPoint *split)

Definition: nbtsplitloc.c:1160

_bt_recsplitloc

static void _bt_recsplitloc(FindSplitData *state, OffsetNumber firstrightoff, bool newitemonleft, int olddataitemstoleft, Size firstrightofforigpagetuplesz)

Definition: nbtsplitloc.c:450

FindSplitStrat

Definition: nbtsplitloc.c:22

SPLIT_DEFAULT

@ SPLIT_DEFAULT

Definition: nbtsplitloc.c:24

SPLIT_MANY_DUPLICATES

@ SPLIT_MANY_DUPLICATES

Definition: nbtsplitloc.c:25

SPLIT_SINGLE_VALUE

@ SPLIT_SINGLE_VALUE

Definition: nbtsplitloc.c:26

_bt_bestsplitloc

static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty, bool *newitemonleft, FindSplitStrat strategy)

Definition: nbtsplitloc.c:789

_bt_defaultinterval

static int _bt_defaultinterval(FindSplitData *state)

Definition: nbtsplitloc.c:877

_bt_interval_edges

static void _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval, SplitPoint **rightinterval)

Definition: nbtsplitloc.c:1053

_bt_split_firstright

static IndexTuple _bt_split_firstright(FindSplitData *state, SplitPoint *split)

Definition: nbtsplitloc.c:1176

_bt_adjacenthtid

static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)

Definition: nbtsplitloc.c:750

_bt_keep_natts_fast

int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)

Definition: nbtutils.c:4063

OffsetNumberNext

#define OffsetNumberNext(offsetNumber)

Definition: off.h:52

OffsetNumber

uint16 OffsetNumber

Definition: off.h:24

FirstOffsetNumber

#define FirstOffsetNumber

Definition: off.h:27

OffsetNumberPrev

#define OffsetNumberPrev(offsetNumber)

Definition: off.h:54

qsort

#define qsort(a, b, c, d)

Definition: port.h:479

postgres.h

RelationGetRelationName

#define RelationGetRelationName(relation)

Definition: rel.h:548

IndexRelationGetNumberOfKeyAttributes

#define IndexRelationGetNumberOfKeyAttributes(relation)

Definition: rel.h:533

BTPageOpaqueData

Definition: nbtree.h:64

FindSplitData

Definition: nbtsplitloc.c:42

FindSplitData::maxsplits

int maxsplits

Definition: nbtsplitloc.c:57

FindSplitData::origpage

Page origpage

Definition: nbtsplitloc.c:45

FindSplitData::nsplits

int nsplits

Definition: nbtsplitloc.c:58

FindSplitData::rightspace

int rightspace

Definition: nbtsplitloc.c:52

FindSplitData::newitemoff

OffsetNumber newitemoff

Definition: nbtsplitloc.c:50

FindSplitData::is_rightmost

bool is_rightmost

Definition: nbtsplitloc.c:49

FindSplitData::is_leaf

bool is_leaf

Definition: nbtsplitloc.c:48

FindSplitData::minfirstrightsz

Size minfirstrightsz

Definition: nbtsplitloc.c:54

FindSplitData::newitem

IndexTuple newitem

Definition: nbtsplitloc.c:46

FindSplitData::splits

SplitPoint * splits

Definition: nbtsplitloc.c:59

FindSplitData::newitemsz

Size newitemsz

Definition: nbtsplitloc.c:47

FindSplitData::rel

Relation rel

Definition: nbtsplitloc.c:44

FindSplitData::leftspace

int leftspace

Definition: nbtsplitloc.c:51

FindSplitData::interval

int interval

Definition: nbtsplitloc.c:60

FindSplitData::olddataitemstotal

int olddataitemstotal

Definition: nbtsplitloc.c:53

IndexTupleData

Definition: itup.h:36

IndexTupleData::t_tid

ItemPointerData t_tid

Definition: itup.h:37

ItemIdData

Definition: itemid.h:26

ItemPointerData

Definition: itemptr.h:37

RelationData

Definition: rel.h:56

SplitPoint

Definition: nbtsplitloc.c:30

SplitPoint::newitemonleft

bool newitemonleft

Definition: nbtsplitloc.c:38

SplitPoint::firstrightoff

OffsetNumber firstrightoff

Definition: nbtsplitloc.c:37

SplitPoint::leftfree

int16 leftfree

Definition: nbtsplitloc.c:33

SplitPoint::rightfree

int16 rightfree

Definition: nbtsplitloc.c:34

SplitPoint::curdelta

int16 curdelta

Definition: nbtsplitloc.c:32

state

Definition: regguts.h:323

tableam.h

PostgreSQL Source Code: src/backend/access/nbtree/nbtsplitloc.c Source File