/*-------------------------------------------------------------------------
 *
 * nodeIncrementalSort.c
 *	  Routines to handle incremental sorting of relations.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *	  src/backend/executor/nodeIncrementalSort.c
 * Incremental sort is an optimized variant of multikey sort for cases
 * when the input is already sorted by a prefix of the sort keys.  For
 * example when a sort by (key1, key2 ... keyN) is requested, and the
 * input is already sorted by (key1, key2 ... keyM), M < N, we can
 * divide the input into groups where keys (key1, ... keyM) are equal,
 * and only sort on the remaining columns.
 * Consider the following example.  We have input tuples consisting of
 * two integers (X, Y) already presorted by X, while it's required to
 * sort them by both X and Y.  Let the input tuples be the following:
 * An incremental sort algorithm would split the input into the following
 * groups, which have equal X, and then sort them by Y individually:
 *
 *     (2, 9) (2, 1) (2, 5)
 * After sorting these groups and putting them together, we would get the
 * following result, sorted by X and Y as requested (a standalone C sketch
 * of this grouping step follows this header comment):
 * Incremental sort may be more efficient than plain sort, particularly
 * on large datasets, as it reduces the amount of data to sort at once,
 * making it more likely it fits into work_mem (eliminating the need to
 * spill to disk).  But the main advantage of incremental sort is that
 * it can start producing rows early, before sorting the whole dataset,
 * which is a significant benefit especially for queries with LIMIT.
 * The algorithm we've implemented here is modified from the theoretical
 * base described above by operating in two different modes:
 *   - Fetching a minimum number of tuples without checking prefix key
 *     group membership and sorting on all columns when safe.
 *   - Fetching all tuples for a single prefix key group and sorting on
 *     solely the unsorted columns.
 * We always begin in the first mode, and employ a heuristic to switch
 * into the second mode if we believe it's beneficial.
 * Sorting incrementally can potentially use less memory, avoid fetching
 * and sorting all tuples in the dataset, and begin returning tuples before
 * the entire result set is available.
 *
 * The hybrid mode approach allows us to optimize for both very small
 * groups (where the overhead of a new tuplesort is high) and very large
 * groups (where we can lower cost by not having to sort on already sorted
 * columns), albeit at some extra cost while switching between modes.
 *
 *-------------------------------------------------------------------------
 */
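/*
 * The grouping step described in the header comment above can be sketched in
 * plain, standalone C as follows.  This is purely illustrative and not part
 * of the executor: qsort() stands in for tuplesort, and the ExampleRow type
 * and every identifier below are invented for the example.
 */
#include <stdlib.h>				/* for qsort() in this example only */

typedef struct ExampleRow
{
	int			x;				/* presorted key */
	int			y;				/* key we still need to sort on */
} ExampleRow;

static int
example_cmp_y(const void *a, const void *b)
{
	int			ya = ((const ExampleRow *) a)->y;
	int			yb = ((const ExampleRow *) b)->y;

	return (ya > yb) - (ya < yb);
}

/* Sort rows[] (already ordered by x) fully by (x, y), one x-group at a time. */
static void
example_incremental_sort(ExampleRow *rows, int nrows)
{
	int			start = 0;

	for (int i = 1; i <= nrows; i++)
	{
		/* At the end of a run of equal x values, sort just that run by y. */
		if (i == nrows || rows[i].x != rows[start].x)
		{
			qsort(&rows[start], i - start, sizeof(ExampleRow), example_cmp_y);
			start = i;
		}
	}
}

/*
 * With the header's example group (2, 9) (2, 1) (2, 5), this yields
 * (2, 1) (2, 5) (2, 9).
 */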
 * We need to store the instrumentation information either in the local
 * node's sort info or, for a parallel worker process, in the shared info
 * (this avoids having to additionally memcpy the info from local memory to
 * shared memory at each instrumentation call).  This macro expands to
 * choose the proper sort state and group info.
 *
 * Arguments:
 * - node: type IncrementalSortState *
 * - groupName: the token fullsort or prefixsort
 */
#define INSTRUMENT_SORT_GROUP(node, groupName) \
	do { \
		if ((node)->ss.ps.instrument != NULL) \
		{ \
			if ((node)->shared_info && (node)->am_worker) \
			{ \
				Assert(IsParallelWorker()); \
				Assert(ParallelWorkerNumber <= (node)->shared_info->num_workers); \
				instrumentSortedGroup(&(node)->shared_info->sinfo[ParallelWorkerNumber].groupName##GroupInfo, \
									  (node)->groupName##_state); \
			} \
			else \
			{ \
				instrumentSortedGroup(&(node)->incsort_info.groupName##GroupInfo, \
									  (node)->groupName##_state); \
			} \
		} \
	} while (0)
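/*
 * For example, after finalizing a batch in the full sort state the executor
 * can record that batch's statistics with:
 *
 *		INSTRUMENT_SORT_GROUP(node, fullsort);
 *
 * which token-pastes the groupName argument so that fullsort_state feeds
 * fullsortGroupInfo (and, with the token prefixsort, prefixsort_state feeds
 * prefixsortGroupInfo).
 */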
/* ----------------------------------------------------------------
 * instrumentSortedGroup
 *
 * Because incremental sort processes (potentially many) sort batches, we need
 * to capture tuplesort stats each time we finalize a sort state.  This summary
 * data is later used for EXPLAIN ANALYZE output.
 * ----------------------------------------------------------------
 */

	/* Calculate total and maximum memory and disk space used. */

	/* Track each sort method we've used. */
/* ----------------------------------------------------------------
 * preparePresortedCols
 *
 * Prepare information for presorted_keys comparisons.
 * ----------------------------------------------------------------
 */

	/* Pre-cache comparison functions for each pre-sorted key. */
		key->attno = plannode->sort.sortColIdx[i];

		equalityOp = get_equality_op_for_ordering_op(plannode->sort.sortOperators[i],
													 NULL);
		if (!OidIsValid(equalityOp))
			elog(ERROR, "missing equality operator for ordering operator %u",
				 plannode->sort.sortOperators[i]);

		equalityFunc = get_opcode(equalityOp);
		if (!OidIsValid(equalityFunc))
			elog(ERROR, "missing function for operator %u", equalityOp);

		/* Lookup the comparison function */
		fmgr_info_cxt(equalityFunc, &key->flinfo, CurrentMemoryContext);

		/* We can initialize the callinfo just once and re-use it */
		key->fcinfo = palloc0(SizeForFunctionCallInfo(2));
		InitFunctionCallInfoData(*key->fcinfo, &key->flinfo, 2,
								 plannode->sort.collations[i], NULL, NULL);
		key->fcinfo->args[0].isnull = false;
		key->fcinfo->args[1].isnull = false;
/* ----------------------------------------------------------------
 * isCurrentGroup
 *
 * Check whether a given tuple belongs to the current sort group by comparing
 * the presorted column values to the pivot tuple of the current group.
 * ----------------------------------------------------------------
 */

	/*
	 * That the input is sorted by keys (0, ... n) implies that the tail
	 * keys are more likely to change.  Therefore we do our comparison
	 * starting from the last pre-sorted column to optimize for early
	 * detection of inequality and to minimize the number of function calls.
	 */
	for (int i = nPresortedCols - 1; i >= 0; i--)
	{
		/* Special case for NULL-vs-NULL, else use standard comparison */
		if (isnullA || isnullB)
		{
			if (isnullA == isnullB)
				continue;		/* both NULL: treat as equal for grouping */
			else
				return false;	/* only one NULL: prefix keys differ */
		}

		key->fcinfo->args[0].value = datumA;
		key->fcinfo->args[1].value = datumB;

		/* just for paranoia's sake, we reset isnull each time */
		key->fcinfo->isnull = false;

		result = FunctionCallInvoke(key->fcinfo);

		/* Check for null result, since caller is clearly not expecting one */
		if (key->fcinfo->isnull)
			elog(ERROR, "function %u returned NULL", key->flinfo.fn_oid);

		if (!DatumGetBool(result))
			return false;
	}

	return true;
/* ----------------------------------------------------------------
 * switchToPresortedPrefixMode
 *
 * When we determine that we've likely encountered a large batch of tuples all
 * having the same presorted prefix values, we want to optimize tuplesort by
 * only sorting on unsorted suffix keys.
 *
 * The problem is that we've already accumulated several tuples in another
 * tuplesort configured to sort by all columns (assuming that there may be
 * more than one prefix key group).  So to switch to presorted prefix mode we
 * have to go back and look at all the tuples we've already accumulated to
 * verify they're all part of the same prefix key group before sorting them
 * solely by unsorted suffix keys.
 *
 * While it's likely that all of the tuples already fetched are part of a
 * single prefix group, we also have to handle the possibility that there is
 * at least one different prefix key group before the large prefix key group.
 * ----------------------------------------------------------------
 */
	/* Configure the prefix sort state the first time around. */

		/*
		 * Optimize the sort by assuming the prefix columns are all equal and
		 * thus we only need to sort by any remaining columns.
		 */
											  &(plannode->sort.sortColIdx[nPresortedCols]),
											  &(plannode->sort.sortOperators[nPresortedCols]),
											  &(plannode->sort.collations[nPresortedCols]),
											  &(plannode->sort.nullsFirst[nPresortedCols]),
	/* Next group of presorted data */

	/*
	 * If the current node has a bound, then it's reasonably likely that a
	 * large prefix key group will benefit from bounded sort, so configure the
	 * tuplesort to allow for that optimization.
	 */

	/*
	 * Copy as many tuples as we can (i.e., in the same prefix key group) from
	 * the full sort state to the prefix sort state.
	 */
		/*
		 * When we encounter multiple prefix key groups inside the full sort
		 * tuplesort we have to carry over the last read tuple into the next
		 * batch.
		 */

			/* The carried over tuple is our new group pivot tuple. */
			/*
			 * If this is our first time through the loop, then we need to
			 * save the first tuple we get as our new group pivot.
			 */

			/*
			 * The tuple isn't part of the current batch so we need to
			 * carry it over into the next batch of tuples we transfer out
			 * of the full sort tuplesort into the presorted prefix
			 * tuplesort.  We don't actually have to do anything special to
			 * save the tuple since we've already loaded it into the
			 * node->transfer_tuple slot, and, even though that slot
			 * points to memory inside the full sort tuplesort, we can't
			 * reset that tuplesort anyway until we've fully transferred
			 * out its tuples, so this reference is safe.  We do need to
			 * reset the group pivot tuple though since we've finished the
			 * current prefix key group.
			 */

			/* Break out of for-loop early */
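/*
 * To make the above concrete, the transfer loop follows roughly this shape.
 * This is an illustrative sketch only; the first-tuple handling and the
 * nTuples bookkeeping of the real loop are omitted.
 */
#if 0
	for (;;)
	{
		/* Pull the next already-loaded tuple out of the full sort state. */
		if (!tuplesort_gettupleslot(node->fullsort_state, true, false,
									node->transfer_tuple, NULL))
			break;				/* full sort state fully drained */

		/* A changed prefix ends the group; the tuple stays carried over. */
		if (!isCurrentGroup(node, node->group_pivot, node->transfer_tuple))
		{
			ExecClearTuple(node->group_pivot);
			break;
		}

		/* Same prefix key group: move the tuple to the prefix sort state. */
		tuplesort_puttupleslot(node->prefixsort_state, node->transfer_tuple);
	}
#endif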
	/*
	 * Track how many tuples remain in the full sort batch so that we know if
	 * we need to sort multiple prefix key groups before processing tuples
	 * remaining in the large single prefix key group we think we've
	 * encountered.
	 */
		/*
		 * We've found that all tuples remaining in the full sort batch are in
		 * the same prefix key group and moved all of those tuples into the
		 * presorted prefix tuplesort.  We don't know that we've yet found the
		 * last tuple in the current prefix key group, so save our pivot
		 * comparison tuple and continue fetching tuples from the outer
		 * execution node to load into the presorted prefix tuplesort.
		 */

		SO_printf("Setting execution_status to INCSORT_LOADPREFIXSORT (switchToPresortedPrefixMode)\n");
		/*
		 * Make sure we clear the transfer tuple slot so that next time we
		 * encounter a large prefix key group we don't incorrectly assume we
		 * have a tuple carried over from the previous group.
		 */
		/*
		 * We finished a group but didn't consume all of the tuples from the
		 * full sort state, so we'll sort this batch, let the outer node read
		 * out all of those tuples, and then come back around to find another
		 * batch.
		 */
			/*
			 * If the current node has a bound, and we've already sorted n
			 * tuples, then the functional bound remaining is (original bound
			 * - n), so store the current number of processed tuples for use
			 * in configuring sorting bound.
			 */

		SO_printf("Setting execution_status to INCSORT_READPREFIXSORT (switchToPresortedPrefixMode)\n");
/*
 * Sorting many small groups with tuplesort is inefficient.  In order to
 * cope with this problem we don't start a new group until the current one
 * contains at least DEFAULT_MIN_GROUP_SIZE tuples (unfortunately this also
 * means we can't assume small groups of tuples all have the same prefix keys).
 * When we have a bound that's less than DEFAULT_MIN_GROUP_SIZE we start
 * looking for a new group as soon as we've met our bound, to avoid fetching
 * more tuples than we absolutely need.
 */
#define DEFAULT_MIN_GROUP_SIZE 32
/*
 * While we've optimized for small prefix key groups by not starting our prefix
 * key comparisons until we've reached a minimum number of tuples, we don't want
 * that optimization to cause us to lose out on the benefits of being able to
 * assume a large group of tuples is fully presorted by its prefix keys.
 * Therefore we use the DEFAULT_MAX_FULL_SORT_GROUP_SIZE cutoff as a heuristic
 * for determining when we believe we've encountered a large group, and, if we
 * get to that point without finding a new prefix key group, we transition to
 * presorted prefix key mode.
 */
#define DEFAULT_MAX_FULL_SORT_GROUP_SIZE (2 * DEFAULT_MIN_GROUP_SIZE)
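/*
 * A simplified sketch of how these two thresholds steer the full sort loading
 * loop in ExecIncrementalSort() below.  The real loop interleaves this with
 * reading tuples and handling bounds; nTuples, minGroupSize, and slot stand
 * for local state of that loop.
 */
#if 0
	if (nTuples < minGroupSize)
	{
		/* Below the minimum group size: keep loading, no prefix key checks. */
	}
	else if (!isCurrentGroup(node, node->group_pivot, slot))
	{
		/* Prefix keys changed: sort this batch on all columns and read it out. */
	}
	else if (nTuples >= DEFAULT_MAX_FULL_SORT_GROUP_SIZE)
	{
		/* Probably one large group: switch to sorting only the suffix keys. */
		switchToPresortedPrefixMode(pstate);
	}
#endif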
/* ----------------------------------------------------------------
 * ExecIncrementalSort
 *
 * Assuming that the outer subtree returns tuples presorted by some prefix
 * of the target sort columns, performs an incremental sort.
 *
 * -- the outer child is prepared to return the first tuple.
 * ----------------------------------------------------------------
 */
	/*
	 * If a previous iteration has sorted a batch, then we need to check to
	 * see if there are any remaining tuples in that batch that we can return
	 * before moving on to other execution states.
	 */

		/*
		 * Return next tuple from the current sorted group set if available.
		 */

			/*
			 * We have to populate the slot from the tuplesort before checking
			 * outerNodeDone because it will set the slot to NULL if no more
			 * tuples remain.  If the tuplesort is empty, but we don't have any
			 * more tuples available for sort from the outer node, then
			 * outerNodeDone will have been set so we'll return that now-empty
			 * slot to the caller.
			 */

			/*
			 * Note: there isn't a good test case for the node->outerNodeDone
			 * check directly, but we need it for any plan where the outer
			 * node will fail when trying to fetch too many tuples.
			 */
			/*
			 * When we transition to presorted prefix mode, we might have
			 * accumulated at least one additional prefix key group in the
			 * full sort tuplesort.  The first call to
			 * switchToPresortedPrefixMode() will have pulled the first one of
			 * those groups out, and we've returned those tuples to the parent
			 * node, but if at this point we still have tuples remaining in
			 * the full sort state (i.e., n_fullsort_remaining > 0), then we
			 * need to re-execute the prefix mode transition function to pull
			 * out the next prefix key group.
			 */
				SO1_printf("Re-calling switchToPresortedPrefixMode() because n_fullsort_remaining is > 0 (" INT64_FORMAT ")\n",
						   node->n_fullsort_remaining);
			/*
			 * If we don't have any sorted tuples to read and we're not
			 * currently transitioning into presorted prefix sort mode, then
			 * it's time to start the process all over again by building a new
			 * group in the full sort state.
			 */
			SO_printf("Setting execution_status to INCSORT_LOADFULLSORT (n_fullsort_remaining > 0)\n");
	/*
	 * Scan the subplan in the forward direction while creating the sorted
	 * data.
	 */
			/* Load tuples into the full sort state. */

			/*
			 * Initialize sorting structures.
			 */
			if (fullsort_state == NULL)
				/*
				 * Initialize presorted column support structures for
				 * isCurrentGroup().  It's correct to do this along with the
				 * initial initialization for the full sort state (and not for
				 * the prefix sort state) since we always load the full sort
				 * state first.
				 */
				/*
				 * Since we optimize small prefix key groups by accumulating a
				 * minimum number of tuples before sorting, we can't assume
				 * that a group of tuples all have the same prefix key values.
				 * Hence we set up the full sort tuplesort to sort by all
				 * requested sort keys.
				 */
													  plannode->sort.sortColIdx,
													  plannode->sort.sortOperators,
													  plannode->sort.collations,
													  plannode->sort.nullsFirst,
				/* Reset sort for the next batch. */

			/*
			 * Calculate the tuples remaining if bounded and configure both
			 * bounded sort and the minimum group size accordingly.
			 */

				/*
				 * Bounded sort isn't likely to be a useful optimization for
				 * full sort mode since we limit full sort mode to a relatively
				 * small number of tuples and tuplesort doesn't switch over to
				 * top-n heap sort anyway unless it hits (2 * bound) tuples.
				 */
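/*
 * Sketched out, the configuration just described might look as follows.  This
 * is illustrative only; bound and bound_Done are assumed field names for the
 * original bound and the count of tuples already returned toward it.
 */
#if 0
			if (node->bounded)
			{
				int64		currentBound = node->bound - node->bound_Done;

				/* With a small remaining bound, stop accumulating at the bound. */
				minGroupSize = Min(DEFAULT_MIN_GROUP_SIZE, currentBound);
			}
			else
				minGroupSize = DEFAULT_MIN_GROUP_SIZE;
#endif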
			/*
			 * Because we have to read the next tuple to find out that we've
			 * encountered a new prefix key group, on subsequent groups we have
			 * to carry over that extra tuple and add it to the new group's
			 * sort here before we read any new tuples from the outer node.
			 */

				/*
				 * We're in full sort mode accumulating a minimum number of
				 * tuples and not checking for prefix key equality yet, so we
				 * can't assume the group pivot tuple will remain the same --
				 * unless we're using a minimum group size of 1, in which case
				 * the pivot is obviously still the pivot.
				 */
				if (nTuples != minGroupSize)
			/*
			 * Pull as many tuples from the outer node as possible given our
			 * current operating mode.
			 */

				/*
				 * If the outer node can't provide us any more tuples, then we
				 * can sort the current group and return those tuples.
				 */

					/*
					 * We need to know later if the outer node has completed to
					 * be able to distinguish between being done with a batch
					 * and being done with the whole node.
					 */

					SO_printf("Setting execution_status to INCSORT_READFULLSORT (final tuple)\n");
				/* Accumulate the next group of presorted tuples. */
				if (nTuples < minGroupSize)

					/*
					 * If we haven't yet hit our target minimum group size,
					 * then we don't need to bother checking for inclusion in
					 * the current prefix group since at this point we'll
					 * assume that we'll full sort this batch to avoid a large
					 * number of very tiny (and thus inefficient) sorts.
					 */

					/*
					 * If we've reached our minimum group size, then we need to
					 * store the most recent tuple as a pivot.
					 */
					if (nTuples == minGroupSize)

					/*
					 * If we've already accumulated enough tuples to reach our
					 * minimum group size, then we need to compare any
					 * additional tuples to our pivot tuple to see if we reach
					 * the end of that prefix key group.  Only after we find
					 * changed prefix keys can we guarantee sort stability of
					 * the tuples we've already accumulated.
					 */
					/*
					 * As long as the prefix keys match the pivot tuple then
					 * load the tuple into the tuplesort.
					 */

						/*
						 * Since the tuple we fetched isn't part of the current
						 * prefix key group we don't want to sort it as part of
						 * the current batch.  Instead we use the group_pivot
						 * slot to carry it over to the next batch (even though
						 * we won't actually treat it as a group pivot).
						 */
						/*
						 * If the current node has a bound, and we've already
						 * sorted n tuples, then the functional bound remaining
						 * is (original bound - n), so store the current number
						 * of processed tuples for later use configuring the
						 * sort state's bound.
						 */
						/*
						 * Once we find changed prefix keys we can complete the
						 * sort and transition modes to reading out the sorted
						 * tuples.
						 */

						SO_printf("Setting execution_status to INCSORT_READFULLSORT (found end of group)\n");
				/*
				 * Unless we've already transitioned modes to reading from the
				 * full sort state, then we assume that having read at least
				 * DEFAULT_MAX_FULL_SORT_GROUP_SIZE tuples means it's likely
				 * we're processing a large group of tuples all having equal
				 * prefix keys (but haven't yet found the final tuple in that
				 * prefix key group), so we need to transition into presorted
				 * prefix mode.
				 */
					/*
					 * The group pivot we have stored has already been put into
					 * the tuplesort; we don't want to carry it over.  Since we
					 * haven't yet found the end of the prefix key group, it
					 * might seem like we should keep this, but we don't
					 * actually know how many prefix key groups might be
					 * represented in the full sort state, so we'll let the
					 * mode transition function manage this state for us.
					 */
					/*
					 * Unfortunately the tuplesort API doesn't include a way to
					 * retrieve tuples unless a sort has been performed, so we
					 * perform the sort even though we could just as easily
					 * rely on FIFO retrieval semantics when transferring them
					 * to the presorted prefix tuplesort.
					 */
					/*
					 * If the full sort tuplesort happened to switch into top-n
					 * heapsort mode then we will only be able to retrieve
					 * currentBound tuples (since the tuplesort will have only
					 * retained the top-n tuples).  This is safe even though we
					 * haven't yet completed fetching the current prefix key
					 * group because the tuples we've "lost" already sorted
					 * "below" the retained ones, and we're already
					 * contractually guaranteed to not need any more than the
					 * currentBound tuples.
					 */
								   nTuples, Min(currentBound, nTuples));
						nTuples = Min(currentBound, nTuples);
					SO1_printf("Setting n_fullsort_remaining to " INT64_FORMAT " and calling switchToPresortedPrefixMode()\n",
							   nTuples);
					/*
					 * We might have multiple prefix key groups in the full
					 * sort state, so the mode transition function needs to
					 * know that it needs to move from the fullsort to the
					 * presorted prefix tuplesort.
					 */
					/* Transition the tuples to the presorted prefix tuplesort. */

					/*
					 * Since we know we had tuples to move to the presorted
					 * prefix tuplesort, we know that unless that transition
					 * has verified that all tuples belonged to the same prefix
					 * key group (in which case we can go straight to
					 * continuing to load tuples into that tuplesort), we
					 * should have a tuple to return here.
					 *
					 * Either way, the appropriate execution status should have
					 * been set by switchToPresortedPrefixMode(), so we can drop
					 * out of the loop here and let the appropriate path kick in.
					 */
			/*
			 * We only enter this state after the mode transition function has
			 * confirmed all remaining tuples from the full sort state have the
			 * same prefix and moved those tuples to the prefix sort state.
			 * That function has also set a group pivot tuple (which doesn't
			 * need to be carried over; it's already been put into the prefix
			 * sort state).
			 */
			/*
			 * Read tuples from the outer node and load them into the prefix
			 * sort state until we encounter a tuple whose prefix keys don't
			 * match the current group_pivot tuple, since we can't guarantee
			 * sort stability until we have all tuples matching those prefix
			 * keys.
			 */
				/*
				 * If we've exhausted tuples from the outer node we're done
				 * loading the prefix sort state.
				 */

					/*
					 * We need to know later if the outer node has completed to
					 * be able to distinguish between being done with a batch
					 * and being done with the whole node.
					 */
				/*
				 * If the tuple's prefix keys match our pivot tuple, we're not
				 * done yet and can load it into the prefix sort state.  If
				 * not, we don't want to sort it as part of the current batch.
				 * Instead we use the group_pivot slot to carry it over to the
				 * next batch (even though we won't actually treat it as a
				 * group pivot).
				 */
			/*
			 * Perform the sort and begin returning the tuples to the parent
			 * plan node.
			 */

			SO_printf("Setting execution_status to INCSORT_READPREFIXSORT (found end of group)\n");
				/*
				 * If the current node has a bound, and we've already sorted n
				 * tuples, then the functional bound remaining is (original
				 * bound - n), so store the current number of processed tuples
				 * for use in configuring sorting bound.
				 */
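/*
 * In sketch form, the bookkeeping described above amounts to the following.
 * Illustrative only; bound and bound_Done are assumed field names for the
 * original bound and the number of tuples already returned toward it.
 */
#if 0
			if (node->bounded)
			{
				node->bound_Done = Min(node->bound, node->bound_Done + nTuples);

				/* A later batch can then be limited to the remaining bound: */
				tuplesort_set_bound(node->prefixsort_state,
									node->bound - node->bound_Done);
			}
#endif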
	/* Restore to user specified direction. */

	/*
	 * Get the first or next tuple from tuplesort.  Returns NULL if no more
	 * tuples are available.
	 */
/* ----------------------------------------------------------------
 * ExecInitIncrementalSort
 *
 * Creates the run-time state information for the sort node
 * produced by the planner and initializes its outer subtree.
 * ----------------------------------------------------------------
 */

	SO_printf("ExecInitIncrementalSort: initializing sort node\n");
	/*
	 * Incremental sort can't be used with EXEC_FLAG_BACKWARD or
	 * EXEC_FLAG_MARK, because the current sort state contains only one sort
	 * batch rather than the full result set.
	 */

	/* Initialize state structure. */

	incrsortstate->bounded = false;
	/*
	 * Miscellaneous initialization
	 *
	 * Sort nodes don't initialize their ExprContexts because they never call
	 * ExecQual or ExecProject.
	 */

	/*
	 * Initialize child nodes.
	 *
	 * Incremental sort does not support backwards scans and mark/restore, so
	 * we don't bother removing the flags from eflags here.  We allow passing
	 * a REWIND flag, because although incremental sort can't use it, the
	 * child nodes may be able to do something more useful.
	 */

	/*
	 * Initialize scan slot and type.
	 */

	/*
	 * Initialize return slot and type.  No need to initialize projection info
	 * because we don't do any projections.
	 */

	/*
	 * Initialize standalone slots to store a tuple for pivot prefix keys and
	 * for carrying over a tuple from one batch to the next.
	 */

	SO_printf("ExecInitIncrementalSort: sort node initialized\n");

	return incrsortstate;
/* ----------------------------------------------------------------
 * ExecEndIncrementalSort(node)
 * ----------------------------------------------------------------
 */

	SO_printf("ExecEndIncrementalSort: shutting down sort node\n");

	/*
	 * Release tuplesort resources.
	 */

	/*
	 * Shut down the subplan.
	 */

	SO_printf("ExecEndIncrementalSort: sort node shutdown\n");
	/*
	 * Incremental sort doesn't support efficient rescan even when parameters
	 * haven't changed (e.g., rewind) because unlike regular sort we don't
	 * store all tuples at once for the full sort.
	 *
	 * So even if EXEC_FLAG_REWIND is set we just reset all of our state and
	 * re-execute the sort along with the child node.  Incremental sort itself
	 * can't do anything smarter, but maybe the child nodes can.
	 *
	 * In theory if we've only filled the full sort with one batch (and
	 * haven't reset it for a new batch yet) then we could efficiently rewind,
	 * but that seems a narrow enough case that it's not worth handling
	 * specially at this time.
	 */

	/* must drop pointer to sort result tuple */
	/*
	 * If we've already set up either of the sort states, we need to reset
	 * them.  We could end them and null out the pointers, but there's no
	 * reason to repay the setup cost, and because ExecIncrementalSort guards
	 * presorted column functions by checking to see if the full sort state
	 * has been initialized yet, setting the sort states to null here might
	 * actually cause a leak.
	 */
	/*
	 * If chgParam of subnode is not null, then the plan will be re-scanned by
	 * the first ExecProcNode.
	 */

/* ----------------------------------------------------------------
 *						Parallel Query Support
 * ----------------------------------------------------------------
 */
/* ----------------------------------------------------------------
 * ExecIncrementalSortEstimate
 *
 * Estimate space required to propagate sort statistics.
 * ----------------------------------------------------------------
 */
	/* don't need this if not instrumenting or no workers */
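/*
 * The estimate itself typically follows the usual pattern for per-worker
 * instrumentation: size a SharedIncrementalSortInfo with one
 * IncrementalSortInfo per worker and reserve a single TOC chunk and key for
 * it.  A sketch only; the offsetof-based sizing is an assumption here, not
 * shown in this excerpt.
 */
#if 0
	Size		size;

	size = mul_size(pcxt->nworkers, sizeof(IncrementalSortInfo));
	size = add_size(size, offsetof(SharedIncrementalSortInfo, sinfo));
	shm_toc_estimate_chunk(&pcxt->estimator, size);
	shm_toc_estimate_keys(&pcxt->estimator, 1);
#endif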
/* ----------------------------------------------------------------
 * ExecIncrementalSortInitializeDSM
 *
 * Initialize DSM space for sort statistics.
 * ----------------------------------------------------------------
 */
	/* don't need this if not instrumenting or no workers */

	/* ensure any unfilled slots will contain zeroes */
/* ----------------------------------------------------------------
 * ExecIncrementalSortInitializeWorker
 *
 * Attach worker to DSM space for sort statistics.
 * ----------------------------------------------------------------
 */
/* ----------------------------------------------------------------
 * ExecIncrementalSortRetrieveInstrumentation
 *
 * Transfer sort statistics from DSM to private memory.
 * ----------------------------------------------------------------
 */