/*-------------------------------------------------------------------------
 *
 * nodeIncrementalSort.c
 *	  Routines to handle incremental sorting of relations.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *	  src/backend/executor/nodeIncrementalSort.c
 * Incremental sort is an optimized variant of multikey sort for cases
 * when the input is already sorted by a prefix of the sort keys.  For
 * example when a sort by (key1, key2 ... keyN) is requested, and the
 * input is already sorted by (key1, key2 ... keyM), M < N, we can
 * divide the input into groups where keys (key1, ... keyM) are equal,
 * and only sort on the remaining columns.
 * Consider the following example.  We have input tuples consisting of
 * two integers (X, Y) already presorted by X, while it's required to
 * sort them by both X and Y.  Let the input tuples be the following:
 * An incremental sort algorithm would split the input into the following
 * groups, which have equal X, and then sort them by Y individually:
 *
 *     (2, 9) (2, 1) (2, 5)
 * After sorting these groups and putting them together, we would get the
 * following result, sorted by X and Y as requested (a standalone C sketch
 * of this grouping step follows this header comment):
 * Incremental sort may be more efficient than plain sort, particularly
 * on large datasets, as it reduces the amount of data to sort at once,
 * making it more likely it fits into work_mem (eliminating the need to
 * spill to disk).  But the main advantage of incremental sort is that
 * it can start producing rows early, before sorting the whole dataset,
 * which is a significant benefit especially for queries with LIMIT.
 * The algorithm we've implemented here is modified from the theoretical
 * base described above by operating in two different modes:
 *   - Fetching a minimum number of tuples without checking prefix key
 *     group membership and sorting on all columns when safe.
 *   - Fetching all tuples for a single prefix key group and sorting on
 *     solely the unsorted columns.
 * We always begin in the first mode, and employ a heuristic to switch
 * into the second mode if we believe it's beneficial.
 * Sorting incrementally can potentially use less memory, avoid fetching
 * and sorting all tuples in the dataset, and begin returning tuples before
 * the entire result set is available.
 *
 * The hybrid mode approach allows us to optimize for both very small
 * groups (where the overhead of a new tuplesort is high) and very large
 * groups (where we can lower cost by not having to sort on already sorted
 * columns), albeit at some extra cost while switching between modes.
 *
 *-------------------------------------------------------------------------
 */
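/*
 * The grouping step described in the header comment above can be sketched in
 * plain, standalone C as follows.  This is purely illustrative and not part
 * of the executor: qsort() stands in for tuplesort, and the ExampleRow type
 * and every identifier below are invented for the example.
 */
#include <stdlib.h>				/* for qsort() in this example only */

typedef struct ExampleRow
{
	int			x;				/* presorted key */
	int			y;				/* key we still need to sort on */
} ExampleRow;

static int
example_cmp_y(const void *a, const void *b)
{
	int			ya = ((const ExampleRow *) a)->y;
	int			yb = ((const ExampleRow *) b)->y;

	return (ya > yb) - (ya < yb);
}

/* Sort rows[] (already ordered by x) fully by (x, y), one x-group at a time. */
static void
example_incremental_sort(ExampleRow *rows, int nrows)
{
	int			start = 0;

	for (int i = 1; i <= nrows; i++)
	{
		/* At the end of a run of equal x values, sort just that run by y. */
		if (i == nrows || rows[i].x != rows[start].x)
		{
			qsort(&rows[start], i - start, sizeof(ExampleRow), example_cmp_y);
			start = i;
		}
	}
}

/*
 * With the header's example group (2, 9) (2, 1) (2, 5), this yields
 * (2, 1) (2, 5) (2, 9).
 */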
 * We need to store the instrumentation information either in the local
 * node's sort info or, for a parallel worker process, in the shared info
 * (this avoids having to additionally memcpy the info from local memory to
 * shared memory at each instrumentation call).  This macro expands to
 * choose the proper sort state and group info.
 *
 * Arguments:
 * - node: type IncrementalSortState *
 * - groupName: the token fullsort or prefixsort
 */
#define INSTRUMENT_SORT_GROUP(node, groupName) \
	do { \
		if ((node)->ss.ps.instrument != NULL) \
		{ \
			if ((node)->shared_info && (node)->am_worker) \
			{ \
				Assert(IsParallelWorker()); \
				Assert(ParallelWorkerNumber <= (node)->shared_info->num_workers); \
				instrumentSortedGroup(&(node)->shared_info->sinfo[ParallelWorkerNumber].groupName##GroupInfo, \
									  (node)->groupName##_state); \
			} \
			else \
			{ \
				instrumentSortedGroup(&(node)->incsort_info.groupName##GroupInfo, \
									  (node)->groupName##_state); \
			} \
		} \
	} while (0)
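/*
 * For example, after finalizing a batch in the full sort state the executor
 * can record that batch's statistics with:
 *
 *		INSTRUMENT_SORT_GROUP(node, fullsort);
 *
 * which token-pastes the groupName argument so that fullsort_state feeds
 * fullsortGroupInfo (and, with the token prefixsort, prefixsort_state feeds
 * prefixsortGroupInfo).
 */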
/* ----------------------------------------------------------------
 * instrumentSortedGroup
 *
 * Because incremental sort processes (potentially many) sort batches, we need
 * to capture tuplesort stats each time we finalize a sort state.  This summary
 * data is later used for EXPLAIN ANALYZE output.
 * ----------------------------------------------------------------
 */

	/* Calculate total and maximum memory and disk space used. */

	/* Track each sort method we've used. */
/* ----------------------------------------------------------------
 * preparePresortedCols
 *
 * Prepare information for presorted_keys comparisons.
 * ----------------------------------------------------------------
 */

	/* Pre-cache comparison functions for each pre-sorted key. */
		key->attno = plannode->sort.sortColIdx[i];

		equalityOp = get_equality_op_for_ordering_op(plannode->sort.sortOperators[i],
													 NULL);
		if (!OidIsValid(equalityOp))
			elog(ERROR, "missing equality operator for ordering operator %u",
				 plannode->sort.sortOperators[i]);

		equalityFunc = get_opcode(equalityOp);
		if (!OidIsValid(equalityFunc))
			elog(ERROR, "missing function for operator %u", equalityOp);

		/* Lookup the comparison function */
		fmgr_info_cxt(equalityFunc, &key->flinfo, CurrentMemoryContext);

		/* We can initialize the callinfo just once and re-use it */
		key->fcinfo = palloc0(SizeForFunctionCallInfo(2));
		InitFunctionCallInfoData(*key->fcinfo, &key->flinfo, 2,
								 plannode->sort.collations[i], NULL, NULL);
		key->fcinfo->args[0].isnull = false;
		key->fcinfo->args[1].isnull = false;
/* ----------------------------------------------------------------
 * isCurrentGroup
 *
 * Check whether a given tuple belongs to the current sort group by comparing
 * the presorted column values to the pivot tuple of the current group.
 * ----------------------------------------------------------------
 */

	/*
	 * That the input is sorted by keys (0, ... n) implies that the tail
	 * keys are more likely to change.  Therefore we do our comparison
	 * starting from the last pre-sorted column to optimize for early
	 * detection of inequality and to minimize the number of function calls.
	 */
	for (int i = nPresortedCols - 1; i >= 0; i--)
	{
		/* Special case for NULL-vs-NULL, else use standard comparison */
		if (isnullA || isnullB)
		{
			if (isnullA == isnullB)
				continue;		/* both NULL: treat as equal for grouping */
			else
				return false;	/* only one NULL: prefix keys differ */
		}

		key->fcinfo->args[0].value = datumA;
		key->fcinfo->args[1].value = datumB;

		/* just for paranoia's sake, we reset isnull each time */
		key->fcinfo->isnull = false;

		result = FunctionCallInvoke(key->fcinfo);

		/* Check for null result, since caller is clearly not expecting one */
		if (key->fcinfo->isnull)
			elog(ERROR, "function %u returned NULL", key->flinfo.fn_oid);

		if (!DatumGetBool(result))
			return false;
	}

	return true;
/* ----------------------------------------------------------------
 * switchToPresortedPrefixMode
 *
 * When we determine that we've likely encountered a large batch of tuples all
 * having the same presorted prefix values, we want to optimize tuplesort by
 * only sorting on unsorted suffix keys.
 *
 * The problem is that we've already accumulated several tuples in another
 * tuplesort configured to sort by all columns (assuming that there may be
 * more than one prefix key group).  So to switch to presorted prefix mode we
 * have to go back and look at all the tuples we've already accumulated to
 * verify they're all part of the same prefix key group before sorting them
 * solely by unsorted suffix keys.
 *
 * While it's likely that all of the tuples already fetched are part of a
 * single prefix group, we also have to handle the possibility that there is
 * at least one different prefix key group before the large prefix key group.
 * ----------------------------------------------------------------
 */
	/* Configure the prefix sort state the first time around. */

		/*
		 * Optimize the sort by assuming the prefix columns are all equal and
		 * thus we only need to sort by any remaining columns.
		 */
											  &(plannode->sort.sortColIdx[nPresortedCols]),
											  &(plannode->sort.sortOperators[nPresortedCols]),
											  &(plannode->sort.collations[nPresortedCols]),
											  &(plannode->sort.nullsFirst[nPresortedCols]),
	/* Next group of presorted data */

	/*
	 * If the current node has a bound, then it's reasonably likely that a
	 * large prefix key group will benefit from bounded sort, so configure the
	 * tuplesort to allow for that optimization.
	 */

	/*
	 * Copy as many tuples as we can (i.e., in the same prefix key group) from
	 * the full sort state to the prefix sort state.
	 */
		/*
		 * When we encounter multiple prefix key groups inside the full sort
		 * tuplesort we have to carry over the last read tuple into the next
		 * batch.
		 */

			/* The carried over tuple is our new group pivot tuple. */
			/*
			 * If this is our first time through the loop, then we need to
			 * save the first tuple we get as our new group pivot.
			 */

			/*
			 * The tuple isn't part of the current batch so we need to
			 * carry it over into the next batch of tuples we transfer out
			 * of the full sort tuplesort into the presorted prefix
			 * tuplesort.  We don't actually have to do anything special to
			 * save the tuple since we've already loaded it into the
			 * node->transfer_tuple slot, and, even though that slot
			 * points to memory inside the full sort tuplesort, we can't
			 * reset that tuplesort anyway until we've fully transferred
			 * out its tuples, so this reference is safe.  We do need to
			 * reset the group pivot tuple though since we've finished the
			 * current prefix key group.
			 */

			/* Break out of for-loop early */
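/*
 * To make the above concrete, the transfer loop follows roughly this shape.
 * This is an illustrative sketch only; the first-tuple handling and the
 * nTuples bookkeeping of the real loop are omitted.
 */
#if 0
	for (;;)
	{
		/* Pull the next already-loaded tuple out of the full sort state. */
		if (!tuplesort_gettupleslot(node->fullsort_state, true, false,
									node->transfer_tuple, NULL))
			break;				/* full sort state fully drained */

		/* A changed prefix ends the group; the tuple stays carried over. */
		if (!isCurrentGroup(node, node->group_pivot, node->transfer_tuple))
		{
			ExecClearTuple(node->group_pivot);
			break;
		}

		/* Same prefix key group: move the tuple to the prefix sort state. */
		tuplesort_puttupleslot(node->prefixsort_state, node->transfer_tuple);
	}
#endif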
	/*
	 * Track how many tuples remain in the full sort batch so that we know if
	 * we need to sort multiple prefix key groups before processing tuples
	 * remaining in the large single prefix key group we think we've
	 * encountered.
	 */
		/*
		 * We've found that all tuples remaining in the full sort batch are in
		 * the same prefix key group and moved all of those tuples into the
		 * presorted prefix tuplesort.  We don't know that we've yet found the
		 * last tuple in the current prefix key group, so save our pivot
		 * comparison tuple and continue fetching tuples from the outer
		 * execution node to load into the presorted prefix tuplesort.
		 */

		SO_printf("Setting execution_status to INCSORT_LOADPREFIXSORT (switchToPresortedPrefixMode)\n");
		/*
		 * Make sure we clear the transfer tuple slot so that next time we
		 * encounter a large prefix key group we don't incorrectly assume we
		 * have a tuple carried over from the previous group.
		 */
		/*
		 * We finished a group but didn't consume all of the tuples from the
		 * full sort state, so we'll sort this batch, let the outer node read
		 * out all of those tuples, and then come back around to find another
		 * batch.
		 */
			/*
			 * If the current node has a bound, and we've already sorted n
			 * tuples, then the functional bound remaining is (original bound
			 * - n), so store the current number of processed tuples for use
			 * in configuring sorting bound.
			 */

		SO_printf("Setting execution_status to INCSORT_READPREFIXSORT (switchToPresortedPrefixMode)\n");
/*
 * Sorting many small groups with tuplesort is inefficient.  In order to
 * cope with this problem we don't start a new group until the current one
 * contains at least DEFAULT_MIN_GROUP_SIZE tuples (unfortunately this also
 * means we can't assume small groups of tuples all have the same prefix keys).
 * When we have a bound that's less than DEFAULT_MIN_GROUP_SIZE we start
 * looking for a new group as soon as we've met our bound, to avoid fetching
 * more tuples than we absolutely need.
 */
#define DEFAULT_MIN_GROUP_SIZE 32
/*
 * While we've optimized for small prefix key groups by not starting our prefix
 * key comparisons until we've reached a minimum number of tuples, we don't want
 * that optimization to cause us to lose out on the benefits of being able to
 * assume a large group of tuples is fully presorted by its prefix keys.
 * Therefore we use the DEFAULT_MAX_FULL_SORT_GROUP_SIZE cutoff as a heuristic
 * for determining when we believe we've encountered a large group, and, if we
 * get to that point without finding a new prefix key group, we transition to
 * presorted prefix key mode.
 */
#define DEFAULT_MAX_FULL_SORT_GROUP_SIZE (2 * DEFAULT_MIN_GROUP_SIZE)
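/*
 * A simplified sketch of how these two thresholds steer the full sort loading
 * loop in ExecIncrementalSort() below.  The real loop interleaves this with
 * reading tuples and handling bounds; nTuples, minGroupSize, and slot stand
 * for local state of that loop.
 */
#if 0
	if (nTuples < minGroupSize)
	{
		/* Below the minimum group size: keep loading, no prefix key checks. */
	}
	else if (!isCurrentGroup(node, node->group_pivot, slot))
	{
		/* Prefix keys changed: sort this batch on all columns and read it out. */
	}
	else if (nTuples >= DEFAULT_MAX_FULL_SORT_GROUP_SIZE)
	{
		/* Probably one large group: switch to sorting only the suffix keys. */
		switchToPresortedPrefixMode(pstate);
	}
#endif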
/* ----------------------------------------------------------------
 * ExecIncrementalSort
 *
 * Assuming that the outer subtree returns tuples presorted by some prefix
 * of the target sort columns, performs an incremental sort.
 *
 * -- the outer child is prepared to return the first tuple.
 * ----------------------------------------------------------------
 */
	/*
	 * If a previous iteration has sorted a batch, then we need to check to
	 * see if there are any remaining tuples in that batch that we can return
	 * before moving on to other execution states.
	 */

		/*
		 * Return next tuple from the current sorted group set if available.
		 */

			/*
			 * We have to populate the slot from the tuplesort before checking
			 * outerNodeDone because it will set the slot to NULL if no more
			 * tuples remain.  If the tuplesort is empty, but we don't have any
			 * more tuples available for sort from the outer node, then
			 * outerNodeDone will have been set so we'll return that now-empty
			 * slot to the caller.
			 */

			/*
			 * Note: there isn't a good test case for the node->outerNodeDone
			 * check directly, but we need it for any plan where the outer
			 * node will fail when trying to fetch too many tuples.
			 */
			/*
			 * When we transition to presorted prefix mode, we might have
			 * accumulated at least one additional prefix key group in the
			 * full sort tuplesort.  The first call to
			 * switchToPresortedPrefixMode() will have pulled the first one of
			 * those groups out, and we've returned those tuples to the parent
			 * node, but if at this point we still have tuples remaining in
			 * the full sort state (i.e., n_fullsort_remaining > 0), then we
			 * need to re-execute the prefix mode transition function to pull
			 * out the next prefix key group.
			 */
				SO1_printf("Re-calling switchToPresortedPrefixMode() because n_fullsort_remaining is > 0 (" INT64_FORMAT ")\n",
						   node->n_fullsort_remaining);
			/*
			 * If we don't have any sorted tuples to read and we're not
			 * currently transitioning into presorted prefix sort mode, then
			 * it's time to start the process all over again by building a new
			 * group in the full sort state.
			 */
			SO_printf("Setting execution_status to INCSORT_LOADFULLSORT (n_fullsort_remaining > 0)\n");
	/*
	 * Scan the subplan in the forward direction while creating the sorted
	 * data.
	 */
			/* Load tuples into the full sort state. */

			/*
			 * Initialize sorting structures.
			 */
			if (fullsort_state == NULL)
				/*
				 * Initialize presorted column support structures for
				 * isCurrentGroup().  It's correct to do this along with the
				 * initial initialization for the full sort state (and not for
				 * the prefix sort state) since we always load the full sort
				 * state first.
				 */
				/*
				 * Since we optimize small prefix key groups by accumulating a
				 * minimum number of tuples before sorting, we can't assume
				 * that a group of tuples all have the same prefix key values.
				 * Hence we set up the full sort tuplesort to sort by all
				 * requested sort keys.
				 */
													  plannode->sort.sortColIdx,
													  plannode->sort.sortOperators,
													  plannode->sort.collations,
													  plannode->sort.nullsFirst,
				/* Reset sort for the next batch. */

			/*
			 * Calculate the tuples remaining if bounded and configure both
			 * bounded sort and the minimum group size accordingly.
			 */

				/*
				 * Bounded sort isn't likely to be a useful optimization for
				 * full sort mode since we limit full sort mode to a relatively
				 * small number of tuples and tuplesort doesn't switch over to
				 * top-n heap sort anyway unless it hits (2 * bound) tuples.
				 */
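/*
 * Sketched out, the configuration just described might look as follows.  This
 * is illustrative only; bound and bound_Done are assumed field names for the
 * original bound and the count of tuples already returned toward it.
 */
#if 0
			if (node->bounded)
			{
				int64		currentBound = node->bound - node->bound_Done;

				/* With a small remaining bound, stop accumulating at the bound. */
				minGroupSize = Min(DEFAULT_MIN_GROUP_SIZE, currentBound);
			}
			else
				minGroupSize = DEFAULT_MIN_GROUP_SIZE;
#endif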
			/*
			 * Because we have to read the next tuple to find out that we've
			 * encountered a new prefix key group, on subsequent groups we have
			 * to carry over that extra tuple and add it to the new group's
			 * sort here before we read any new tuples from the outer node.
			 */

				/*
				 * We're in full sort mode accumulating a minimum number of
				 * tuples and not checking for prefix key equality yet, so we
				 * can't assume the group pivot tuple will remain the same --
				 * unless we're using a minimum group size of 1, in which case
				 * the pivot is obviously still the pivot.
				 */
				if (nTuples != minGroupSize)
			/*
			 * Pull as many tuples from the outer node as possible given our
			 * current operating mode.
			 */

				/*
				 * If the outer node can't provide us any more tuples, then we
				 * can sort the current group and return those tuples.
				 */

					/*
					 * We need to know later if the outer node has completed to
					 * be able to distinguish between being done with a batch
					 * and being done with the whole node.
					 */

					SO_printf("Setting execution_status to INCSORT_READFULLSORT (final tuple)\n");
				/* Accumulate the next group of presorted tuples. */
				if (nTuples < minGroupSize)

					/*
					 * If we haven't yet hit our target minimum group size,
					 * then we don't need to bother checking for inclusion in
					 * the current prefix group since at this point we'll
					 * assume that we'll full sort this batch to avoid a large
					 * number of very tiny (and thus inefficient) sorts.
					 */

					/*
					 * If we've reached our minimum group size, then we need to
					 * store the most recent tuple as a pivot.
					 */
					if (nTuples == minGroupSize)

					/*
					 * If we've already accumulated enough tuples to reach our
					 * minimum group size, then we need to compare any
					 * additional tuples to our pivot tuple to see if we reach
					 * the end of that prefix key group.  Only after we find
					 * changed prefix keys can we guarantee sort stability of
					 * the tuples we've already accumulated.
					 */
					/*
					 * As long as the prefix keys match the pivot tuple then
					 * load the tuple into the tuplesort.
					 */

						/*
						 * Since the tuple we fetched isn't part of the current
						 * prefix key group we don't want to sort it as part of
						 * the current batch.  Instead we use the group_pivot
						 * slot to carry it over to the next batch (even though
						 * we won't actually treat it as a group pivot).
						 */
						/*
						 * If the current node has a bound, and we've already
						 * sorted n tuples, then the functional bound remaining
						 * is (original bound - n), so store the current number
						 * of processed tuples for later use configuring the
						 * sort state's bound.
						 */
						/*
						 * Once we find changed prefix keys we can complete the
						 * sort and transition modes to reading out the sorted
						 * tuples.
						 */

						SO_printf("Setting execution_status to INCSORT_READFULLSORT (found end of group)\n");
				/*
				 * Unless we've already transitioned modes to reading from the
				 * full sort state, then we assume that having read at least
				 * DEFAULT_MAX_FULL_SORT_GROUP_SIZE tuples means it's likely
				 * we're processing a large group of tuples all having equal
				 * prefix keys (but haven't yet found the final tuple in that
				 * prefix key group), so we need to transition into presorted
				 * prefix mode.
				 */
					/*
					 * The group pivot we have stored has already been put into
					 * the tuplesort; we don't want to carry it over.  Since we
					 * haven't yet found the end of the prefix key group, it
					 * might seem like we should keep this, but we don't
					 * actually know how many prefix key groups might be
					 * represented in the full sort state, so we'll let the
					 * mode transition function manage this state for us.
					 */
					/*
					 * Unfortunately the tuplesort API doesn't include a way to
					 * retrieve tuples unless a sort has been performed, so we
					 * perform the sort even though we could just as easily
					 * rely on FIFO retrieval semantics when transferring them
					 * to the presorted prefix tuplesort.
					 */
					/*
					 * If the full sort tuplesort happened to switch into top-n
					 * heapsort mode then we will only be able to retrieve
					 * currentBound tuples (since the tuplesort will have only
					 * retained the top-n tuples).  This is safe even though we
					 * haven't yet completed fetching the current prefix key
					 * group because the tuples we've "lost" already sorted
					 * "below" the retained ones, and we're already
					 * contractually guaranteed to not need any more than the
					 * currentBound tuples.
					 */
								   nTuples, Min(currentBound, nTuples));
						nTuples = Min(currentBound, nTuples);
					SO1_printf("Setting n_fullsort_remaining to " INT64_FORMAT " and calling switchToPresortedPrefixMode()\n",
							   nTuples);
					/*
					 * We might have multiple prefix key groups in the full
					 * sort state, so the mode transition function needs to
					 * know that it needs to move from the fullsort to the
					 * presorted prefix tuplesort.
					 */
					/* Transition the tuples to the presorted prefix tuplesort. */

					/*
					 * Since we know we had tuples to move to the presorted
					 * prefix tuplesort, we know that unless that transition
					 * has verified that all tuples belonged to the same prefix
					 * key group (in which case we can go straight to
					 * continuing to load tuples into that tuplesort), we
					 * should have a tuple to return here.
					 *
					 * Either way, the appropriate execution status should have
					 * been set by switchToPresortedPrefixMode(), so we can drop
					 * out of the loop here and let the appropriate path kick in.
					 */
			/*
			 * We only enter this state after the mode transition function has
			 * confirmed all remaining tuples from the full sort state have the
			 * same prefix and moved those tuples to the prefix sort state.
			 * That function has also set a group pivot tuple (which doesn't
			 * need to be carried over; it's already been put into the prefix
			 * sort state).
			 */
			/*
			 * Read tuples from the outer node and load them into the prefix
			 * sort state until we encounter a tuple whose prefix keys don't
			 * match the current group_pivot tuple, since we can't guarantee
			 * sort stability until we have all tuples matching those prefix
			 * keys.
			 */
				/*
				 * If we've exhausted tuples from the outer node we're done
				 * loading the prefix sort state.
				 */

					/*
					 * We need to know later if the outer node has completed to
					 * be able to distinguish between being done with a batch
					 * and being done with the whole node.
					 */
				/*
				 * If the tuple's prefix keys match our pivot tuple, we're not
				 * done yet and can load it into the prefix sort state.  If
				 * not, we don't want to sort it as part of the current batch.
				 * Instead we use the group_pivot slot to carry it over to the
				 * next batch (even though we won't actually treat it as a
				 * group pivot).
				 */
			/*
			 * Perform the sort and begin returning the tuples to the parent
			 * plan node.
			 */

			SO_printf("Setting execution_status to INCSORT_READPREFIXSORT (found end of group)\n");
				/*
				 * If the current node has a bound, and we've already sorted n
				 * tuples, then the functional bound remaining is (original
				 * bound - n), so store the current number of processed tuples
				 * for use in configuring sorting bound.
				 */
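/*
 * In sketch form, the bookkeeping described above amounts to the following.
 * Illustrative only; bound and bound_Done are assumed field names for the
 * original bound and the number of tuples already returned toward it.
 */
#if 0
			if (node->bounded)
			{
				node->bound_Done = Min(node->bound, node->bound_Done + nTuples);

				/* A later batch can then be limited to the remaining bound: */
				tuplesort_set_bound(node->prefixsort_state,
									node->bound - node->bound_Done);
			}
#endif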
	/* Restore to user specified direction. */

	/*
	 * Get the first or next tuple from tuplesort.  Returns NULL if no more
	 * tuples are available.
	 */
/* ----------------------------------------------------------------
 * ExecInitIncrementalSort
 *
 * Creates the run-time state information for the sort node
 * produced by the planner and initializes its outer subtree.
 * ----------------------------------------------------------------
 */

	SO_printf("ExecInitIncrementalSort: initializing sort node\n");
	/*
	 * Incremental sort can't be used with EXEC_FLAG_BACKWARD or
	 * EXEC_FLAG_MARK, because the current sort state contains only one sort
	 * batch rather than the full result set.
	 */

	/* Initialize state structure. */

	incrsortstate->bounded = false;
	/*
	 * Miscellaneous initialization
	 *
	 * Sort nodes don't initialize their ExprContexts because they never call
	 * ExecQual or ExecProject.
	 */

	/*
	 * Initialize child nodes.
	 *
	 * Incremental sort does not support backwards scans and mark/restore, so
	 * we don't bother removing the flags from eflags here.  We allow passing
	 * a REWIND flag, because although incremental sort can't use it, the
	 * child nodes may be able to do something more useful.
	 */

	/*
	 * Initialize scan slot and type.
	 */

	/*
	 * Initialize return slot and type.  No need to initialize projection info
	 * because we don't do any projections.
	 */

	/*
	 * Initialize standalone slots to store a tuple for pivot prefix keys and
	 * for carrying over a tuple from one batch to the next.
	 */

	SO_printf("ExecInitIncrementalSort: sort node initialized\n");

	return incrsortstate;
/* ----------------------------------------------------------------
 * ExecEndIncrementalSort(node)
 * ----------------------------------------------------------------
 */

	SO_printf("ExecEndIncrementalSort: shutting down sort node\n");

	/*
	 * Release tuplesort resources.
	 */

	/*
	 * Shut down the subplan.
	 */

	SO_printf("ExecEndIncrementalSort: sort node shutdown\n");
	/*
	 * Incremental sort doesn't support efficient rescan even when parameters
	 * haven't changed (e.g., rewind) because unlike regular sort we don't
	 * store all tuples at once for the full sort.
	 *
	 * So even if EXEC_FLAG_REWIND is set we just reset all of our state and
	 * re-execute the sort along with the child node.  Incremental sort itself
	 * can't do anything smarter, but maybe the child nodes can.
	 *
	 * In theory if we've only filled the full sort with one batch (and
	 * haven't reset it for a new batch yet) then we could efficiently rewind,
	 * but that seems a narrow enough case that it's not worth handling
	 * specially at this time.
	 */

	/* must drop pointer to sort result tuple */
	/*
	 * If we've already set up either of the sort states, we need to reset
	 * them.  We could end them and null out the pointers, but there's no
	 * reason to repay the setup cost, and because ExecIncrementalSort guards
	 * presorted column functions by checking to see if the full sort state
	 * has been initialized yet, setting the sort states to null here might
	 * actually cause a leak.
	 */
	/*
	 * If chgParam of subnode is not null, then the plan will be re-scanned by
	 * the first ExecProcNode.
	 */

/* ----------------------------------------------------------------
 *						Parallel Query Support
 * ----------------------------------------------------------------
 */
/* ----------------------------------------------------------------
 * ExecIncrementalSortEstimate
 *
 * Estimate space required to propagate sort statistics.
 * ----------------------------------------------------------------
 */
	/* don't need this if not instrumenting or no workers */
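/*
 * The estimate itself typically follows the usual pattern for per-worker
 * instrumentation: size a SharedIncrementalSortInfo with one
 * IncrementalSortInfo per worker and reserve a single TOC chunk and key for
 * it.  A sketch only; the offsetof-based sizing is an assumption here, not
 * shown in this excerpt.
 */
#if 0
	Size		size;

	size = mul_size(pcxt->nworkers, sizeof(IncrementalSortInfo));
	size = add_size(size, offsetof(SharedIncrementalSortInfo, sinfo));
	shm_toc_estimate_chunk(&pcxt->estimator, size);
	shm_toc_estimate_keys(&pcxt->estimator, 1);
#endif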
/* ----------------------------------------------------------------
 * ExecIncrementalSortInitializeDSM
 *
 * Initialize DSM space for sort statistics.
 * ----------------------------------------------------------------
 */
	/* don't need this if not instrumenting or no workers */

	/* ensure any unfilled slots will contain zeroes */
/* ----------------------------------------------------------------
 * ExecIncrementalSortInitializeWorker
 *
 * Attach worker to DSM space for sort statistics.
 * ----------------------------------------------------------------
 */
/* ----------------------------------------------------------------
 * ExecIncrementalSortRetrieveInstrumentation
 *
 * Transfer sort statistics from DSM to private memory.
 * ----------------------------------------------------------------
 */