1/*-------------------------------------------------------------------------
4 * contains dispatch functions which call the appropriate "initialize",
5 * "get a tuple", and "cleanup" routines for the given node type.
6 * If the node has children, then it will presumably call ExecInitNode,
7 * ExecProcNode, or ExecEndNode on its subnodes and do the appropriate processing.
10 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
15 * src/backend/executor/execProcnode.c
17 *-------------------------------------------------------------------------
21 * This used to be three files. It is now all combined into
22 * one file so that it is easier to keep the dispatch routines
23 * in sync when new nodes are added.
26 * Suppose we want the age of the manager of the shoe department and
27 * the number of employees in that department. So we have the query:
29 * select DEPT.no_emps, EMP.age
30 * from DEPT, EMP
31 * where EMP.name = DEPT.mgr and
32 * DEPT.name = "shoe"
34 * Suppose the planner gives us the following plan:
36 * Nest Loop (DEPT.mgr = EMP.name)
43 * ExecutorStart() is called first.
44 * It calls InitPlan() which calls ExecInitNode() on
45 * the root of the plan -- the nest loop node.
47 * * ExecInitNode() notices that it is looking at a nest loop and
48 * as the code below demonstrates, it calls ExecInitNestLoop().
49 * Eventually this calls ExecInitNode() on the right and left subplans
50 * and so forth until the entire plan is initialized. The result
51 * of ExecInitNode() is a plan state tree built with the same structure
52 * as the underlying plan tree.
54 * * Then when ExecutorRun() is called, it calls ExecutePlan() which calls
55 * ExecProcNode() repeatedly on the top node of the plan state tree.
56 * Each time this happens, ExecProcNode() will end up calling
57 * ExecNestLoop(), which calls ExecProcNode() on its subplans.
58 * Each of these subplans is a sequential scan so ExecSeqScan() is
59 * called. The slots returned by ExecSeqScan() may contain
60 * tuples which contain the attributes ExecNestLoop() uses to
61 * form the tuples it returns.
63 * * Eventually ExecSeqScan() stops returning tuples and the nest
64 * loop join ends. Lastly, ExecutorEnd() calls ExecEndNode() which
65 * calls ExecEndNestLoop() which in turn calls ExecEndNode() on
66 * its subplans, which results in calls to ExecEndSeqScan().
68 * This should show how the executor works by having
69 * ExecInitNode(), ExecProcNode() and ExecEndNode() dispatch
70 * their work to the appropriate node support routines which may
71 * in turn call these routines themselves on their subplans.
127/* ------------------------------------------------------------------------
130 * Recursively initializes all the nodes in the plan tree rooted at 'node'.
134 * 'node' is the current node of the plan produced by the query planner
135 * 'estate' is the shared execution state for the plan tree
136 * 'eflags' is a bitwise OR of flag bits described in executor.h
138 * Returns a PlanState node corresponding to the given Plan node.
139 * ------------------------------------------------------------------------
149 * do nothing when we get to the end of a leaf of the tree.
155 * Make sure there's enough stack available. Need to check here, in
156 * addition to ExecProcNode() (via ExecProcNodeFirst()), to ensure the
157 * stack isn't overrun while initializing the node tree.
191 case T_RecursiveUnion:
224 case T_IndexOnlyScan:
229 case T_BitmapIndexScan:
234 case T_BitmapHeapScan:
259 case T_TableFuncScan:
274 case T_NamedTuplestoreScan:
279 case T_WorkTableScan:
313 * materialization nodes
325 case T_IncrementalSort:
387 result = NULL;
/* keep compiler quiet */
394 * Initialize any initPlans present in this node. The planner put them in
395 * a separate list for us.
397 * The defining characteristic of initplans is that they don't have
398 * arguments, so we don't need to evaluate them (in contrast to
399 * ExecInitSubPlanExpr()).
410 subps =
lappend(subps, sstate);
414 /* Set up instrumentation for this node if requested */
424 * If a node wants to change its ExecProcNode function after ExecInitNode()
425 * has finished, it should do so with this function. That way any wrapper
426 * functions can be reinstalled, without the node having to know how that works.
433 * Add a wrapper around the ExecProcNode callback that checks stack depth
434 * during the first execution and maybe adds an instrumentation wrapper.
435 * When the callback is changed after execution has already begun that
436 * means we'll superfluously execute ExecProcNodeFirst, but that seems ok.
444 * ExecProcNode wrapper that performs some one-time checks, before calling
445 * the relevant node method (possibly via an instrumentation wrapper).
451 * Perform stack depth check during the first execution of the node. We
452 * only do so the first time round because it turns out to not be cheap on
453 * some common architectures (e.g., x86). This relies on the assumption
454 * that ExecProcNode calls for a given plan node will always be made at
455 * roughly the same stack depth.
460 * If instrumentation is required, change the wrapper to one that just
461 * does instrumentation. Otherwise we can dispense with all wrappers and
462 * have ExecProcNode() directly call the relevant function from now on.
474 * ExecProcNode wrapper that performs instrumentation calls. By keeping
475 * this a separate function, we avoid overhead in the normal case where
476 * no instrumentation is wanted.
493/* ----------------------------------------------------------------
496 * Execute a node that doesn't return individual tuples
497 * (it might return a hashtable, bitmap, etc). Caller should
498 * check it got back the expected kind of Node.
500 * This has essentially the same responsibilities as ExecProcNode,
501 * but it does not do InstrStartNode/InstrStopNode (mainly because
502 * it can't tell how many returned tuples to count). Each per-node
503 * function must provide its own instrumentation support.
504 * ----------------------------------------------------------------
515 if (node->
chgParam != NULL)
/* something changed */
516 ExecReScan(node);
/* let ReScan handle this */
521 * Only node types that actually support multiexec will be listed
528 case T_BitmapIndexScanState:
532 case T_BitmapAndState:
536 case T_BitmapOrState:
550/* ----------------------------------------------------------------
553 * Recursively cleans up all the nodes in the plan rooted at 'node'.
556 * After this operation, the query plan will not be able to be
557 * processed any further. This should be called only after
558 * the query plan has been fully executed.
559 * ----------------------------------------------------------------
565 * do nothing when we get to the end of a leaf of the tree.
571 * Make sure there's enough stack available. Need to check here, in
572 * addition to ExecProcNode() (via ExecProcNodeFirst()), because it's not
573 * guaranteed that ExecProcNode() is reached for all nodes.
592 case T_ProjectSetState:
596 case T_ModifyTableState:
604 case T_MergeAppendState:
608 case T_RecursiveUnionState:
612 case T_BitmapAndState:
616 case T_BitmapOrState:
627 case T_SampleScanState:
635 case T_GatherMergeState:
639 case T_IndexScanState:
643 case T_IndexOnlyScanState:
647 case T_BitmapIndexScanState:
651 case T_BitmapHeapScanState:
659 case T_TidRangeScanState:
663 case T_SubqueryScanState:
667 case T_FunctionScanState:
671 case T_TableFuncScanState:
679 case T_ForeignScanState:
683 case T_CustomScanState:
690 case T_NestLoopState:
694 case T_MergeJoinState:
698 case T_HashJoinState:
703 * materialization nodes
705 case T_MaterialState:
713 case T_IncrementalSortState:
729 case T_WindowAggState:
745 case T_LockRowsState:
753 /* No clean up actions for these nodes. */
754 case T_ValuesScanState:
755 case T_NamedTuplestoreScanState:
756 case T_WorkTableScanState:
768 * Give execution nodes a chance to stop asynchronous resource consumption
769 * and release any resources still held.
786 * Treat the node as running while we shut it down, but only if it's run
787 * at least once already. We don't expect much CPU consumption during
788 * node shutdown, but in the case of Gather or Gather Merge, we may shut
789 * down workers at this stage. If so, their buffer usage will get
790 * propagated into pgBufferUsage at this point, and we want to make sure
791 * that it gets associated with the Gather node. We skip this if the node
792 * has never been executed, so as to avoid incorrectly making it appear that it has.
805 case T_ForeignScanState:
808 case T_CustomScanState:
811 case T_GatherMergeState:
817 case T_HashJoinState:
824 /* Stop the node if we started it above, reporting 0 tuples. */
834 * Set a tuple bound for a planstate node. This lets child plan nodes
835 * optimize based on the knowledge that the maximum number of tuples that
836 * their parent will demand is limited. The tuple bound for a node may
837 * only be changed between scans (i.e., after node initialization or just
838 * before an ExecReScan call).
840 * Any negative tuples_needed value means "no limit", which should be the
841 * default assumption when this is not called at all for a particular node.
843 * Note: if this is called repeatedly on a plan tree, the exact same set
844 * of nodes must be updated with the new limit each time; be careful that
845 * only unchanging conditions are tested here.
851 * Since this function recurses, in principle we should check stack depth
852 * here. In practice, it's probably pointless since the earlier node
853 * initialization tree traversal would surely have consumed more stack.
859 * If it is a Sort node, notify it that it can use bounded sort.
861 * Note: it is the responsibility of nodeSort.c to react properly to
862 * changes of these parameters. If we ever redesign this, it'd be a
863 * good idea to integrate this signaling with the parameter-change mechanism.
868 if (tuples_needed < 0)
870 /* make sure flag gets reset if needed upon rescan */
876 sortState->
bound = tuples_needed;
882 * If it is an IncrementalSort node, notify it that it can use bounded sort.
885 * Note: it is the responsibility of nodeIncrementalSort.c to react
886 * properly to changes of these parameters. If we ever redesign this,
887 * it'd be a good idea to integrate this signaling with the
888 * parameter-change mechanism.
892 if (tuples_needed < 0)
894 /* make sure flag gets reset if needed upon rescan */
900 sortState->
bound = tuples_needed;
906 * If it is an Append, we can apply the bound to any nodes that are
907 * children of the Append, since the Append surely need read no more
908 * than that many tuples from any one input.
919 * If it is a MergeAppend, we can apply the bound to any nodes that
920 * are children of the MergeAppend, since the MergeAppend surely need
921 * read no more than that many tuples from any one input.
932 * Similarly, for a projecting Result, we can apply the bound to its child.
935 * If Result supported qual checking, we'd have to punt on seeing a
936 * qual. Note that having a resconstantqual is not a showstopper: if
937 * that condition succeeds it affects nothing, while if it fails, no
938 * rows will be demanded from the Result child anyway.
946 * We can also descend through SubqueryScan, but only if it has no
947 * qual (otherwise it might discard rows).
951 if (subqueryState->
ss.
ps.
qual == NULL)
957 * A Gather node can propagate the bound to its workers. As with
958 * MergeAppend, no one worker could possibly need to return more
959 * tuples than the Gather itself needs to.
961 * Note: As with Sort, the Gather node is responsible for reacting
962 * properly to changes to this parameter.
968 /* Also pass down the bound to our own copy of the child plan */
973 /* Same comments as for Gather */
982 * In principle we could descend through any plan node type that is
983 * certain not to discard or combine input rows; but on seeing a node that
984 * can do that, we can't propagate the bound any further. For the moment
985 * it's unclear that any other cases are worth checking here.
void bms_free(Bitmapset *a)
void ExecReScan(PlanState *node)
static bool ExecShutdownNode_walker(PlanState *node, void *context)
Node * MultiExecProcNode(PlanState *node)
void ExecSetTupleBound(int64 tuples_needed, PlanState *child_node)
void ExecEndNode(PlanState *node)
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
void ExecShutdownNode(PlanState *node)
static TupleTableSlot * ExecProcNodeInstr(PlanState *node)
static TupleTableSlot * ExecProcNodeFirst(PlanState *node)
void ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
#define outerPlanState(node)
TupleTableSlot *(* ExecProcNodeMtd)(PlanState *pstate)
Assert(PointerIsAligned(start, uint64))
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
void InstrStartNode(Instrumentation *instr)
void InstrStopNode(Instrumentation *instr, double nTuples)
List * lappend(List *list, void *datum)
#define CHECK_FOR_INTERRUPTS()
void ExecEndAgg(AggState *node)
AggState * ExecInitAgg(Agg *node, EState *estate, int eflags)
void ExecEndAppend(AppendState *node)
AppendState * ExecInitAppend(Append *node, EState *estate, int eflags)
BitmapAndState * ExecInitBitmapAnd(BitmapAnd *node, EState *estate, int eflags)
Node * MultiExecBitmapAnd(BitmapAndState *node)
void ExecEndBitmapAnd(BitmapAndState *node)
void ExecEndBitmapHeapScan(BitmapHeapScanState *node)
BitmapHeapScanState * ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
Node * MultiExecBitmapIndexScan(BitmapIndexScanState *node)
void ExecEndBitmapIndexScan(BitmapIndexScanState *node)
BitmapIndexScanState * ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
void ExecEndBitmapOr(BitmapOrState *node)
Node * MultiExecBitmapOr(BitmapOrState *node)
BitmapOrState * ExecInitBitmapOr(BitmapOr *node, EState *estate, int eflags)
CteScanState * ExecInitCteScan(CteScan *node, EState *estate, int eflags)
void ExecEndCteScan(CteScanState *node)
void ExecShutdownCustomScan(CustomScanState *node)
void ExecEndCustomScan(CustomScanState *node)
CustomScanState * ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags)
ForeignScanState * ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
void ExecShutdownForeignScan(ForeignScanState *node)
void ExecEndForeignScan(ForeignScanState *node)
#define planstate_tree_walker(ps, w, c)
FunctionScanState * ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
void ExecEndFunctionScan(FunctionScanState *node)
void ExecShutdownGatherMerge(GatherMergeState *node)
GatherMergeState * ExecInitGatherMerge(GatherMerge *node, EState *estate, int eflags)
void ExecEndGatherMerge(GatherMergeState *node)
void ExecEndGather(GatherState *node)
void ExecShutdownGather(GatherState *node)
GatherState * ExecInitGather(Gather *node, EState *estate, int eflags)
GroupState * ExecInitGroup(Group *node, EState *estate, int eflags)
void ExecEndGroup(GroupState *node)
Node * MultiExecHash(HashState *node)
HashState * ExecInitHash(Hash *node, EState *estate, int eflags)
void ExecEndHash(HashState *node)
void ExecShutdownHash(HashState *node)
void ExecEndHashJoin(HashJoinState *node)
void ExecShutdownHashJoin(HashJoinState *node)
HashJoinState * ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
void ExecEndIncrementalSort(IncrementalSortState *node)
IncrementalSortState * ExecInitIncrementalSort(IncrementalSort *node, EState *estate, int eflags)
void ExecEndIndexOnlyScan(IndexOnlyScanState *node)
IndexOnlyScanState * ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
IndexScanState * ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
void ExecEndIndexScan(IndexScanState *node)
LimitState * ExecInitLimit(Limit *node, EState *estate, int eflags)
void ExecEndLimit(LimitState *node)
LockRowsState * ExecInitLockRows(LockRows *node, EState *estate, int eflags)
void ExecEndLockRows(LockRowsState *node)
MaterialState * ExecInitMaterial(Material *node, EState *estate, int eflags)
void ExecEndMaterial(MaterialState *node)
MemoizeState * ExecInitMemoize(Memoize *node, EState *estate, int eflags)
void ExecEndMemoize(MemoizeState *node)
MergeAppendState * ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
void ExecEndMergeAppend(MergeAppendState *node)
MergeJoinState * ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
void ExecEndMergeJoin(MergeJoinState *node)
ModifyTableState * ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
void ExecEndModifyTable(ModifyTableState *node)
NamedTuplestoreScanState * ExecInitNamedTuplestoreScan(NamedTuplestoreScan *node, EState *estate, int eflags)
void ExecEndNestLoop(NestLoopState *node)
NestLoopState * ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
ProjectSetState * ExecInitProjectSet(ProjectSet *node, EState *estate, int eflags)
void ExecEndProjectSet(ProjectSetState *node)
void ExecEndRecursiveUnion(RecursiveUnionState *node)
RecursiveUnionState * ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
ResultState * ExecInitResult(Result *node, EState *estate, int eflags)
void ExecEndResult(ResultState *node)
SampleScanState * ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
void ExecEndSampleScan(SampleScanState *node)
void ExecEndSeqScan(SeqScanState *node)
SeqScanState * ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
void ExecEndSetOp(SetOpState *node)
SetOpState * ExecInitSetOp(SetOp *node, EState *estate, int eflags)
SortState * ExecInitSort(Sort *node, EState *estate, int eflags)
void ExecEndSort(SortState *node)
SubPlanState * ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
void ExecEndSubqueryScan(SubqueryScanState *node)
SubqueryScanState * ExecInitSubqueryScan(SubqueryScan *node, EState *estate, int eflags)
void ExecEndTableFuncScan(TableFuncScanState *node)
TableFuncScanState * ExecInitTableFuncScan(TableFuncScan *node, EState *estate, int eflags)
void ExecEndTidRangeScan(TidRangeScanState *node)
TidRangeScanState * ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
TidScanState * ExecInitTidScan(TidScan *node, EState *estate, int eflags)
void ExecEndTidScan(TidScanState *node)
void ExecEndUnique(UniqueState *node)
UniqueState * ExecInitUnique(Unique *node, EState *estate, int eflags)
ValuesScanState * ExecInitValuesScan(ValuesScan *node, EState *estate, int eflags)
void ExecEndWindowAgg(WindowAggState *node)
WindowAggState * ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
WorkTableScanState * ExecInitWorkTableScan(WorkTableScan *node, EState *estate, int eflags)
#define IsA(nodeptr, _type_)
on_exit_nicely_callback function
void check_stack_depth(void)
Instrumentation * instrument
ExecProcNodeMtd ExecProcNodeReal
ExecProcNodeMtd ExecProcNode