/*-------------------------------------------------------------------------
 *
 * nodeAppend.c
 *	  routines to handle append nodes.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeAppend.c
 *
 *-------------------------------------------------------------------------
 */
/* INTERFACE ROUTINES
 *		ExecInitAppend		- initialize the append node
 *		ExecAppend			- retrieve the next tuple from the node
 *		ExecEndAppend		- shut down the append node
 *		ExecReScanAppend	- rescan the append node
 *
 *	 NOTES
 *		Each append node contains a list of one or more subplans which
 *		must be iteratively processed (forwards or backwards).
 *		Tuples are retrieved by executing the 'whichplan'th subplan
 *		until the subplan stops returning tuples, at which point that
 *		plan is shut down and the next started up.
 *
 *		Append nodes don't make use of their left and right
 *		subtrees, rather they maintain a list of subplans so
 *		a typical append node looks like this in the plan tree:
 *
 *				Append -------+------+------+--- nil
 *				/	\		  |		 |		|
 *			  nil	nil		 ...    ...    ...
 *								 subplans
 *
 *		Append nodes are currently used for unions, and to support
 *		inheritance queries, where several relations need to be scanned.
 *		For example, in our standard person/student/employee/student-emp
 *		example, where student and employee inherit from person
 *		and student-emp inherits from student and employee, the
 *		query:
 *
 *				select name from person
 *
 *		generates the plan:
 *
 *				Append -------+-------+--------+--------+
 *				/	\		  |		  |		   |		|
 *			  nil	nil		 Scan	 Scan	  Scan	   Scan
 *							  |		  |		   |		|
 *						   person employee student student-emp
 */
/* Shared state for parallel-aware Append. */

/*
 * pa_finished[i] should be true if no more workers should select subplan
 * i.  For a non-partial plan, this should be set to true as soon as a
 * worker selects the plan; for a partial plan, it remains false until
 * some worker executes the plan to completion.
 */
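/*
 * Illustrative sketch of the shared descriptor described above; the exact
 * layout is an assumption, but the idea is an LWLock-protected cursor plus
 * the per-subplan "finished" flags.
 */
struct ParallelAppendState
{
	LWLock		pa_lock;		/* mutual exclusion when choosing a subplan */
	int			pa_next_plan;	/* next subplan any participant should take */
	bool		pa_finished[FLEXIBLE_ARRAY_MEMBER]; /* see comment above */
};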
#define INVALID_SUBPLAN_INDEX		-1
#define EVENT_BUFFER_SIZE			16
/* ----------------------------------------------------------------
 *		ExecInitAppend
 *
 *		Begin all of the subscans of the append node.
 *
 *	   (This is potentially wasteful, since the entire result of the
 *		append node may not be scanned, but this way all of the
 *		structures get allocated in the executor's top level memory
 *		block instead of that of the call to ExecAppend.)
 * ----------------------------------------------------------------
 */
	/* check for unsupported flags */

	/*
	 * create new AppendState for our append node
	 */
	appendstate->ps.state = estate;

	/* Let choose_next_subplan_* function handle setting the first subplan */
	/* If run-time partition pruning is enabled, then set that up now */

	/*
	 * Set up pruning data structure.  This also initializes the set of
	 * subplans to initialize (validsubplans) by taking into account the
	 * result of performing initial pruning if any.
	 */

	/*
	 * When no run-time pruning is required and there's at least one
	 * subplan, we can fill as_valid_subplans immediately, preventing
	 * later calls to ExecFindMatchingSubPlans.
	 */

	/*
	 * When run-time partition pruning is not enabled we can just mark all
	 * subplans as valid; they must also all be initialized.
	 */
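	/*
	 * Minimal sketch of the "no run-time pruning" branch described above
	 * (illustrative; mirrors the field names used elsewhere in this file):
	 */
	nplans = list_length(node->appendplans);
	appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1);
	appendstate->as_valid_subplans_identified = true;
	appendstate->as_prune_state = NULL;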
	/*
	 * call ExecInitNode on each of the valid plans to be executed and save
	 * the results into the appendplanstates array.
	 *
	 * While at it, find out the first valid partial plan.
	 */

	/*
	 * Record async subplans.  When executing EvalPlanQual, we treat them
	 * as sync ones; don't do this when initializing an EvalPlanQual plan
	 * tree.
	 */

	/*
	 * Record the lowest appendplans index which is a valid partial plan.
	 */

	appendplanstates[j++] = ExecInitNode(initNode, estate, eflags);
	/*
	 * Initialize Append's result tuple type and slot.  If the child plans all
	 * produce the same fixed slot type, we can use that slot type; otherwise
	 * make a virtual slot.  (Note that the result slot itself is used only to
	 * return a null tuple at end of execution; real tuples are returned to
	 * the caller in the children's own result slots.  What we are doing here
	 * is allowing the parent plan node to optimize if the Append will return
	 * only one kind of slot.)
	 */
	if (appendops != NULL)
		/* show that the output slot type is not fixed */
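	/*
	 * For context, a sketch of the branch the two fragments above belong to,
	 * assuming the common slot-ops helpers (illustrative, not verbatim):
	 */
	appendops = ExecGetCommonSlotOps(appendstate->appendplans, j);
	if (appendops != NULL)
	{
		ExecInitResultTupleSlotTL(&appendstate->ps, appendops);
	}
	else
	{
		ExecInitResultTupleSlotTL(&appendstate->ps, &TTSOpsVirtual);
		/* show that the output slot type is not fixed */
		appendstate->ps.resultopsset = true;
		appendstate->ps.resultopsfixed = false;
	}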
	/* Initialize async state */

	/*
	 * Miscellaneous initialization
	 */

	/* For parallel query, this will be overridden later. */
/* ----------------------------------------------------------------
 *		ExecAppend
 *
 *		Handles iteration over multiple subplans.
 * ----------------------------------------------------------------
 */

	/*
	 * If this is the first call after Init or ReScan, we need to do the
	 * initialization work.
	 */

	/* Nothing to do if there are no subplans */

	/* If there are any async subplans, begin executing them. */

	/*
	 * If no sync subplan has been chosen, we must choose one before
	 * proceeding.
	 */

	/* And we're initialized. */

	/*
	 * try to get a tuple from an async subplan if any
	 */

	/*
	 * figure out which sync subplan we are currently processing
	 */

	/*
	 * get a tuple from the subplan
	 */

	/*
	 * If the subplan gave us something then return it as-is.  We do
	 * NOT make use of the result slot that was set up in
	 * ExecInitAppend; there's no need for it.
	 */

	/*
	 * wait or poll for async events if any.  We do this before checking
	 * for the end of iteration, because it might drain the remaining
	 * async subplans.
	 */

	/* choose new sync subplan; if no sync/async subplans, we're done */
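/*
 * A simplified, sync-only sketch of the loop the comments above describe.
 * Illustrative only: the real function also interleaves the async subplans,
 * and the name ExecAppendSketch is hypothetical.
 */
static TupleTableSlot *
ExecAppendSketch(AppendState *node)
{
	for (;;)
	{
		PlanState  *subnode;
		TupleTableSlot *result;

		CHECK_FOR_INTERRUPTS();

		/* figure out which sync subplan we are currently processing */
		subnode = node->appendplans[node->as_whichplan];

		/* get a tuple from the subplan */
		result = ExecProcNode(subnode);

		/* return the child's own slot as-is; Append adds nothing to it */
		if (!TupIsNull(result))
			return result;

		/* current subplan exhausted: pick another, or report end of scan */
		if (!node->choose_next_subplan(node))
			return ExecClearTuple(node->ps.ps_ResultTupleSlot);
	}
}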
/* ----------------------------------------------------------------
 *		ExecEndAppend
 *
 *		Shuts down the subscans of the append node.
 *
 *		Returns nothing of interest.
 * ----------------------------------------------------------------
 */

	/*
	 * get information from the node
	 */

	/*
	 * shut down each of the subscans
	 */
	for (i = 0; i < nplans; i++)
		ExecEndNode(appendplans[i]);

/* ----------------------------------------------------------------
 *		ExecReScanAppend
 * ----------------------------------------------------------------
 */

	/*
	 * If any PARAM_EXEC Params used in pruning expressions have changed, then
	 * we'd better unset the valid subplans so that they are reselected for
	 * the new parameter values.
	 */
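	/*
	 * Sketch of that check (illustrative; assumes the prune state records the
	 * set of PARAM_EXEC param IDs its pruning expressions depend on):
	 */
	if (node->as_prune_state &&
		bms_overlap(node->ps.chgParam,
					node->as_prune_state->execparamids))
	{
		node->as_valid_subplans_identified = false;
		bms_free(node->as_valid_subplans);
		node->as_valid_subplans = NULL;
	}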
	/*
	 * ExecReScan doesn't know about my subplans, so I have to do
	 * changed-parameter signaling myself.
	 */

	/*
	 * If chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode or by first ExecAsyncRequest.
	 */

	/* Reset async state */

	/* Let choose_next_subplan_* function handle setting the first subplan */
/* ----------------------------------------------------------------
 *						Parallel Append Support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *		ExecAppendEstimate
 *
 *		Compute the amount of space we'll need in the parallel
 *		query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
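	/*
	 * Minimal sketch of the estimation step, assuming the flexible-array
	 * shared state sketched earlier (illustrative):
	 */
	node->pstate_len = add_size(offsetof(ParallelAppendState, pa_finished),
								sizeof(bool) * node->as_nplans);
	shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);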
/* ----------------------------------------------------------------
 *		ExecAppendInitializeDSM
 *
 *		Set up shared state for Parallel Append.
 * ----------------------------------------------------------------
 */
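	/*
	 * Sketch of the DSM setup (illustrative; the tranche ID and the choice to
	 * seed pa_next_plan from as_whichplan are assumptions):
	 */
	pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
	memset(pstate, 0, node->pstate_len);
	pstate->pa_next_plan = node->as_whichplan;
	LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
	shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);

	node->as_pstate = pstate;
	node->choose_next_subplan = choose_next_subplan_for_leader;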
/* ----------------------------------------------------------------
 *		ExecAppendReInitializeDSM
 *
 *		Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
/* ----------------------------------------------------------------
 *		ExecAppendInitializeWorker
 *
 *		Copy relevant information from TOC into planstate, and initialize
 *		whatever is required to choose and execute the optimal subplan.
 * ----------------------------------------------------------------
 */
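	/*
	 * Sketch of the worker-side attach step (illustrative):
	 */
	node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id,
									 false);
	node->choose_next_subplan = choose_next_subplan_for_worker;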
/* ----------------------------------------------------------------
 *		choose_next_subplan_locally
 *
 *		Choose next sync subplan for a non-parallel-aware Append,
 *		returning false if there are no more.
 * ----------------------------------------------------------------
 */

	/* We should never be called when there are no subplans */

	/* Nothing to do if syncdone */

	/*
	 * If first call then have the bms member function choose the first valid
	 * sync subplan by initializing whichplan to -1.  If there happen to be no
	 * valid sync subplans then the bms member function will handle that by
	 * returning a negative number which will allow us to exit returning a
	 * false value.
	 */

	/* We'd have filled as_valid_subplans already */

	/* Ensure whichplan is within the expected range */
	Assert(whichplan >= -1 && whichplan <= node->as_nplans);

	/* Set as_syncdone if in async mode */
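	/*
	 * Sketch of the direction-aware advance the comments above describe
	 * (illustrative):
	 */
	if (ScanDirectionIsForward(node->ps.state->es_direction))
		whichplan = bms_next_member(node->as_valid_subplans, whichplan);
	else
		whichplan = bms_prev_member(node->as_valid_subplans, whichplan);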
/* ----------------------------------------------------------------
 *		choose_next_subplan_for_leader
 *
 *		Try to pick a plan which doesn't commit us to doing much
 *		work locally, so that as much work as possible is done in
 *		the workers.  Cheapest subplans are at the end.
 * ----------------------------------------------------------------
 */
	/* Backward scan is not supported by parallel-aware plans */

	/* We should never be called when there are no subplans */

	/* Mark just-completed subplan as finished. */

	/* Start with last subplan. */

	/*
	 * If we've yet to determine the valid subplans then do so now.  If
	 * run-time pruning is disabled then the valid subplans will always be
	 * set to all subplans.
	 */

	/*
	 * Mark each invalid plan as finished to allow the loop below to
	 * select the first valid subplan.
	 */

	/* Loop until we find a subplan to execute. */

	/*
	 * We needn't pay attention to as_valid_subplans here as all invalid
	 * plans have been marked as finished.
	 */

	/* If non-partial, immediately mark as finished. */
/* ----------------------------------------------------------------
 *		choose_next_subplan_for_worker
 *
 *		Choose next subplan for a parallel-aware Append, returning
 *		false if there are no more.
 *
 *		We start from the first plan and advance through the list;
 *		when we get back to the end, we loop back to the first
 *		partial plan.  This assigns the non-partial plans first in
 *		order of descending cost and then spreads out the workers
 *		as evenly as possible across the remaining partial plans.
 * ----------------------------------------------------------------
 */
	/* Backward scan is not supported by parallel-aware plans */

	/* We should never be called when there are no subplans */

	/* Mark just-completed subplan as finished. */

	/*
	 * If we've yet to determine the valid subplans then do so now.  If
	 * run-time pruning is disabled then the valid subplans will always be set
	 * to all subplans.
	 */

	/* If all the plans are already done, we have nothing to do */

	/* Save the plan from which we are starting the search. */

	/* Loop until we find a valid subplan to execute. */

	/* Advance to the next valid plan. */

	/*
	 * Try looping back to the first valid partial plan, if there is
	 * one.  If there isn't, arrange to bail out below.
	 */

	/*
	 * At last plan, and either there are no partial plans or we've
	 * tried them all.  Arrange to bail out.
	 */

	/* We've tried everything! */

	/* Pick the plan we found, and advance pa_next_plan one more time. */

	/*
	 * If there are no more valid plans then try setting the next plan to the
	 * first valid partial plan.
	 */

	/*
	 * There are no valid partial plans, and we already chose the last
	 * non-partial plan; so flag that there's nothing more for our
	 * fellow workers to do.
	 */

	/* If non-partial, immediately mark as finished. */
/*
 * mark_invalid_subplans_as_finished
 *		Marks the ParallelAppendState's pa_finished as true for each invalid
 *		subplan.
 *
 * This function should only be called for parallel Append with run-time
 * pruning enabled.
 */

	/* Only valid to call this while in parallel Append mode */

	/* Shouldn't have been called when run-time pruning is not enabled */

	/* Nothing to do if all plans are valid */

	/* Mark all non-valid plans as finished */
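	/*
	 * Sketch of the marking loop (illustrative):
	 */
	for (i = 0; i < node->as_nplans; i++)
	{
		if (!bms_is_member(i, node->as_valid_subplans))
			node->as_pstate->pa_finished[i] = true;
	}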
/* ----------------------------------------------------------------
 *						Asynchronous Append Support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *		ExecAppendAsyncBegin
 *
 *		Begin executing designated async-capable subplans.
 * ----------------------------------------------------------------
 */
	/* Backward scan is not supported by async-aware Appends. */

	/* We should never be called when there are no subplans */

	/* We should never be called when there are no async subplans. */

	/* If we've yet to determine the valid subplans then do so now. */

	/* Initialize state variables. */

	/* Nothing to do if there are no valid async subplans. */

	/* Make a request for each of the valid async subplans. */

	/* Do the actual work. */
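	/*
	 * Sketch of that request loop (illustrative; assumes one AsyncRequest per
	 * async-capable subplan, indexed by subplan number):
	 */
	i = -1;
	while ((i = bms_next_member(node->as_valid_asyncplans, i)) >= 0)
	{
		AsyncRequest *areq = node->as_asyncrequests[i];

		/* Do the actual work. */
		ExecAsyncRequest(areq);
	}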
/* ----------------------------------------------------------------
 *		ExecAppendAsyncGetNext
 *
 *		Get the next tuple from any of the asynchronous subplans.
 * ----------------------------------------------------------------
 */

	/* We should never be called when there are no valid async subplans. */

	/* Request a tuple asynchronously. */

	/* Wait or poll for async events. */

	/* Request a tuple asynchronously. */

	/* Break from loop if there's any sync subplan that isn't complete. */

	/*
	 * If all sync subplans are complete, we're totally done scanning the
	 * given node.  Otherwise, we're done with the asynchronous stuff but must
	 * continue scanning the sync subplans.
	 */
/* ----------------------------------------------------------------
 *		ExecAppendAsyncRequest
 *
 *		Request a tuple asynchronously.
 * ----------------------------------------------------------------
 */

	/* Nothing to do if there are no async subplans needing a new request. */

	/*
	 * If there are any asynchronously-generated results that have not yet
	 * been returned, we have nothing to do; just return one of them.
	 */

	/* Make a new request for each of the async subplans that need it. */

	/* Do the actual work. */

	/* Return one of the asynchronously-generated results if any. */
/* ----------------------------------------------------------------
 *		ExecAppendAsyncEventWait
 *
 *		Wait or poll for file descriptor events and fire callbacks.
 * ----------------------------------------------------------------
 */

	/* We should never be called when there are no valid async subplans. */

	/* Give each waiting subplan a chance to add an event. */

	/*
	 * No need for further processing if none of the subplans configured any
	 * events.
	 */

	/*
	 * Add the process latch to the set, so that we wake up to process the
	 * standard interrupts with CHECK_FOR_INTERRUPTS().
	 *
	 * NOTE: For historical reasons, it's important that this is added to the
	 * WaitEventSet after the ExecAsyncConfigureWait() calls.  Namely,
	 * postgres_fdw calls "GetNumRegisteredWaitEvents(set) == 1" to check if
	 * any other events are in the set.  That's a poor design, it's
	 * questionable for postgres_fdw to be doing that in the first place, but
	 * we cannot change it now.  The pattern has possibly been copied to other
	 * extensions as well, so we keep this ordering.
	 */
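	/*
	 * Sketch of the latch registration described in the NOTE above
	 * (illustrative; the exact flags are an assumption):
	 */
	AddWaitEventToSet(node->as_eventset, WL_LATCH_SET, PGINVALID_SOCKET,
					  MyLatch, NULL);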
	/* Return at most EVENT_BUFFER_SIZE events in one call. */

	/*
	 * If the timeout is -1, wait until at least one event occurs.  If the
	 * timeout is 0, poll for events, but do not wait at all.
	 */
	noccurred = WaitEventSetWait(node->as_eventset, timeout, occurred_event,
								 nevents, WAIT_EVENT_APPEND_READY);

	/* Deliver notifications. */
	for (i = 0; i < noccurred; i++)
	/*
	 * Each waiting subplan should have registered its wait event with
	 * user_data pointing back to its AsyncRequest.
	 */

	/*
	 * Mark it as no longer needing a callback.  We must do this
	 * before dispatching the callback in case the callback resets
	 * the flag.
	 */

	/* Do the actual work. */

	/* Handle standard interrupts */
/* ----------------------------------------------------------------
 *		ExecAsyncAppendResponse
 *
 *		Receive a response from an asynchronous request we made.
 * ----------------------------------------------------------------
 */

	/* The result should be a TupleTableSlot or NULL. */

	/* Nothing to do if the request is pending. */

	/* The request would have been pending for a callback. */

	/* If the result is NULL or an empty slot, there's nothing more to do. */

	/* The ending subplan wouldn't have been pending for a callback. */

	/* Save result so we can return it. */

	/*
	 * Mark the subplan that returned a result as ready for a new request.  We
	 * don't launch another one here immediately because it might complete.
	 */
/* ----------------------------------------------------------------
 *		classify_matching_subplans
 *
 *		Classify the node's as_valid_subplans into sync ones and
 *		async ones, adjust it to contain sync ones only, and save
 *		async ones in the node's as_valid_asyncplans.
 * ----------------------------------------------------------------
 */

	/* Nothing to do if there are no valid subplans. */

	/* Nothing to do if there are no valid async subplans. */

	/* Get valid async subplans. */

	/* Adjust the valid subplans to contain sync subplans only. */

	/* Save valid async subplans. */
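	/*
	 * Sketch of the bitmap arithmetic behind the last three comments
	 * (illustrative):
	 */
	valid_asyncplans = bms_intersect(node->as_asyncplans,
									 node->as_valid_subplans);
	node->as_valid_subplans = bms_del_members(node->as_valid_subplans,
											  valid_asyncplans);
	node->as_valid_asyncplans = valid_asyncplans;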