/*-------------------------------------------------------------------------
 *
 * execPartition.c
 *	  Support routines for partitioning.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/executor/execPartition.c
 *
 *-------------------------------------------------------------------------
 */
/*-----------------------
 * PartitionTupleRouting - Encapsulates all information required to
 * route a tuple inserted into a partitioned table to one of its leaf
 * partitions.
 *
 * partition_root
 *		The partitioned table that's the target of the command.
 *
 * partition_dispatch_info
 *		Array of 'max_dispatch' elements containing a pointer to a
 *		PartitionDispatch object for every partitioned table touched by tuple
 *		routing.  The entry for the target partitioned table is *always*
 *		present in the 0th element of this array.  See comment for
 *		PartitionDispatchData->indexes for details on how this array is
 *		indexed.
 *
 * nonleaf_partitions
 *		Array of 'max_dispatch' elements containing pointers to fake
 *		ResultRelInfo objects for nonleaf partitions, useful for checking
 *		the partition constraint.
 *
 * num_dispatch
 *		The current number of items stored in the 'partition_dispatch_info'
 *		array.  Also serves as the index of the next free array element for
 *		new PartitionDispatch objects that need to be stored.
 *
 * max_dispatch
 *		The current allocated size of the 'partition_dispatch_info' array.
 *
 * partitions
 *		Array of 'max_partitions' elements containing a pointer to a
 *		ResultRelInfo for every leaf partition touched by tuple routing.
 *		Some of these are pointers to ResultRelInfos which are borrowed out
 *		of the owning ModifyTableState node.  The remainder have been built
 *		especially for tuple routing.  See comment for
 *		PartitionDispatchData->indexes for details on how this array is
 *		indexed.
 *
 * is_borrowed_rel
 *		Array of 'max_partitions' booleans recording whether a given entry
 *		in 'partitions' is a ResultRelInfo pointer borrowed from the owning
 *		ModifyTableState node, rather than being built here.
 *
 * num_partitions
 *		The current number of items stored in the 'partitions' array.  Also
 *		serves as the index of the next free array element for new
 *		ResultRelInfo objects that need to be stored.
 *
 * max_partitions
 *		The current allocated size of the 'partitions' array.
 *
 * memcxt
 *		Memory context used to allocate subsidiary structs.
 *-----------------------
 */
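/*
 * For orientation, a sketch of the struct this comment documents.  The field
 * names come from the comment above; the exact declaration (types, ordering,
 * any members not described here) is inferred rather than copied verbatim.
 */
typedef struct PartitionTupleRouting
{
	Relation	partition_root;
	PartitionDispatch *partition_dispatch_info;
	ResultRelInfo **nonleaf_partitions;
	int			num_dispatch;
	int			max_dispatch;
	ResultRelInfo **partitions;
	bool	   *is_borrowed_rel;
	int			num_partitions;
	int			max_partitions;
	MemoryContext memcxt;
} PartitionTupleRouting;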
/*-----------------------
 * PartitionDispatch - information about one partitioned table in a partition
 * hierarchy required to route a tuple to any of its partitions.  A
 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
 * struct and stored inside its 'partition_dispatch_info' array.
 *
 * reldesc
 *		Relation descriptor of the table
 *
 * key
 *		Partition key information of the table
 *
 * keystate
 *		Execution state required for expressions in the partition key
 *
 * partdesc
 *		Partition descriptor of the table
 *
 * tupslot
 *		A standalone TupleTableSlot initialized with this table's tuple
 *		descriptor, or NULL if no tuple conversion from the parent is
 *		required
 *
 * tupmap
 *		TupleConversionMap to convert from the parent's rowtype to this
 *		table's rowtype (when extracting the partition key of a tuple just
 *		before routing it through this table).  A NULL value is stored if
 *		no tuple conversion is required.
 *
 * indexes
 *		Array of partdesc->nparts elements.  For leaf partitions the index
 *		corresponds to the partition's ResultRelInfo in the encapsulating
 *		PartitionTupleRouting's partitions array.  For partitioned partitions,
 *		the index corresponds to the PartitionDispatch for it in its
 *		partition_dispatch_info array.  -1 indicates we've not yet allocated
 *		anything in PartitionTupleRouting for the partition.
 *-----------------------
 */
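/*
 * A corresponding struct sketch, with member names taken from the comment
 * above (types and ordering are inferred, not verbatim):
 */
typedef struct PartitionDispatchData
{
	Relation	reldesc;
	PartitionKey key;
	List	   *keystate;		/* list of ExprState */
	PartitionDesc partdesc;
	TupleTableSlot *tupslot;
	AttrMap    *tupmap;
	int			indexes[FLEXIBLE_ARRAY_MEMBER];
} PartitionDispatchData;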
static void ExecInitRoutingInfo(ModifyTableState *mtstate,
								EState *estate,
								PartitionTupleRouting *proute,
								PartitionDispatch dispatch,
								ResultRelInfo *partRelInfo,
								int partidx,
								bool is_borrowed_rel);
static void InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
										   PlanState *parent_plan,
										   Bitmapset *initially_valid_subplans,
										   int n_total_subplans);
/*
 * ExecSetupPartitionTupleRouting - sets up information needed during
 * tuple routing for partitioned tables, encapsulates it in
 * PartitionTupleRouting, and returns it.
 *
 * Callers must use the returned PartitionTupleRouting during calls to
 * ExecFindPartition().  The actual ResultRelInfo for a partition is only
 * allocated when the partition is found for the first time.
 *
 * The current memory context is used to allocate this struct and all
 * subsidiary structs that will be allocated from it later on.  Typically
 * it should be estate->es_query_cxt.
 */

	/*
	 * Here we attempt to expend as little effort as possible in setting up
	 * the PartitionTupleRouting.  Each partition's ResultRelInfo is built on
	 * demand, only when we actually need to route a tuple to that partition.
	 * The reason for this is that a common case is for INSERT to insert a
	 * single tuple into a partitioned table and this must be fast.
	 */
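	/*
	 * A hedged sketch of how a caller typically drives this API (local
	 * variable names are assumptions; error handling omitted):
	 *
	 *		proute = ExecSetupPartitionTupleRouting(estate, rel);
	 *		...
	 *		partRelInfo = ExecFindPartition(mtstate, rootResultRelInfo,
	 *										proute, slot, estate);
	 *		...
	 *		ExecCleanupTupleRouting(mtstate, proute);
	 */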
	/* Rest of members initialized by zeroing */

	/*
	 * Initialize this table's PartitionDispatch object.  Here we pass in the
	 * parent as NULL as we don't need to care about any parent of the target
	 * partitioned table.
	 */
/*
 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
 * the tuple contained in *slot should belong to.
 *
 * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
 * one up or reuse one from mtstate's resultRelInfo array.  When reusing a
 * ResultRelInfo from the mtstate we verify that the relation is a valid
 * target for INSERTs and initialize tuple routing information.
 *
 * rootResultRelInfo is the relation named in the query.
 *
 * estate must be non-NULL; we'll need it to compute any expressions in the
 * partition keys.  Also, its per-tuple contexts are used as evaluation
 * scratch space.
 *
 * If no leaf partition is found, this routine errors out with the appropriate
 * error message.  An error may also be raised if the found target partition
 * is not a valid target for an INSERT.
 */
	/* use per-tuple context here to avoid leaking memory */

	/*
	 * First check the root table's partition constraint, if any.  No point in
	 * routing the tuple if it doesn't belong in the root table itself.
	 */

	/* start with the root partitioned table */
	while (dispatch != NULL)
	{
		/*
		 * Extract the partition key from the tuple.  The expression
		 * evaluation machinery that FormPartitionKeyDatum() invokes expects
		 * ecxt_scantuple to point to the correct tuple slot.  The slot might
		 * have changed from what was used for the parent table if the table
		 * of the current partitioning level has a different tuple descriptor
		 * from the parent.  So update ecxt_scantuple accordingly.
		 */
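		/*
		 * The update just described, as a sketch (assuming 'ecxt' is the
		 * per-tuple ExprContext fetched earlier in this routine):
		 */
		ecxt->ecxt_scantuple = slot;
		FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);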
		/*
		 * If this partitioned table has no partitions or no partition for
		 * these values, error out.
		 */
		if (partdesc->nparts == 0 ||
			(partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
					 errmsg("no partition of relation \"%s\" found for row",
							RelationGetRelationName(rel)),
					 errdetail("Partition key of the failing row contains %s.",
							   val_desc)));	/* val_desc built above (elided) */

		is_leaf = partdesc->is_leaf[partidx];
			/*
			 * We've reached the leaf -- hurray, we're done.  Look to see if
			 * we've already got a ResultRelInfo for this partition.
			 */

			/* ResultRelInfo already built */

			/*
			 * If the partition is known in the owning ModifyTableState
			 * node, we can re-use that ResultRelInfo instead of creating
			 * a new one with ExecInitPartitionInfo().
			 */
			rri = ExecLookupResultRelByOid(mtstate,
										   partdesc->oids[partidx],
										   true, false);
			if (rri)
			{
				/* Verify this ResultRelInfo allows INSERTs */

				/*
				 * Initialize information needed to insert this and
				 * subsequent tuples routed to this partition.
				 */
				ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
									rri, partidx, true);
			}
			else
			{
				/* We need to create a new one. */
				rri = ExecInitPartitionInfo(mtstate, estate, proute, dispatch,
											rootResultRelInfo, partidx);
			}
			/* Signal to terminate the loop */
			dispatch = NULL;
		}
		else
		{
			/*
			 * Partition is a sub-partitioned table; get the PartitionDispatch
			 */
			if (dispatch->indexes[partidx] >= 0)
			{
				/*
				 * Move down to the next partition level and search again
				 * until we find a leaf partition that matches this tuple
				 */
				dispatch = pd[dispatch->indexes[partidx]];
			}
			else
			{
				/* Not yet built.  Do that now. */

				/*
				 * Create the new PartitionDispatch.  We pass the current one
				 * in as the parent PartitionDispatch.
				 */
				subdispatch = ExecInitPartitionDispatchInfo(estate, proute,
															partdesc->oids[partidx],
															dispatch, partidx,
															mtstate->rootResultRelInfo);
				dispatch = subdispatch;
			}
		}
		/*
		 * Convert the tuple to the new parent's layout, if different from
		 * the previous parent.
		 */
		if (tempslot != NULL)
			ExecClearTuple(tempslot);

		/*
		 * If this partition is the default one, we must check its partition
		 * constraint now, which may have changed concurrently due to
		 * partitions being added to the parent.
		 *
		 * (We do this here, and do not rely on ExecInsert doing it, because
		 * we don't want to miss doing it for non-leaf partitions.)
		 */

		/*
		 * The tuple must match the partition's layout for the constraint
		 * expression to be evaluated successfully.  If the partition is
		 * sub-partitioned, that would already be the case due to the code
		 * above, but for a leaf partition the tuple still matches the
		 * parent's layout.
		 *
		 * Note that we have a map to convert from root to current
		 * partition, but not from immediate parent to current partition.
		 * So if we have to convert, do it from the root slot; if not, use
		 * the root slot as-is.
		 */
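		/*
		 * A sketch of the conversion just described (slot and variable
		 * names here are assumptions, not the routine's actual locals):
		 */
		{
			TupleConversionMap *map = ExecGetRootToChildMap(rri, estate);

			if (map != NULL)
				slot = execute_attr_map_slot(map->attrMap, rootslot,
											 rri->ri_PartitionTupleSlot);
			else
				slot = rootslot;
			ExecPartitionCheck(rri, slot, estate, true);
		}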
	/* Release the tuple in the lowest parent's dedicated slot. */

	/* and restore ecxt's scantuple */
/*
 * ExecInitPartitionInfo
 *		Lock the partition and initialize ResultRelInfo.  Also setup other
 *		information for the partition and store it in the next empty slot in
 *		the proute->partitions array.
 *
 * Returns the ResultRelInfo
 */
	bool		found_whole_row;

	/*
	 * Verify result relation is a valid target for an INSERT.  An UPDATE of
	 * a partition-key becomes a DELETE+INSERT operation, so this check is
	 * still required when the operation is CMD_UPDATE.
	 */

	/*
	 * Open partition indices.  The user may have asked to check for conflicts
	 * within this leaf partition and do "nothing" instead of throwing an
	 * error.  Be prepared in that case by initializing the index information
	 * needed by ExecInsert() to perform speculative insertions.
	 */
	if (partrel->rd_rel->relhasindex &&
		leaf_part_rri->ri_IndexRelationDescs == NULL)
		ExecOpenIndices(leaf_part_rri,
						(node != NULL &&
						 node->onConflictAction != ONCONFLICT_NONE));
	/*
	 * Build WITH CHECK OPTION constraints for the partition.  Note that we
	 * didn't build the withCheckOptionList for partitions within the planner,
	 * but simple translation of varattnos will suffice.  This only occurs for
	 * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
	 * didn't find a result rel to reuse.
	 */

		/*
		 * In the case of INSERT on a partitioned table, there is only one
		 * plan.  Likewise, there is only one WCO list, not one per partition.
		 * For UPDATE/MERGE, there are as many WCO lists as there are plans.
		 */

		/*
		 * Use the WCO list of the first plan as a reference to calculate
		 * attno's for the WCO list of this partition.  In the INSERT case,
		 * that refers to the root partitioned table, whereas in the UPDATE
		 * tuple routing case, that refers to the first partition in the
		 * mtstate->resultRelInfo array.  In any case, both that relation and
		 * this partition should have the same columns, so we should be able
		 * to map attributes successfully.
		 */

		/*
		 * Convert Vars in it to contain this partition's attribute numbers.
		 */
		/* We ignore the value of found_whole_row. */
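		/*
		 * A sketch of that conversion (the real code builds part_attmap once
		 * and reuses it; argument order follows the declarations of
		 * build_attrmap_by_name() and map_variable_attnos()):
		 */
		part_attmap =
			build_attrmap_by_name(RelationGetDescr(partrel),
								  RelationGetDescr(firstResultRel),
								  false);
		wcoList = (List *)
			map_variable_attnos((Node *) wcoList,
								firstVarno, 0,
								part_attmap,
								RelationGetForm(partrel)->reltype,
								&found_whole_row);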
			wcoExprs = lappend(wcoExprs, wcoExpr);

		leaf_part_rri->ri_WithCheckOptions = wcoList;
		leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
	/*
	 * Build the RETURNING projection for the partition.  Note that we didn't
	 * build the returningList for partitions within the planner, but simple
	 * translation of varattnos will suffice.  This only occurs for the INSERT
	 * case or in the case of UPDATE/MERGE tuple routing where we didn't find
	 * a result rel to reuse.
	 */

		/* See the comment above for WCO lists. */

		/*
		 * Use the RETURNING list of the first plan as a reference to
		 * calculate attno's for the RETURNING list of this partition.  See
		 * the comment above for WCO lists for more details on why this is
		 * okay.
		 */

		/*
		 * Convert Vars in it to contain this partition's attribute numbers.
		 */
		if (part_attmap == NULL)
			part_attmap =
				build_attrmap_by_name(RelationGetDescr(partrel),
									  RelationGetDescr(firstResultRel),
									  false);
		returningList = (List *)
			map_variable_attnos((Node *) returningList,
								firstVarno, 0,
								part_attmap,
								RelationGetForm(partrel)->reltype,
								&found_whole_row);
		/* We ignore the value of found_whole_row. */

		leaf_part_rri->ri_returningList = returningList;
		/*
		 * Initialize the projection itself.
		 *
		 * Use the slot and the expression context that would have been set
		 * up in ExecInitModifyTable() for projection's output.
		 */
		leaf_part_rri->ri_projectReturning =
			ExecBuildProjectionInfo(returningList, econtext, slot,
									&mtstate->ps, RelationGetDescr(partrel));

	/* Set up information needed for routing tuples to the partition. */
	ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
						leaf_part_rri, partidx, false);
	/*
	 * If there is an ON CONFLICT clause, initialize state for it.
	 */

		/*
		 * If there is a list of arbiter indexes, map it to a list of indexes
		 * in the partition.  We do that by scanning the partition's index
		 * list and searching for ancestry relationships to each index in the
		 * ancestor table.
		 */
		foreach(lc, childIdxs)
		{
			Oid			childIdx = lfirst_oid(lc);
			List	   *ancestors = get_partition_ancestors(childIdx);
			ListCell   *lc2;

			foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
			{
				if (list_member_oid(ancestors, lfirst_oid(lc2)))
					arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
			}
			list_free(ancestors);
		}

		/*
		 * If the resulting lists are of unequal length, something is wrong.
		 * (This shouldn't happen, since arbiter index selection should not
		 * pick up an invalid index.)
		 */
		if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
			list_length(arbiterIndexes))
			elog(ERROR, "invalid arbiter index list");
		leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
		/*
		 * In the DO UPDATE case, we have some more state to initialize.
		 */

			leaf_part_rri->ri_onConflict = onconfl;

			/*
			 * Need a separate existing slot for each partition, as the
			 * partition could be of a different AM, even if the tuple
			 * descriptors match.
			 */

			/*
			 * If the partition's tuple descriptor matches exactly the root
			 * parent (the common case), we can re-use most of the parent's
			 * ON CONFLICT SET state, skipping a bunch of work.  Otherwise,
			 * we need to create state specific to this partition.
			 */

				/*
				 * It's safe to reuse these from the partition root, as we
				 * only process one tuple at a time (therefore we won't
				 * overwrite needed data in slots), and the results of
				 * projections are independent of the underlying storage.
				 * Projections and where clauses themselves don't store state
				 * and are independent of the underlying storage.
				 */
				/*
				 * Translate expressions in onConflictSet to account for
				 * different attribute numbers.  For that, map partition
				 * varattnos twice: first to catch the EXCLUDED
				 * pseudo-relation (INNER_VAR), and second to handle the main
				 * target relation (firstVarno).
				 */
				if (part_attmap == NULL)
					part_attmap =
						build_attrmap_by_name(RelationGetDescr(partrel),
											  RelationGetDescr(firstResultRel),
											  false);
				onconflset = (List *)
					map_variable_attnos((Node *) onconflset,
										INNER_VAR, 0,
										part_attmap,
										RelationGetForm(partrel)->reltype,
										&found_whole_row);
				/* We ignore the value of found_whole_row. */
				onconflset = (List *)
					map_variable_attnos((Node *) onconflset,
										firstVarno, 0,
										part_attmap,
										RelationGetForm(partrel)->reltype,
										&found_whole_row);
				/* We ignore the value of found_whole_row. */

				/* Finally, adjust the target colnos to match the partition. */

				/* create the tuple slot for the UPDATE SET projection */

				/* build UPDATE SET projection state */

				/*
				 * If there is a WHERE clause, initialize state where it will
				 * be evaluated, mapping the attribute numbers appropriately.
				 * As with onConflictSet, we need to map partition varattnos
				 * to the partition's tupdesc.
				 */
				/* We ignore the value of found_whole_row. */
				/* We ignore the value of found_whole_row. */
	/*
	 * Since we've just initialized this ResultRelInfo, it's not in any list
	 * attached to the estate as yet.  Add it, so that it can be found later.
	 *
	 * Note that the entries in this list appear in no predetermined order,
	 * because partition result rels are initialized as and when they're
	 * needed.
	 */

	/*
	 * Initialize information about this partition that's needed to handle
	 * MERGE.  We take the "first" result relation's mergeActionList as
	 * reference and make a copy for this relation, converting stuff that
	 * references attribute numbers to match this relation's.
	 *
	 * This duplicates much of the logic in ExecInitMerge(), so if something
	 * changes there, look here too.
	 */
		if (part_attmap == NULL)
			part_attmap =
				build_attrmap_by_name(RelationGetDescr(partrel),
									  RelationGetDescr(firstResultRel),
									  false);

		if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
			ExecInitMergeTupleSlots(mtstate, leaf_part_rri);

		/* Initialize state for join condition checking. */
		/* We ignore the value of found_whole_row. */
		leaf_part_rri->ri_MergeJoinCondition =
			ExecInitQual((List *) joinCondition, &mtstate->ps);

		foreach(lc, firstMergeActionList)
		{
			/* Make a copy for this relation to be safe. */

			/* Generate the action's state for this relation */

			/* And put the action in the appropriate list */
			leaf_part_rri->ri_MergeActions[action->matchKind] =
				lappend(leaf_part_rri->ri_MergeActions[action->matchKind],
						action_state);
			switch (action->commandType)
			{
				case CMD_INSERT:

					/*
					 * ExecCheckPlanOutput() was already done on the
					 * targetlist when the "first" result relation was
					 * initialized, and it is the same for all result
					 * relations.
					 */
					action_state->mas_proj =
						ExecBuildProjectionInfo(action->targetList, econtext,
												leaf_part_rri->ri_newTupleSlot,
												&mtstate->ps,
												RelationGetDescr(partrel));
					break;

				case CMD_UPDATE:

					/*
					 * Convert updateColnos from the "first" result relation's
					 * attribute numbers to this result rel's.
					 */
					action_state->mas_proj =
						ExecBuildUpdateProjection(action->targetList,
												  true,
												  action->updateColnos,
												  RelationGetDescr(partrel),
												  econtext,
												  leaf_part_rri->ri_newTupleSlot,
												  &mtstate->ps);
					break;

				default:
					elog(ERROR, "unknown action in MERGE WHEN clause");
			}

			/* found_whole_row intentionally ignored. */
		}

	return leaf_part_rri;
/*
 * ExecInitRoutingInfo
 *		Set up information needed for translating tuples between root
 *		partitioned table format and partition format, and keep track of it
 *		in PartitionTupleRouting.
 */
static void
ExecInitRoutingInfo(ModifyTableState *mtstate,
					EState *estate,
					PartitionTupleRouting *proute,
					PartitionDispatch dispatch,
					ResultRelInfo *partRelInfo,
					int partidx,
					bool is_borrowed_rel)
{
	/*
	 * Set up tuple conversion between root parent and the partition if the
	 * two have different rowtypes.  If conversion is indeed required, also
	 * initialize a slot dedicated to storing this partition's converted
	 * tuples.  Various operations that are applied to tuples after routing,
	 * such as checking constraints, will refer to this slot.
	 */

	/*
	 * This pins the partition's TupleDesc, which will be released at the
	 * end of the command.
	 */
	/*
	 * If the partition is a foreign table, let the FDW init itself for
	 * routing tuples to the partition.
	 */
	if (partRelInfo->ri_FdwRoutine != NULL &&
		partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
		partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);

	/*
	 * Determine if the FDW supports batch insert and determine the batch
	 * size (an FDW may support batching, but it may be disabled for the
	 * server/table or for this particular query).
	 *
	 * If the FDW does not support batching, we set the batch size to 1.
	 */
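	/*
	 * A condensed sketch of that batch-size decision (field and callback
	 * names follow the FdwRoutine declarations; treat this as illustrative,
	 * not the routine's exact statements):
	 */
	partRelInfo->ri_BatchSize = 1;
	if (partRelInfo->ri_FdwRoutine != NULL &&
		partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
		partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
		partRelInfo->ri_BatchSize =
			partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);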
	/*
	 * Keep track of it in the PartitionTupleRouting->partitions array.
	 */

	/* Allocate or enlarge the array, as needed */

	dispatch->indexes[partidx] = rri_index;
/*
 * ExecInitPartitionDispatchInfo
 *		Lock the partitioned table (if not locked already) and initialize
 *		PartitionDispatch for a partitioned table and store it in the next
 *		available slot in the proute->partition_dispatch_info array.  Also,
 *		record the index into this array in the parent_pd->indexes[] array in
 *		the partidx element so that we can properly retrieve the newly
 *		created PartitionDispatch later.
 */

	/*
	 * For data modification, it is better that the executor does not include
	 * partitions being detached, except when running in snapshot-isolation
	 * mode.  This means that a read-committed transaction immediately gets a
	 * "no partition for tuple" error when a tuple is inserted into a
	 * partition that's being detached concurrently, but a transaction in
	 * repeatable-read mode can still use such a partition.
	 */
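	/*
	 * A sketch of the setup that policy implies, assuming the omit_detached
	 * flag of CreatePartitionDirectory() is driven by the isolation test
	 * described above:
	 */
	if (estate->es_partition_directory == NULL)
		estate->es_partition_directory =
			CreatePartitionDirectory(estate->es_query_cxt,
									 !IsolationUsesXactSnapshot());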
	/*
	 * Only sub-partitioned tables need to be locked here.  The root
	 * partitioned table will already have been locked as it's referenced in
	 * the query's rtable.
	 */

	pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
									partdesc->nparts * sizeof(int));
	if (parent_pd != NULL)
	{
		/*
		 * For sub-partitioned tables where the column order differs from its
		 * direct parent partitioned table, we must store a tuple table slot
		 * initialized with its tuple descriptor and a tuple conversion map
		 * to convert a tuple from its parent's rowtype to its own.  This is
		 * to make sure that we are looking at the correct row using the
		 * correct tuple descriptor when computing its partition key for
		 * tuple routing.
		 */
	}
	else
	{
		/* Not required for the root partitioned table */
	}

	/*
	 * Initialize with -1 to signify that the corresponding partition's
	 * ResultRelInfo or PartitionDispatch has not been created yet.
	 */
1180 /* Allocate or enlarge the array, as needed */
1205 * If setting up a PartitionDispatch for a sub-partitioned table, we may
1206 * also need a minimally valid ResultRelInfo for checking the partition
1207 * constraint later; set that up now.
1220 * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
1221 * install a downlink in the parent to allow quick descent.
1226 parent_pd->
indexes[partidx] = dispatchidx;
/*
 * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
 * routing.
 *
 * Close all the partitioned tables, leaf partitions, and their indices.
 */

	/*
	 * Remember, proute->partition_dispatch_info[0] corresponds to the root
	 * partitioned table, which we must not try to close, because it is the
	 * main target table of the query that will be closed by callers such as
	 * ExecEndPlan() or DoCopy().  Also, tupslot is NULL for the root
	 * partitioned table.
	 */
		/* Allow any FDWs to shut down */

		/*
		 * Close it if it's not one of the result relations borrowed from the
		 * owning ModifyTableState; those will be closed by ExecEndPlan().
		 */
/*
 * FormPartitionKeyDatum
 *		Construct values[] and isnull[] arrays for the partition key
 *		of a tuple.
 *
 *	pd				Partition dispatch object of the partitioned table
 *	slot			Heap tuple from which to extract partition key
 *	estate			executor state for evaluating any partition key
 *					expressions (must be non-NULL)
 *	values			Array of partition key Datums (output area)
 *	isnull			Array of is-null indicators (output area)
 *
 * the ecxt_scantuple slot of estate's per-tuple expr context must point to
 * the heap tuple passed in.
 */
	/* Check caller has set up context correctly */
	Assert(estate != NULL &&
		   GetPerTupleExprContext(estate)->ecxt_scantuple == slot);

	/* First time through, set up expression evaluation state */
	if (pd->keystate == NIL)
		pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);

		/* Plain column; get the value directly from the heap tuple */
		datum = slot_getattr(slot, keycol, &isNull);

		/* Expression; need to evaluate it */
		if (partexpr_item == NULL)
			elog(ERROR, "wrong number of partition key expressions");
		datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
										  GetPerTupleExprContext(estate),
										  &isNull);

	if (partexpr_item != NULL)
		elog(ERROR, "wrong number of partition key expressions");
/*
 * The number of times the same partition must be found in a row before we
 * switch from a binary search for the given values to just checking if the
 * values belong to the last found partition.  This must be above 0.
 */
#define PARTITION_CACHED_FIND_THRESHOLD			16
/*
 * get_partition_for_tuple
 *		Finds partition of relation which accepts the partition key specified
 *		in values and isnull.
 *
 * Calling this function can be quite expensive when LIST and RANGE
 * partitioned tables have many partitions.  This is due to the binary search
 * that's done to find the correct partition.  Many of the use cases for LIST
 * and RANGE partitioned tables make it likely that the same partition is
 * found in subsequent ExecFindPartition() calls.  This is especially true
 * for cases such as RANGE partitioned tables on a TIMESTAMP column where the
 * partition key is the current time.  When asked to find a partition for a
 * RANGE or LIST partitioned table, we record the partition index and datum
 * offset we've found for the given 'values' in the PartitionDesc (which is
 * stored in relcache), and if we keep finding the same partition
 * PARTITION_CACHED_FIND_THRESHOLD times in a row, then we'll enable caching
 * logic and instead of performing a binary search to find the correct
 * partition, we'll just double-check that 'values' still belong to the last
 * found partition, and if so, we'll return that partition index, thus
 * skipping the need for the binary search.  If we fail to match the last
 * partition when double checking, then we fall back on doing a binary
 * search.  In this case, unless we find 'values' belong to the DEFAULT
 * partition, we'll reset the number of times we've hit the same partition
 * so that we don't attempt to use the cache again until we've found that
 * partition at least PARTITION_CACHED_FIND_THRESHOLD times in a row.
 *
 * For cases where the partition changes on each lookup, the amount of
 * additional work required just amounts to recording the last found
 * partition and bound offset then resetting the found counter.  This is
 * cheap and does not appear to cause any meaningful slowdowns for such
 * cases.
 *
 * No caching of partitions is done when the last found partition is the
 * DEFAULT or NULL partition.  For the case of the DEFAULT partition, there
 * is no bound offset storing the matching datum, so we cannot confirm the
 * indexes match.  For the NULL partition, this is just so cheap, there's no
 * sense in caching.
 *
 * Return value is index of the partition (>= 0 and < partdesc->nparts) if
 * one found or -1 if none found.
 */
	int			bound_offset = -1;
	int			part_index = -1;

	/*
	 * In the switch statement below, when we perform a cached lookup for
	 * RANGE and LIST partitioned tables, if we find that the last found
	 * partition matches the 'values', we return the partition index right
	 * away.  We do this instead of breaking out of the switch as we don't
	 * want to execute the code about the DEFAULT partition or do any updates
	 * for any of the cache-related fields.  That would be a waste of effort
	 * as we already know it's not the DEFAULT partition and have no need to
	 * increment the number of times we found the same partition any higher
	 * than PARTITION_CACHED_FIND_THRESHOLD.
	 */
	/* Route as appropriate based on partitioning strategy. */
	switch (key->strategy)
	{
		case PARTITION_STRATEGY_HASH:
			{
				uint64		rowHash;

				/* hash partitioning is too cheap to bother caching */
				rowHash = compute_partition_hash_value(key->partnatts,
													   key->partsupfunc,
													   key->partcollation,
													   values, isnull);

				/*
				 * HASH partitions can't have a DEFAULT partition and we
				 * don't do any caching work for them, so just return the
				 * part index
				 */
				return boundinfo->indexes[rowHash % boundinfo->nindexes];
			}

		case PARTITION_STRATEGY_LIST:
			if (isnull[0])
			{
				/* this is far too cheap to bother doing any caching */
				if (partition_bound_accepts_nulls(boundinfo))
				{
					/*
					 * When there is a NULL partition we just return that
					 * directly.  We don't have a bound_offset so it's not
					 * valid to drop into the code after the switch which
					 * checks and updates the cache fields.  We perhaps
					 * should be invalidating the details of the last cached
					 * partition but there's no real need to.  Keeping those
					 * fields set gives a chance at matching to the cached
					 * partition on the next lookup.
					 */
					return boundinfo->null_index;
				}
			}
			else if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
			{
				Datum		lastDatum = boundinfo->datums[last_datum_offset][0];
				int32		cmpval;

				/* does the last found datum index match this datum? */
				cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
														 key->partcollation[0],
														 lastDatum, values[0]));

				if (cmpval == 0)
					return boundinfo->indexes[last_datum_offset];

				/* fall-through and do a manual lookup */
			}
			bound_offset = partition_list_bsearch(key->partsupfunc,
												  key->partcollation,
												  boundinfo,
												  values[0], &equal);
			if (bound_offset >= 0 && equal)
				part_index = boundinfo->indexes[bound_offset];
			break;
		case PARTITION_STRATEGY_RANGE:
			{
				bool		equal = false,
							range_partkey_has_null = false;
				int			i;

				/*
				 * No range includes NULL, so this will be accepted by the
				 * default partition if there is one, and otherwise rejected.
				 */
				for (i = 0; i < key->partnatts; i++)
				{
					if (isnull[i])
					{
						range_partkey_has_null = true;
						break;
					}
				}

				/* NULLs belong in the DEFAULT partition */
				if (range_partkey_has_null)
					break;
				if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
				{
					Datum	   *lastDatums = boundinfo->datums[last_datum_offset];
					PartitionRangeDatumKind *kind = boundinfo->kind[last_datum_offset];
					int32		cmpval;

					/* check if the value is >= to the lower bound */
					cmpval = partition_rbound_datum_cmp(key->partsupfunc,
														key->partcollation,
														lastDatums, kind,
														values, key->partnatts);

					/*
					 * If it's equal to the lower bound then no need to check
					 * the upper bound.
					 */
					if (cmpval == 0)
						return boundinfo->indexes[last_datum_offset + 1];

					if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
					{
						/* check if the value is below the upper bound */
						lastDatums = boundinfo->datums[last_datum_offset + 1];
						kind = boundinfo->kind[last_datum_offset + 1];
						cmpval = partition_rbound_datum_cmp(key->partsupfunc,
															key->partcollation,
															lastDatums, kind,
															values, key->partnatts);

						if (cmpval > 0)
							return boundinfo->indexes[last_datum_offset + 1];
					}
					/* fall-through and do a manual lookup */
				}
				bound_offset = partition_range_datum_bsearch(key->partsupfunc,
															 key->partcollation,
															 boundinfo,
															 key->partnatts,
															 values, &equal);

				/*
				 * The bound at bound_offset is less than or equal to the
				 * tuple value, so the bound at offset+1 is the upper bound
				 * of the partition we're looking for, if there actually
				 * exists one.
				 */
				part_index = boundinfo->indexes[bound_offset + 1];
			}
			break;

		default:
			elog(ERROR, "unexpected partition strategy: %d",
				 (int) key->strategy);
	}
	/*
	 * part_index < 0 means we failed to find a partition of this parent.
	 * Use the default partition, if there is one.
	 */
	if (part_index < 0)
	{
		/*
		 * No need to reset the cache fields here.  The next set of values
		 * might end up belonging to the cached partition, so leaving the
		 * cache alone improves the chances of a cache hit on the next
		 * lookup.
		 */
		return boundinfo->default_index;
	}

	/* we should only make it here when the code above set bound_offset */
	Assert(bound_offset >= 0);

	/*
	 * Attend to the cache fields.  If the bound_offset matches the last
	 * cached bound offset then we've found the same partition as last time,
	 * so bump the count by one.  If all goes well, we'll eventually reach
	 * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
	 * around.  Otherwise, we'll reset the cache count back to 1 to mark that
	 * we've found this partition for the first time.
	 */
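	/*
	 * A sketch of that bookkeeping, using the cache fields named above (the
	 * exact statements are inferred from the comment, not copied):
	 */
	if (bound_offset == partdesc->last_found_datum_index)
		partdesc->last_found_count++;
	else
	{
		partdesc->last_found_count = 1;
		partdesc->last_found_part_index = part_index;
		partdesc->last_found_datum_index = bound_offset;
	}

	return part_index;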
/*
 * ExecBuildSlotPartitionKeyDescription
 *
 * This works very much like BuildIndexValueDescription() and is currently
 * used for building error messages when ExecFindPartition() fails to find a
 * partition for a row.
 */

	/* If the user has table-level access, just go build the description. */

	/*
	 * Step through the columns of the partition key and make sure the
	 * user has SELECT rights on all of them.
	 */
	for (i = 0; i < partnatts; i++)
	{
		/*
		 * If this partition key column is an expression, we return no
		 * detail rather than try to figure out what column(s) the
		 * expression includes and if the user has SELECT rights on them.
		 */
	}

	for (i = 0; i < partnatts; i++)
	{
		getTypeOutputInfo(get_partition_col_typid(key, i),
						  &foutoid, &typisvarlena);

		/* truncate if needed */
		vallen = strlen(val);
		if (vallen <= maxfieldlen)
			appendBinaryStringInfo(&buf, val, vallen);
		else
		{
			vallen = pg_mbcliplen(val, vallen, maxfieldlen);
			appendBinaryStringInfo(&buf, val, vallen);
			appendStringInfoString(&buf, "...");
		}
	}
/*
 * adjust_partition_colnos
 *		Adjust the list of UPDATE target column numbers to account for
 *		attribute differences between the parent and the partition.
 *
 * Note: mustn't be called if no adjustment is required.
 */

/*
 * adjust_partition_colnos_using_map
 *		Like adjust_partition_colnos, but uses a caller-supplied map instead
 *		of assuming to map from the "root" result relation.
 *
 * Note: mustn't be called if no adjustment is required.
 */

	Assert(attrMap != NULL);	/* else we shouldn't be here */

	foreach(lc, colnos)
	{
		AttrNumber	parentattrno = lfirst_int(lc);

		if (parentattrno <= 0 ||
			parentattrno > attrMap->maplen ||
			attrMap->attnums[parentattrno - 1] == 0)
			elog(ERROR, "unexpected attno %d in target column list",
				 parentattrno);
		new_colnos = lappend_int(new_colnos,
								 attrMap->attnums[parentattrno - 1]);
	}
/*-------------------------------------------------------------------------
 * Run-Time Partition Pruning Support.
 *
 * The following series of functions exist to support the removal of unneeded
 * subplans for queries against partitioned tables.  The supporting functions
 * here are designed to work with any plan type which supports an arbitrary
 * number of subplans, e.g. Append, MergeAppend.
 *
 * When pruning involves comparison of a partition key to a constant, it's
 * done by the planner.  However, if we have a comparison to a non-constant
 * but not volatile expression, that presents an opportunity for run-time
 * pruning by the executor, allowing irrelevant partitions to be skipped
 * dynamically.
 *
 * We must distinguish expressions containing PARAM_EXEC Params from
 * expressions that don't contain those.  Even though a PARAM_EXEC Param is
 * considered to be a stable expression, it can change value from one plan
 * node scan to the next during query execution.  Stable comparison
 * expressions that don't involve such Params allow partition pruning to be
 * done once during executor startup.  Expressions that do involve such
 * Params require us to prune separately for each scan of the parent plan
 * node.
 *
 * Note that pruning away unneeded subplans during executor startup has the
 * added benefit of not having to initialize the unneeded subplans at all.
 *
 * ExecDoInitialPruning:
 *		Perform runtime "initial" pruning, if necessary, to determine the set
 *		of child subnodes that need to be initialized during ExecInitNode()
 *		for all plan nodes that contain a PartitionPruneInfo.
 *
 * ExecInitPartitionExecPruning:
 *		Updates the PartitionPruneState found at given part_prune_index in
 *		EState.es_part_prune_states for use during "exec" pruning if
 *		required.  Also returns the set of subplans to initialize that would
 *		be stored at part_prune_index in EState.es_part_prune_results by
 *		ExecDoInitialPruning().  Maps in PartitionPruneState are updated to
 *		account for initial pruning possibly having eliminated some of the
 *		subplans.
 *
 * ExecFindMatchingSubPlans:
 *		Returns indexes of matching subplans after evaluating the expressions
 *		that are safe to evaluate at a given point.  This function is first
 *		called during ExecDoInitialPruning() to find the initially matching
 *		subplans based on performing the initial pruning steps and then must
 *		be called again each time the value of a Param listed in
 *		PartitionPruneState's 'execparamids' changes.
 *-------------------------------------------------------------------------
 */
/*
 * ExecDoInitialPruning
 *		Perform runtime "initial" pruning, if necessary, to determine the set
 *		of child subnodes that need to be initialized during ExecInitNode()
 *		for plan nodes that support partition pruning.
 *
 * This function iterates over each PartitionPruneInfo entry in
 * estate->es_part_prune_infos.  For each entry, it creates a
 * PartitionPruneState and adds it to es_part_prune_states.
 * ExecInitPartitionExecPruning() accesses these states through their
 * corresponding indexes in es_part_prune_states and assigns each state to
 * the parent node's PlanState, from where it will be used for "exec"
 * pruning.
 *
 * If initial pruning steps exist for a PartitionPruneInfo entry, this
 * function executes those pruning steps and stores the result as a bitmapset
 * of valid child subplans, identifying which subplans should be initialized
 * for execution.  The results are saved in estate->es_part_prune_results.
 *
 * If no initial pruning is performed for a given PartitionPruneInfo, a NULL
 * entry is still added to es_part_prune_results to maintain alignment with
 * es_part_prune_infos.  This ensures that ExecInitPartitionExecPruning() can
 * use the same index to retrieve the pruning results.
 */
		/* Create and save the PartitionPruneState. */
		prunestate = CreatePartitionPruneState(estate, pruneinfo,
											   &all_leafpart_rtis);

		/*
		 * Perform initial pruning steps, if any, and save the result
		 * bitmapset or NULL as described in the header comment.
		 */
		if (prunestate->do_initial_prune)
			validsubplans = ExecFindMatchingSubPlans(prunestate, true,
													 &validsubplan_rtis);
		else
			validsubplan_rtis = all_leafpart_rtis;
/*
 * ExecInitPartitionExecPruning
 *		Initialize the data structures needed for runtime "exec" partition
 *		pruning and return the result of initial pruning, if available.
 *
 * 'relids' identifies the relation to which both the parent plan and the
 * PartitionPruneInfo given by 'part_prune_index' belong.
 *
 * On return, *initially_valid_subplans is assigned the set of indexes of
 * child subplans that must be initialized along with the parent plan node.
 * Initial pruning would have been performed by ExecDoInitialPruning(), if
 * necessary, and the bitmapset of surviving subplans' indexes would have
 * been stored as the part_prune_index'th element of
 * EState.es_part_prune_results.
 *
 * If subplans were indeed pruned during initial pruning, the subplan_map
 * arrays in the returned PartitionPruneState are re-sequenced to exclude
 * those subplans, but only if the maps will be needed for subsequent
 * execution pruning passes.
 */
PartitionPruneState *
ExecInitPartitionExecPruning(PlanState *planstate,
							 int n_total_subplans,
							 int part_prune_index,
							 Bitmapset *relids,
							 Bitmapset **initially_valid_subplans)
{
	/* Obtain the pruneinfo we need. */
	pruneinfo = list_nth_node(PartitionPruneInfo, estate->es_part_prune_infos,
							  part_prune_index);

	/* Its relids better match the plan node's or the planner messed up. */
	if (!bms_equal(pruneinfo->relids, relids))
		elog(ERROR, "wrong pruneinfo with relids=%s found at part_prune_index=%d contained in plan node with relids=%s",
			 bmsToString(pruneinfo->relids), part_prune_index,
			 bmsToString(relids));

	/*
	 * The PartitionPruneState would have been created by
	 * ExecDoInitialPruning() and stored as the part_prune_index'th element
	 * of EState.es_part_prune_states.
	 */
	prunestate = list_nth(estate->es_part_prune_states, part_prune_index);
	Assert(prunestate != NULL);

	/* Use the result of initial pruning done by ExecDoInitialPruning(). */
	if (prunestate->do_initial_prune)
		*initially_valid_subplans = list_nth_node(Bitmapset,
												  estate->es_part_prune_results,
												  part_prune_index);
	else
	{
		/* No pruning, so we'll need to initialize all subplans */
		Assert(n_total_subplans > 0);
		*initially_valid_subplans = bms_add_range(NULL, 0,
												  n_total_subplans - 1);
	}
	/*
	 * The exec pruning state must also be initialized, if needed, before it
	 * can be used for pruning during execution.
	 *
	 * This also re-sequences subplan indexes contained in prunestate to
	 * account for any that were removed due to initial pruning; refer to the
	 * condition in InitExecPartitionPruneContexts() that is used to
	 * determine whether to do this.  If no exec pruning needs to be done, we
	 * would thus leave the maps in an invalid state, but that's ok since
	 * that data won't be consulted again (cf initial Assert in
	 * ExecFindMatchingSubPlans).
	 */
	if (prunestate->execparamids != NULL)
		InitExecPartitionPruneContexts(prunestate, planstate,
									   *initially_valid_subplans,
									   n_total_subplans);
/*
 * CreatePartitionPruneState
 *		Build the data structure required for calling
 *		ExecFindMatchingSubPlans
 *
 * This includes PartitionPruneContexts (stored in each
 * PartitionedRelPruningData corresponding to a PartitionedRelPruneInfo),
 * which hold the ExprStates needed to evaluate pruning expressions, and
 * mapping arrays to convert partition indexes from the pruning logic
 * into subplan indexes in the parent plan node's list of child subplans.
 *
 * 'pruneinfo' is a PartitionPruneInfo as generated by
 * make_partition_pruneinfo.  Here we build a PartitionPruneState containing
 * a PartitionPruningData for each partitioning hierarchy (i.e., each sublist
 * of pruneinfo->prune_infos), each of which contains a
 * PartitionedRelPruningData for each PartitionedRelPruneInfo appearing in
 * that sublist.  This two-level system is needed to keep from confusing the
 * different hierarchies when a UNION ALL contains multiple partitioned
 * tables as children.  The data stored in each PartitionedRelPruningData can
 * be re-used each time we re-evaluate which partitions match the pruning
 * steps provided in each PartitionedRelPruneInfo.
 *
 * Note that only the PartitionPruneContexts for initial pruning are
 * initialized here.  Those required for exec pruning are initialized later
 * in ExecInitPartitionExecPruning(), as they depend on the availability of
 * the parent plan node's PlanState.
 *
 * If initial pruning steps are to be skipped (e.g., during EXPLAIN
 * (GENERIC_PLAN)), *all_leafpart_rtis will be populated with the RT indexes
 * of all leaf partitions whose scanning subnode is included in the parent
 * plan node's list of child plans.  The caller must add these RT indexes to
 * estate->es_unpruned_relids.
 */
	int			n_part_hierarchies;

	/*
	 * Expression context that will be used by partkey_datum_from_expr() to
	 * evaluate expressions for comparison against partition bounds.
	 */

	/* For data reading, executor always includes detached partitions */

	n_part_hierarchies = list_length(pruneinfo->prune_infos);
	Assert(n_part_hierarchies > 0);

	/*
	 * Allocate the data structure
	 */

	/* Save ExprContext for use during InitExecPartitionPruneContexts(). */

	/* other_subplans can change at runtime, so we need our own copy */

	/*
	 * Create a short-term memory context which we'll use when making calls
	 * to the partition pruning functions.  This avoids possible memory
	 * leaks, since the pruning functions call comparison functions that
	 * aren't under our control.
	 */

		int			npartrelpruneinfos = list_length(partrelpruneinfos);
		foreach(lc2, partrelpruneinfos)
		{
			/*
			 * We can rely on the copies of the partitioned table's partition
			 * key and partition descriptor appearing in its relcache entry,
			 * because that entry will be held open and locked for the
			 * duration of this executor run.
			 */

			/* Remember for InitExecPartitionPruneContexts(). */

			/*
			 * Initialize the subplan_map and subpart_map.
			 *
			 * The set of partitions that exist now might not be the same
			 * that existed when the plan was made.  The normal case is that
			 * it is; optimize for that case with a quick comparison, and
			 * just copy the subplan_map and make subpart_map,
			 * leafpart_rti_map point to the ones in PruneInfo.
			 *
			 * For the case where they aren't identical, we could have more
			 * partitions on either side; or even exactly the same number of
			 * them on both but the set of OIDs doesn't match fully.  Handle
			 * this by creating new subplan_map and subpart_map arrays that
			 * correspond to the ones in the PruneInfo where the new
			 * partition descriptor's OIDs match.  Any that don't match can
			 * be set to -1, as if they were pruned.  By construction, both
			 * arrays are in partition bounds order.
			 */
			if (partdesc->nparts == pinfo->nparts &&
				memcmp(partdesc->oids, pinfo->relid_map,
					   sizeof(int) * partdesc->nparts) == 0)
			{
				memcpy(pprune->subplan_map, pinfo->subplan_map,
					   sizeof(int) * pinfo->nparts);
			}
			else
			{
				int			pd_idx = 0;
				int			pp_idx;

				/*
				 * When the partition arrays are not identical, there could
				 * be some new ones but it's also possible that one was
				 * removed; we cope with both situations by walking the
				 * arrays and discarding those that don't match.
				 *
				 * If the number of partitions on both sides match, it's
				 * still possible that one partition has been detached and
				 * another attached.  Cope with that by creating a map that
				 * skips any mismatches.
				 */
				for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
				{
					/* Skip any InvalidOid relid_map entries */
					while (pd_idx < pinfo->nparts &&
						   !OidIsValid(pinfo->relid_map[pd_idx]))
						pd_idx++;

			recheck:
					if (pd_idx < pinfo->nparts &&
						pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
					{
						/* match */
						pprune->subplan_map[pp_idx] =
							pinfo->subplan_map[pd_idx];
						pprune->subpart_map[pp_idx] =
							pinfo->subpart_map[pd_idx];
						pprune->leafpart_rti_map[pp_idx] =
							pinfo->leafpart_rti_map[pd_idx];
						pd_idx++;
						continue;
					}

					/*
					 * There isn't an exact match in the corresponding
					 * positions of both arrays.  Peek ahead in
					 * pinfo->relid_map to see if we have a match for the
					 * current partition in partdesc.  Normally if a match
					 * exists it's just one element ahead, and it means the
					 * planner saw one extra partition that we no longer see
					 * now (its concurrent detach finished just in between);
					 * so we skip that one by updating pd_idx to the new
					 * location and jumping above.  We can then continue to
					 * match the rest of the elements after skipping the OID
					 * with no match; no future matches are tried for the
					 * element that was skipped, because we know the arrays
					 * to be in the same order.
					 *
					 * If we don't see a match anywhere in the rest of the
					 * pinfo->relid_map array, that means we see an element
					 * now that the planner didn't see, so mark that one as
					 * pruned and move on.
					 */
					{
						int			pd_idx2;

						for (pd_idx2 = pd_idx + 1; pd_idx2 < pinfo->nparts; pd_idx2++)
						{
							if (pinfo->relid_map[pd_idx2] == partdesc->oids[pp_idx])
								break;
						}
						if (pd_idx2 >= pinfo->nparts)
						{
							/* no match; mark this partition as pruned */
							pprune->subplan_map[pp_idx] = -1;
							pprune->subpart_map[pp_idx] = -1;
						}
						else
						{
							/* found ahead; skip the extra OID and recheck */
							pd_idx = pd_idx2;
							goto recheck;
						}
					}
				}
			}
			/* present_parts is also subject to later modification */
			pprune->present_parts = bms_copy(pinfo->present_parts);

			/*
			 * Only initial_context is initialized here.  exec_context is
			 * initialized during ExecInitPartitionExecPruning() when the
			 * parent plan's PlanState is available.
			 *
			 * Note that we must skip execution-time (both "init" and "exec")
			 * partition pruning in EXPLAIN (GENERIC_PLAN), since parameter
			 * values may be missing.
			 */
			InitPartitionPruneContext(&pprune->initial_context,
									  pprune->initial_pruning_steps,
									  partdesc, partkey, NULL,
									  econtext);

			/* Record whether initial pruning is needed at any level */

			/* Record whether exec pruning is needed at any level */

			/*
			 * Accumulate the IDs of all PARAM_EXEC Params affecting the
			 * partitioning decisions at this plan node.
			 */

	/*
	 * Return all leaf partition indexes if we're skipping pruning in
	 * the EXPLAIN (GENERIC_PLAN) case.
	 */
			int			part_index = -1;
/*
 * Initialize a PartitionPruneContext for the given list of pruning steps.
 */
static void
InitPartitionPruneContext(PartitionPruneContext *context,
						  List *pruning_steps,
						  PartitionDesc partdesc,
						  PartitionKey partkey,
						  PlanState *planstate,
						  ExprContext *econtext)
{
	/* We'll look up type-specific support functions as needed */

	/* Initialize expression state for each expression we need */
	foreach(lc, pruning_steps)
	{
		/* not needed for other step kinds */

		for (keyno = 0; keyno < partnatts; keyno++)
		{
			/* not needed for Consts */

			/*
			 * When planstate is NULL, pruning_steps is known not to
			 * contain any expressions that depend on the parent plan.
			 * Information of any available EXTERN parameters must be
			 * passed explicitly in that case, which the caller must
			 * have made available via econtext.
			 */
			if (planstate == NULL)
				context->exprstates[stateidx] =
					ExecInitExprWithParams(expr,
										   econtext->ecxt_param_list_info);
			else
				context->exprstates[stateidx] =
					ExecInitExpr(expr, context->planstate);
		}
	}
/*
 * InitExecPartitionPruneContexts
 *		Initialize exec pruning contexts deferred by
 *		CreatePartitionPruneState()
 *
 * This function finalizes exec pruning setup for a PartitionPruneState by
 * initializing contexts for pruning steps that require the parent plan's
 * PlanState.  It iterates over PartitionPruningData entries and sets up the
 * necessary execution contexts for pruning during query execution.
 *
 * Also fix the mapping of partition indexes to subplan indexes contained in
 * prunestate by considering the new list of subplans that survived initial
 * pruning.
 *
 * Current values of the indexes present in PartitionPruneState count all the
 * subplans that would be present before initial pruning was done.  If
 * initial pruning got rid of some of the subplans, any subsequent pruning
 * passes will be looking at a different set of target subplans to choose
 * from than those in the pre-initial-pruning set, so the maps in
 * PartitionPruneState containing those indexes must be updated to reflect
 * the new indexes of subplans in the post-initial-pruning set.
 */
static void
InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
							   PlanState *parent_plan,
							   Bitmapset *initially_valid_subplans,
							   int n_total_subplans)
{
	EState	   *estate;
	int		   *new_subplan_indexes = NULL;
	Bitmapset  *new_other_subplans;
	int			i;
	int			newidx;
	bool		fix_subplan_map = false;
	Assert(parent_plan != NULL);
	estate = parent_plan->state;

	/*
	 * No need to fix subplans maps if initial pruning didn't eliminate any
	 * subplans.
	 */
	if (bms_num_members(initially_valid_subplans) < n_total_subplans)
	{
		fix_subplan_map = true;

		/*
		 * First we must build a temporary array which maps old subplan
		 * indexes to new ones.  For convenience of initialization, we use
		 * 1-based indexes in this array and leave pruned items as 0.
		 */
		new_subplan_indexes = (int *) palloc0(sizeof(int) * n_total_subplans);
		newidx = 1;
		i = -1;
		while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
		{
			Assert(i < n_total_subplans);
			new_subplan_indexes[i] = newidx++;
		}
	}
	/*
	 * Now we can update each PartitionedRelPruneInfo's subplan_map with new
	 * subplan indexes.  We must also recompute its present_parts bitmap.
	 */

		/*
		 * Within each hierarchy, we perform this loop in back-to-front order
		 * so that we determine present_parts for the lowest-level
		 * partitioned tables first.  This way we can tell whether a
		 * sub-partitioned table's partitions were entirely pruned so we can
		 * exclude it from the current level's present_parts.
		 */

			int			nparts = pprune->nparts;
			int			k;

			/* Initialize PartitionPruneContext for exec pruning, if needed. */
			if (pprune->exec_pruning_steps != NIL)
			{
				/*
				 * See the comment in CreatePartitionPruneState() regarding
				 * the usage of partdesc and partkey.
				 */
				InitPartitionPruneContext(&pprune->exec_context,
										  pprune->exec_pruning_steps,
										  partdesc, partkey, parent_plan,
										  econtext);
			}
			if (!fix_subplan_map)
				continue;

			/* We just rebuild present_parts from scratch */
			bms_free(pprune->present_parts);
			pprune->present_parts = NULL;

			for (k = 0; k < nparts; k++)
			{
				int			oldidx = pprune->subplan_map[k];

				/*
				 * If this partition existed as a subplan then change the old
				 * subplan index to the new subplan index.  The new index may
				 * become -1 if the partition was pruned above, or it may
				 * just come earlier in the subplan list due to some subplans
				 * being removed earlier in the list.  If it's a
				 * subpartition, add it to present_parts unless it's entirely
				 * pruned.
				 */
				if (oldidx >= 0)
				{
					Assert(oldidx < n_total_subplans);
					pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;

					if (new_subplan_indexes[oldidx] > 0)
						pprune->present_parts =
							bms_add_member(pprune->present_parts, k);
				}
			}
	/*
	 * If we fixed subplan maps, we must also recompute the other_subplans
	 * set, since indexes in it may change.
	 */
	if (fix_subplan_map)
	{
		new_other_subplans = NULL;
		i = -1;
		while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
			new_other_subplans = bms_add_member(new_other_subplans,
												new_subplan_indexes[i] - 1);

		bms_free(prunestate->other_subplans);
		prunestate->other_subplans = new_other_subplans;

		pfree(new_subplan_indexes);
	}
/*
 * ExecFindMatchingSubPlans
 *		Determine which subplans match the pruning steps detailed in
 *		'prunestate' for the current comparison expression values.
 *
 * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated.  This
 * differentiates the initial executor-time pruning step from later runtime
 * pruning.
 *
 * The caller must pass a non-NULL validsubplan_rtis during initial pruning
 * to collect the RT indexes of leaf partitions whose subnodes will be
 * executed.  These RT indexes are later added to EState.es_unpruned_relids.
 */

	/*
	 * Either we're here on the initial prune done during pruning
	 * initialization, or we're at a point where PARAM_EXEC Params can be
	 * evaluated *and* there are steps in which to do so.
	 */
	Assert(validsubplan_rtis != NULL || !initial_prune);

	/*
	 * Switch to a temp context to avoid leaking memory in the executor's
	 * query-lifespan memory context.
	 */

	/*
	 * For each hierarchy, do the pruning tests, and add nondeletable
	 * subplans' indexes to "result".
	 */

		/*
		 * We pass the zeroth item, belonging to the root table of the
		 * hierarchy, and find_matching_subplans_recurse() takes care of
		 * recursing to other (lower-level) parents as needed.
		 */
		find_matching_subplans_recurse(prunedata, pprune, initial_prune,
									   &result, validsubplan_rtis);

	/*
	 * Expression eval may have used space in ExprContext too.  Avoid
	 * accessing exec_context during initial pruning, as it is not valid
	 * at that point.
	 */

	/* Add in any subplans that partition pruning didn't account for */

	/* Copy result out of the temp context before we reset it */
	result = bms_copy(result);
	if (validsubplan_rtis)
		*validsubplan_rtis = bms_copy(*validsubplan_rtis);
/*
 * find_matching_subplans_recurse
 *		Recursive worker function for ExecFindMatchingSubPlans
 *
 * Adds valid (non-prunable) subplan IDs to *validsubplans.  If
 * *validsubplan_rtis is non-NULL, it also adds the RT indexes of their
 * corresponding partitions, but only if they are leaf partitions.
 */

	/* Guard against stack overflow due to overly deep partition hierarchy. */
	check_stack_depth();

	/*
	 * Prune as appropriate, if we have pruning steps matching the current
	 * execution context.  Otherwise just include all partitions at this
	 * level.
	 */

	/* Translate partset into subplan indexes */

			/*
			 * Only report leaf partitions.  Non-leaf partitions may appear
			 * here when they use an unflattened Append or MergeAppend.
			 */

			find_matching_subplans_recurse(prunedata,
										   &prunedata->partrelprunedata[partidx],
										   initial_prune, validsubplans,
										   validsubplan_rtis);

			/*
			 * We get here if the planner already pruned all the sub-
			 * partitions for this partition.  Silently ignore this
			 * partition in this case.  The end result is the same: we
			 * would have pruned all partitions just the same, but we
			 * don't have any pruning steps to execute to verify this.
			 */