/*-------------------------------------------------------------------------
 *
 * execGrouping.c
 *	  executor utility routines for grouping, hashing, and aggregation
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/executor/execGrouping.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * Define parameters for tuple hash table code generation. The interface is
 * *also* declared in execnodes.h (to generate the types, which are externally
 * visible).
 */
#define SH_PREFIX tuplehash
#define SH_ELEMENT_TYPE TupleHashEntryData
#define SH_KEY_TYPE MinimalTuple
#define SH_KEY firstTuple
#define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
#define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
#define SH_SCOPE extern
#define SH_STORE_HASH
#define SH_GET_HASH(tb, a) a->hash
#define SH_DEFINE
#include "lib/simplehash.h"
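
/*
 * Illustrative sketch (not part of the original file): with the parameters
 * above, simplehash.h emits a family of functions named tuplehash_create(),
 * tuplehash_insert(), tuplehash_lookup(), tuplehash_iterate(), and so on,
 * which the rest of this file calls.  For example, scanning every entry of a
 * TupleHashTable via the iterator wrappers from execnodes.h looks roughly
 * like this; the helper name is invented for illustration.
 */
static inline void
tuplehash_scan_sketch(TupleHashTable hashtable)
{
	TupleHashIterator hashiter;
	TupleHashEntry entry;

	InitTupleHashIterator(hashtable, &hashiter);
	while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
	{
		/* entry->firstTuple holds the group's representative MinimalTuple */
	}
	TermTupleHashIterator(&hashiter);
}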

/*****************************************************************************
 *		Utility routines for grouping tuples together
 *****************************************************************************/

/*
 * execTuplesMatchPrepare
 *		Build expression that can be evaluated using ExecQual(), returning
 *		whether an ExprContext's inner/outer tuples are NOT DISTINCT
 */
ExprState *
execTuplesMatchPrepare(TupleDesc desc, int numCols,
					   const AttrNumber *keyColIdx,
					   const Oid *eqOperators,
					   const Oid *collations,
					   PlanState *parent)
{
	Oid		   *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
	ExprState  *expr;

	/* lookup equality functions */
	for (int i = 0; i < numCols; i++)
		eqFunctions[i] = get_opcode(eqOperators[i]);

	/* build actual expression */
	expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
								  numCols, keyColIdx, eqFunctions, collations,
								  parent);

	return expr;
}
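
/*
 * Hedged usage sketch (not part of the original file): the ExprState built
 * above is evaluated against the ExprContext's inner and outer tuple slots,
 * for example to decide whether a newly fetched tuple still belongs to the
 * current group.  The helper name and slot roles are illustrative only,
 * loosely following how a grouping node drives this expression.
 */
static inline bool
tuple_matches_group_sketch(ExprState *eqqual, ExprContext *econtext,
						   TupleTableSlot *grouprep, TupleTableSlot *newslot)
{
	econtext->ecxt_outertuple = grouprep;	/* representative tuple of the group */
	econtext->ecxt_innertuple = newslot;	/* candidate tuple to compare */

	/* evaluate the NOT DISTINCT comparison, resetting per-tuple memory */
	return ExecQualAndReset(eqqual, econtext);
}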

/*
 * execTuplesHashPrepare
 *		Look up the equality and hashing functions needed for a TupleHashTable.
 *
 * This is similar to execTuplesMatchPrepare, but we also need to find the
 * hash functions associated with the equality operators.  *eqFunctions and
 * *hashFunctions receive the palloc'd result arrays.
 *
 * Note: we expect that the given operators are not cross-type comparisons.
 */
void
execTuplesHashPrepare(int numCols,
					  const Oid *eqOperators,
					  Oid **eqFuncOids,
					  FmgrInfo **hashFunctions)
{
	*eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
	*hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));

	for (int i = 0; i < numCols; i++)
	{
		Oid			eq_opr = eqOperators[i];
		Oid			eq_function;
		Oid			left_hash_function;
		Oid			right_hash_function;

		eq_function = get_opcode(eq_opr);
		if (!get_op_hash_functions(eq_opr,
								   &left_hash_function, &right_hash_function))
			elog(ERROR, "could not find hash function for hash operator %u",
				 eq_opr);
		/* We're not supporting cross-type cases here */
		Assert(left_hash_function == right_hash_function);
		(*eqFuncOids)[i] = eq_function;
		fmgr_info(right_hash_function, &(*hashFunctions)[i]);
	}
}

/*****************************************************************************
 *		Utility routines for all-in-memory hash tables
 *
 * These routines build hash tables for grouping tuples together (eg, for
 * hash aggregation).  There is one entry for each not-distinct set of tuples
 * presented.
 *****************************************************************************/

/*
 * Construct an empty TupleHashTable
 *
 * parent: PlanState node that will own this hash table
 * inputDesc: tuple descriptor for input tuples
 * inputOps: slot ops for input tuples, or NULL if unknown or not fixed
 * numCols: number of columns to be compared (length of next 4 arrays)
 * keyColIdx: indexes of tuple columns to compare
 * eqfuncoids: OIDs of equality comparison functions to use
 * hashfunctions: FmgrInfos of datatype-specific hashing functions to use
 * collations: collations to use in comparisons
 * nbuckets: initial estimate of hashtable size
 * additionalsize: size of data that may be stored along with the hash entry
 * metacxt: memory context for long-lived allocation, but not per-entry data
 * tablecxt: memory context in which to store table entries
 * tempcxt: short-lived context for evaluating hash and comparison functions
 * use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
 *
 * The hashfunctions array may be made with execTuplesHashPrepare().  Note they
 * are not cross-type functions, but expect to see the table datatype(s)
 * on both sides.
 *
 * Note that the keyColIdx, hashfunctions, and collations arrays must be
 * allocated in storage that will live as long as the hashtable does.
 *
 * LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak
 * memory in the tempcxt.  It is caller's responsibility to reset that context
 * reasonably often, typically once per tuple.  (We do it that way, rather
 * than managing an extra context within the hashtable, because in many cases
 * the caller can specify a tempcxt that it needs to reset per-tuple anyway.)
 */
TupleHashTable
BuildTupleHashTable(PlanState *parent, TupleDesc inputDesc,
					const TupleTableSlotOps *inputOps,
					int numCols, AttrNumber *keyColIdx,
					const Oid *eqfuncoids, FmgrInfo *hashfunctions,
					Oid *collations, long nbuckets, Size additionalsize,
					MemoryContext metacxt, MemoryContext tablecxt,
					MemoryContext tempcxt, bool use_variable_hash_iv)
{
	TupleHashTable hashtable;
	Size		hash_mem_limit;
	bool		allow_jit;

	additionalsize = MAXALIGN(additionalsize);

	/* Limit initial table size request to not more than hash_mem */
	hash_mem_limit = get_hash_memory_limit() / sizeof(TupleHashEntryData);
	if (nbuckets > hash_mem_limit)
		nbuckets = hash_mem_limit;

	hashtable->tableslot = NULL;	/* will be made on first lookup */

	/*
	 * If parallelism is in use, even if the leader backend is performing the
	 * scan itself, we don't want to create the hashtable exactly the same way
	 * in all workers.  As hashtables are iterated over in keyspace-order,
	 * doing so in all processes in the same way is likely to lead to
	 * "unbalanced" hashtables when the table size initially is
	 * underestimated.
	 */
	if (use_variable_hash_iv)
		hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
	else
		hashtable->hash_iv = 0;

	hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);

	/*
	 * We copy the input tuple descriptor just for safety --- we assume all
	 * input tuples will have equivalent descriptors.
	 */

	/*
	 * If the caller fails to make the metacxt different from the tablecxt,
	 * allowing JIT would lead to the generated functions to a) live longer
	 * than the query or b) be re-generated each time the table is being
	 * reset.  Therefore prevent JIT from being used in that case, by not
	 * providing a parent node (which prevents accessing the JitContext in the
	 * EState).
	 */
	allow_jit = (metacxt != tablecxt);

	/* build hash ExprState for all columns */
	hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc, inputOps,
														hashfunctions,
														collations, numCols,
														keyColIdx,
														allow_jit ? parent : NULL,
														hashtable->hash_iv);

	/* build comparator for all columns */
	hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
													inputOps,
													&TTSOpsMinimalTuple,
													numCols,
													keyColIdx, eqfuncoids, collations,
													allow_jit ? parent : NULL);

	/*
	 * While not pretty, it's ok to not shut down this context, but instead
	 * rely on the containing memory context being reset, as
	 * ExecBuildGroupingEqual() only builds a very simple expression calling
	 * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
	 */
	hashtable->exprcontext = CreateStandaloneExprContext();

	return hashtable;
}
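
/*
 * Hedged caller sketch (not part of the original file): one plausible way
 * the parameters above fit together.  MyGroupData, the helper name, and the
 * context roles are placeholders; real callers live in nodeAgg.c,
 * nodeSetOp.c, nodeRecursiveunion.c and nodeSubplan.c.
 */
typedef struct MyGroupData
{
	int64		count;			/* whatever per-group state the caller wants */
} MyGroupData;

static inline TupleHashTable
build_group_table_sketch(PlanState *ps, TupleDesc inputDesc,
						 int numCols, AttrNumber *keyColIdx,
						 const Oid *eqfuncoids, FmgrInfo *hashfunctions,
						 Oid *collations, long nbuckets,
						 MemoryContext metacxt, MemoryContext entrycxt,
						 MemoryContext pertuplecxt)
{
	return BuildTupleHashTable(ps, inputDesc,
							   NULL,	/* input slot ops not known/fixed */
							   numCols, keyColIdx,
							   eqfuncoids, hashfunctions, collations,
							   nbuckets,
							   sizeof(MyGroupData),		/* additionalsize */
							   metacxt,		/* long-lived table metadata */
							   entrycxt,	/* per-entry tuples + MyGroupData */
							   pertuplecxt, /* caller resets once per tuple */
							   false);		/* no per-worker hash IV */
}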

/*
 * Reset contents of the hashtable to be empty, preserving all the non-content
 * state.  Note that the tablecxt passed to BuildTupleHashTable() should
 * also be reset, otherwise there will be leaks.
 */
void
ResetTupleHashTable(TupleHashTable hashtable)
{
	tuplehash_reset(hashtable->hashtab);
}
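
/*
 * Hedged usage sketch (not part of the original file): per the note above, a
 * caller that rebuilds its table (e.g. on rescan) typically resets the entry
 * context and the table together.  Names are placeholders.
 */
static inline void
rebuild_group_table_sketch(TupleHashTable hashtable, MemoryContext entrycxt)
{
	MemoryContextReset(entrycxt);	/* frees all per-entry tuples and data */
	ResetTupleHashTable(hashtable); /* empties the table itself */
}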

/*
 * Find or create a hashtable entry for the tuple group containing the
 * given tuple.  The tuple must be the same type as the hashtable entries.
 *
 * If isnew is NULL, we do not create new entries; we return NULL if no
 * match is found.
 *
 * If hash is not NULL, we set it to the calculated hash value.  This allows
 * callers access to the hash value even if no entry is returned.
 *
 * If isnew isn't NULL, then a new entry is created if no existing entry
 * matches.  On return, *isnew is true if the entry is newly created,
 * false if it existed already.  The additional data in the new entry has
 * been zeroed.
 */
TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
					 bool *isnew, uint32 *hash)
{
	TupleHashEntry entry;
	MemoryContext oldContext;
	uint32		local_hash;

	/* Need to run the hash functions in short-lived context */
	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

	/* set up data needed by hash and match functions */
	hashtable->inputslot = slot;
	hashtable->in_hash_expr = hashtable->tab_hash_expr;
	hashtable->cur_eq_func = hashtable->tab_eq_func;

	local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
	if (hash != NULL)
		*hash = local_hash;

	Assert(entry == NULL || entry->hash == local_hash);

	MemoryContextSwitchTo(oldContext);
	return entry;
}

/*
 * Compute the hash value for a tuple
 */
uint32
TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
{
	MemoryContext oldContext;
	uint32		hash;

	hashtable->inputslot = slot;
	hashtable->in_hash_expr = hashtable->tab_hash_expr;

	/* Need to run the hash functions in short-lived context */
	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
	hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
	MemoryContextSwitchTo(oldContext);

	return hash;
}

/*
 * A variant of LookupTupleHashEntry for callers that have already computed
 * the hash value.
 */
TupleHashEntry
LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
						 bool *isnew, uint32 hash)
{
	TupleHashEntry entry;
	MemoryContext oldContext;

	/* Need to run the hash functions in short-lived context */
	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

	/* set up data needed by hash and match functions */
	hashtable->inputslot = slot;
	hashtable->in_hash_expr = hashtable->tab_hash_expr;
	hashtable->cur_eq_func = hashtable->tab_eq_func;

	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
	Assert(entry == NULL || entry->hash == hash);

	MemoryContextSwitchTo(oldContext);
	return entry;
}

/*
 * Search for a hashtable entry matching the given tuple.  No entry is
 * created if there's not a match.  This is similar to the non-creating
 * case of LookupTupleHashEntry, except that it supports cross-type
 * comparisons, in which the given tuple is not of the same type as the
 * table entries.  The caller must provide the hash ExprState to use for
 * the input tuple, as well as the equality ExprState, since these may be
 * different from the table's internal functions.
 */
TupleHashEntry
FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
				   ExprState *eqcomp, ExprState *hashexpr)
{
	TupleHashEntry entry;
	MemoryContext oldContext;
	MinimalTuple key;

	/* Need to run the hash functions in short-lived context */
	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

	/* Set up data needed by hash and match functions */
	hashtable->inputslot = slot;
	hashtable->in_hash_expr = hashexpr;
	hashtable->cur_eq_func = eqcomp;

	/* Search the hash table */
	key = NULL;					/* flag to reference inputslot */
	entry = tuplehash_lookup(hashtable->hashtab, key);
	MemoryContextSwitchTo(oldContext);

	return entry;
}
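
/*
 * Hedged usage sketch (not part of the original file): probing the table
 * with tuples of a different (but hash-compatible) row type.  The caller
 * builds its own hash and equality ExprStates over the probe tuple's
 * descriptor, for instance with ExecBuildHash32FromAttrs() and
 * ExecBuildGroupingEqual(), and passes them in; names here are placeholders.
 */
static inline bool
probe_table_crosstype_sketch(TupleHashTable hashtable, TupleTableSlot *probeslot,
							 ExprState *crosstype_eq, ExprState *crosstype_hash)
{
	TupleHashEntry entry;

	entry = FindTupleHashEntry(hashtable, probeslot,
							   crosstype_eq, crosstype_hash);
	return entry != NULL;
}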

/*
 * If tuple is NULL, use the input slot instead. This convention avoids the
 * need to materialize virtual input tuples unless they actually need to get
 * copied into the table.
 *
 * Also, the caller must select an appropriate memory context for running
 * the hash functions.
 */
static uint32
TupleHashTableHash_internal(struct tuplehash_hash *tb,
							const MinimalTuple tuple)
{
	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
	uint32		hashkey;
	bool		isnull;

	if (tuple == NULL)
	{
		/* Process the current input tuple for the table */
		hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
		hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
											  hashtable->exprcontext,
											  &isnull));
	}
	else
	{
		/*
		 * Process a tuple already stored in the table.
		 *
		 * (this case never actually occurs due to the way simplehash.h is
		 * used, as the hash-value is stored in the entries)
		 */
		ExecStoreMinimalTuple(tuple, hashtable->tableslot, false);
		hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
		hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
											  hashtable->exprcontext,
											  &isnull));
	}

	/*
	 * The hashing done above, even with an initial value, doesn't tend to
	 * result in good hash perturbation.  Running the value produced above
	 * through murmurhash32 leads to near perfect hash perturbation.
	 */
	return murmurhash32(hashkey);
}
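
/*
 * For reference (not part of the original file): murmurhash32(), declared in
 * common/hashfn.h, is essentially the standard 32-bit murmur3 finalizer
 * ("fmix32").  A sketch of its shape:
 */
static inline uint32
murmurhash32_sketch(uint32 h)
{
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;
	return h;
}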

/*
 * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
 * so that we can avoid switching the memory context multiple times for
 * LookupTupleHashEntry.
 *
 * NB: This function may or may not change the memory context. Caller is
 * expected to change it back.
 */
static TupleHashEntry
LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
							  bool *isnew, uint32 hash)
{
	TupleHashEntryData *entry;
	bool		found;
	MinimalTuple key;

	key = NULL;					/* flag to reference inputslot */

	if (isnew)
	{
		entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);

		if (found)
		{
			/* found pre-existing entry */
			*isnew = false;
		}
		else
		{
			/* created new entry */
			*isnew = true;

			MemoryContextSwitchTo(hashtable->tablecxt);

			/*
			 * Copy the first tuple into the table context, and request
			 * additionalsize extra bytes before the allocation.
			 *
			 * The caller can get a pointer to the additional data with
			 * TupleHashEntryGetAdditional(), and store arbitrary data there.
			 * Placing both the tuple and additional data in the same
			 * allocation avoids the need to store an extra pointer in
			 * TupleHashEntryData or allocate an additional chunk.
			 */
			entry->firstTuple = ExecCopySlotMinimalTupleExtra(slot,
															  hashtable->additionalsize);
		}
	}
	else
	{
		entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
	}

	return entry;
}
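
/*
 * Hedged caller sketch (not part of the original file), continuing the
 * MyGroupData example above: the additionalsize bytes live in the same
 * allocation as the entry's copied tuple and are reached with
 * TupleHashEntryGetAdditional(), assumed here to take the hashtable and the
 * entry.  The helper name is invented; per-group state is initialized
 * explicitly for new entries.
 */
static inline void
count_group_member_sketch(TupleHashTable hashtable, TupleTableSlot *slot)
{
	bool		isnew;
	TupleHashEntry entry;
	MyGroupData *grp;

	entry = LookupTupleHashEntry(hashtable, slot, &isnew, NULL);
	grp = (MyGroupData *) TupleHashEntryGetAdditional(hashtable, entry);

	if (isnew)
		grp->count = 0;			/* initialize state for a new group */
	grp->count++;

	/* hash/equality evaluation may have leaked into the tempcxt */
	MemoryContextReset(hashtable->tempcxt);
}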

/*
 * See whether two tuples (presumably of the same hash value) match
 */
static int
TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1,
					const MinimalTuple tuple2)
{
	TupleTableSlot *slot1;
	TupleTableSlot *slot2;
	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
	ExprContext *econtext = hashtable->exprcontext;

	/*
	 * We assume that simplehash.h will only ever call us with the first
	 * argument being an actual table entry, and the second argument being
	 * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
	 * could be supported too, but is not currently required.
	 */
	Assert(tuple1 != NULL);
	slot1 = hashtable->tableslot;
	ExecStoreMinimalTuple(tuple1, slot1, false);
	Assert(tuple2 == NULL);
	slot2 = hashtable->inputslot;

	/* For crosstype comparisons, the inputslot must be first */
	econtext->ecxt_innertuple = slot2;
	econtext->ecxt_outertuple = slot1;

	return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
}