[フレーム]

execGrouping.c

1/*-------------------------------------------------------------------------

2 *

3 * execGrouping.c

4 * executor utility routines for grouping, hashing, and aggregation

5 *

8 *

9 *

10 * IDENTIFICATION

11 * src/backend/executor/execGrouping.c

12 *

13 *-------------------------------------------------------------------------

14 */

15#include "postgres.h"

16

17#include "access/parallel.h"

18#include "common/hashfn.h"

19#include "executor/executor.h"

20#include "miscadmin.h"

21#include "utils/lsyscache.h"

22

23static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);

24static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,

25 const MinimalTuple tuple);

26static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,

27 TupleTableSlot *slot,

28 bool *isnew, uint32 hash);

29

30/*

31 * Define parameters for tuple hash table code generation. The interface is

32 * *also* declared in execnodes.h (to generate the types, which are externally

33 * visible).

34 */

35 #define SH_PREFIX tuplehash

36 #define SH_ELEMENT_TYPE TupleHashEntryData

37 #define SH_KEY_TYPE MinimalTuple

38 #define SH_KEY firstTuple

39 #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)

40 #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0

41 #define SH_SCOPE extern

42 #define SH_STORE_HASH

43 #define SH_GET_HASH(tb, a) a->hash

44 #define SH_DEFINE

45#include "lib/simplehash.h"

46

47

48/*****************************************************************************

49 * Utility routines for grouping tuples together

50 *****************************************************************************/

51

52/*

53 * execTuplesMatchPrepare

54 * Build expression that can be evaluated using ExecQual(), returning

55 * whether an ExprContext's inner/outer tuples are NOT DISTINCT

56 */

57ExprState *

58 execTuplesMatchPrepare(TupleDesc desc,

59 int numCols,

60 const AttrNumber *keyColIdx,

61 const Oid *eqOperators,

62 const Oid *collations,

63 PlanState *parent)

64{

65 Oid *eqFunctions;

66 int i;

67 ExprState *expr;

68

69 if (numCols == 0)

70 return NULL;

71

72 eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));

73

74 /* lookup equality functions */

75 for (i = 0; i < numCols; i++)

76 eqFunctions[i] = get_opcode(eqOperators[i]);

77

78 /* build actual expression */

79 expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,

80 numCols, keyColIdx, eqFunctions, collations,

81 parent);

82

83 return expr;

84}

85

86/*

87 * execTuplesHashPrepare

88 * Look up the equality and hashing functions needed for a TupleHashTable.

89 *

90 * This is similar to execTuplesMatchPrepare, but we also need to find the

91 * hash functions associated with the equality operators. *eqFunctions and

92 * *hashFunctions receive the palloc'd result arrays.

93 *

94 * Note: we expect that the given operators are not cross-type comparisons.

95 */

96void

97 execTuplesHashPrepare(int numCols,

98 const Oid *eqOperators,

99 Oid **eqFuncOids,

100 FmgrInfo **hashFunctions)

101{

102 int i;

103

104 *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));

105 *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));

106

107 for (i = 0; i < numCols; i++)

108 {

109 Oid eq_opr = eqOperators[i];

110 Oid eq_function;

111 Oid left_hash_function;

112 Oid right_hash_function;

113

114 eq_function = get_opcode(eq_opr);

115 if (!get_op_hash_functions(eq_opr,

116 &left_hash_function, &right_hash_function))

117 elog(ERROR, "could not find hash function for hash operator %u",

118 eq_opr);

119 /* We're not supporting cross-type cases here */

120 Assert(left_hash_function == right_hash_function);

121 (*eqFuncOids)[i] = eq_function;

122 fmgr_info(right_hash_function, &(*hashFunctions)[i]);

123 }

124}

125

126

127/*****************************************************************************

128 * Utility routines for all-in-memory hash tables

129 *

130 * These routines build hash tables for grouping tuples together (eg, for

131 * hash aggregation). There is one entry for each not-distinct set of tuples

132 * presented.

133 *****************************************************************************/

134

135/*

136 * Construct an empty TupleHashTable

137 *

138 * parent: PlanState node that will own this hash table

139 * inputDesc: tuple descriptor for input tuples

140 * inputOps: slot ops for input tuples, or NULL if unknown or not fixed

141 * numCols: number of columns to be compared (length of next 4 arrays)

142 * keyColIdx: indexes of tuple columns to compare

143 * eqfuncoids: OIDs of equality comparison functions to use

144 * hashfunctions: FmgrInfos of datatype-specific hashing functions to use

145 * collations: collations to use in comparisons

146 * nbuckets: initial estimate of hashtable size

147 * additionalsize: size of data that may be stored along with the hash entry

148 * metacxt: memory context for long-lived allocation, but not per-entry data

149 * tablecxt: memory context in which to store table entries

150 * tempcxt: short-lived context for evaluation hash and comparison functions

151 * use_variable_hash_iv: if true, adjust hash IV per-parallel-worker

152 *

153 * The hashfunctions array may be made with execTuplesHashPrepare(). Note they

154 * are not cross-type functions, but expect to see the table datatype(s)

155 * on both sides.

156 *

157 * Note that the keyColIdx, hashfunctions, and collations arrays must be

158 * allocated in storage that will live as long as the hashtable does.

159 *

160 * LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak

161 * memory in the tempcxt. It is caller's responsibility to reset that context

162 * reasonably often, typically once per tuple. (We do it that way, rather

163 * than managing an extra context within the hashtable, because in many cases

164 * the caller can specify a tempcxt that it needs to reset per-tuple anyway.)

165 */

166TupleHashTable

167 BuildTupleHashTable(PlanState *parent,

168 TupleDesc inputDesc,

169 const TupleTableSlotOps *inputOps,

170 int numCols,

171 AttrNumber *keyColIdx,

172 const Oid *eqfuncoids,

173 FmgrInfo *hashfunctions,

174 Oid *collations,

175 long nbuckets,

176 Size additionalsize,

177 MemoryContext metacxt,

178 MemoryContext tablecxt,

179 MemoryContext tempcxt,

180 bool use_variable_hash_iv)

181{

182 TupleHashTable hashtable;

183 Size entrysize;

184 Size hash_mem_limit;

185 MemoryContext oldcontext;

186 bool allow_jit;

187 uint32 hash_iv = 0;

188

189 Assert(nbuckets > 0);

190 additionalsize = MAXALIGN(additionalsize);

191 entrysize = sizeof(TupleHashEntryData) + additionalsize;

192

193 /* Limit initial table size request to not more than hash_mem */

194 hash_mem_limit = get_hash_memory_limit() / entrysize;

195 if (nbuckets > hash_mem_limit)

196 nbuckets = hash_mem_limit;

197

198 oldcontext = MemoryContextSwitchTo(metacxt);

199

200 hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));

201

202 hashtable->numCols = numCols;

203 hashtable->keyColIdx = keyColIdx;

204 hashtable->tab_collations = collations;

205 hashtable->tablecxt = tablecxt;

206 hashtable->tempcxt = tempcxt;

207 hashtable->additionalsize = additionalsize;

208 hashtable->tableslot = NULL; /* will be made on first lookup */

209 hashtable->inputslot = NULL;

210 hashtable->in_hash_expr = NULL;

211 hashtable->cur_eq_func = NULL;

212

213 /*

214 * If parallelism is in use, even if the leader backend is performing the

215 * scan itself, we don't want to create the hashtable exactly the same way

216 * in all workers. As hashtables are iterated over in keyspace-order,

217 * doing so in all processes in the same way is likely to lead to

218 * "unbalanced" hashtables when the table size initially is

219 * underestimated.

220 */

221 if (use_variable_hash_iv)

222 hash_iv = murmurhash32(ParallelWorkerNumber);

223

224 hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);

225

226 /*

227 * We copy the input tuple descriptor just for safety --- we assume all

228 * input tuples will have equivalent descriptors.

229 */

230 hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),

231 &TTSOpsMinimalTuple);

232

233 /*

234 * If the caller fails to make the metacxt different from the tablecxt,

235 * allowing JIT would lead to the generated functions to a) live longer

236 * than the query or b) be re-generated each time the table is being

237 * reset. Therefore prevent JIT from being used in that case, by not

238 * providing a parent node (which prevents accessing the JitContext in the

239 * EState).

240 */

241 allow_jit = (metacxt != tablecxt);

242

243 /* build hash ExprState for all columns */

244 hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,

245 inputOps,

246 hashfunctions,

247 collations,

248 numCols,

249 keyColIdx,

250 allow_jit ? parent : NULL,

251 hash_iv);

252

253 /* build comparator for all columns */

254 hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,

255 inputOps,

256 &TTSOpsMinimalTuple,

257 numCols,

258 keyColIdx, eqfuncoids, collations,

259 allow_jit ? parent : NULL);

260

261 /*

262 * While not pretty, it's ok to not shut down this context, but instead

263 * rely on the containing memory context being reset, as

264 * ExecBuildGroupingEqual() only builds a very simple expression calling

265 * functions (i.e. nothing that'd employ RegisterExprContextCallback()).

266 */

267 hashtable->exprcontext = CreateStandaloneExprContext();

268

269 MemoryContextSwitchTo(oldcontext);

270

271 return hashtable;

272}

273

274/*

275 * Reset contents of the hashtable to be empty, preserving all the non-content

276 * state. Note that the tablecxt passed to BuildTupleHashTable() should

277 * also be reset, otherwise there will be leaks.

278 */

279void

280 ResetTupleHashTable(TupleHashTable hashtable)

281{

282 tuplehash_reset(hashtable->hashtab);

283}

284

285/*

286 * Find or create a hashtable entry for the tuple group containing the

287 * given tuple. The tuple must be the same type as the hashtable entries.

288 *

289 * If isnew is NULL, we do not create new entries; we return NULL if no

290 * match is found.

291 *

292 * If hash is not NULL, we set it to the calculated hash value. This allows

293 * callers access to the hash value even if no entry is returned.

294 *

295 * If isnew isn't NULL, then a new entry is created if no existing entry

296 * matches. On return, *isnew is true if the entry is newly created,

297 * false if it existed already. The additional data in the new entry has

298 * been zeroed.

299 */

300TupleHashEntry

301 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,

302 bool *isnew, uint32 *hash)

303{

304 TupleHashEntry entry;

305 MemoryContext oldContext;

306 uint32 local_hash;

307

308 /* Need to run the hash functions in short-lived context */

309 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

310

311 /* set up data needed by hash and match functions */

312 hashtable->inputslot = slot;

313 hashtable->in_hash_expr = hashtable->tab_hash_expr;

314 hashtable->cur_eq_func = hashtable->tab_eq_func;

315

316 local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);

317 entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);

318

319 if (hash != NULL)

320 *hash = local_hash;

321

322 Assert(entry == NULL || entry->hash == local_hash);

323

324 MemoryContextSwitchTo(oldContext);

325

326 return entry;

327}

328

329/*

330 * Compute the hash value for a tuple

331 */

332uint32

333 TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)

334{

335 MemoryContext oldContext;

336 uint32 hash;

337

338 hashtable->inputslot = slot;

339 hashtable->in_hash_expr = hashtable->tab_hash_expr;

340

341 /* Need to run the hash functions in short-lived context */

342 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

343

344 hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);

345

346 MemoryContextSwitchTo(oldContext);

347

348 return hash;

349}

350

351/*

352 * A variant of LookupTupleHashEntry for callers that have already computed

353 * the hash value.

354 */

355TupleHashEntry

356 LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,

357 bool *isnew, uint32 hash)

358{

359 TupleHashEntry entry;

360 MemoryContext oldContext;

361

362 /* Need to run the hash functions in short-lived context */

363 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

364

365 /* set up data needed by hash and match functions */

366 hashtable->inputslot = slot;

367 hashtable->in_hash_expr = hashtable->tab_hash_expr;

368 hashtable->cur_eq_func = hashtable->tab_eq_func;

369

370 entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);

371 Assert(entry == NULL || entry->hash == hash);

372

373 MemoryContextSwitchTo(oldContext);

374

375 return entry;

376}

377

378/*

379 * Search for a hashtable entry matching the given tuple. No entry is

380 * created if there's not a match. This is similar to the non-creating

381 * case of LookupTupleHashEntry, except that it supports cross-type

382 * comparisons, in which the given tuple is not of the same type as the

383 * table entries. The caller must provide the hash ExprState to use for

384 * the input tuple, as well as the equality ExprState, since these may be

385 * different from the table's internal functions.

386 */

387TupleHashEntry

388 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,

389 ExprState *eqcomp,

390 ExprState *hashexpr)

391{

392 TupleHashEntry entry;

393 MemoryContext oldContext;

394 MinimalTuple key;

395

396 /* Need to run the hash functions in short-lived context */

397 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

398

399 /* Set up data needed by hash and match functions */

400 hashtable->inputslot = slot;

401 hashtable->in_hash_expr = hashexpr;

402 hashtable->cur_eq_func = eqcomp;

403

404 /* Search the hash table */

405 key = NULL; /* flag to reference inputslot */

406 entry = tuplehash_lookup(hashtable->hashtab, key);

407 MemoryContextSwitchTo(oldContext);

408

409 return entry;

410}

411

412/*

413 * If tuple is NULL, use the input slot instead. This convention avoids the

414 * need to materialize virtual input tuples unless they actually need to get

415 * copied into the table.

416 *

417 * Also, the caller must select an appropriate memory context for running

418 * the hash functions.

419 */

420static uint32

421 TupleHashTableHash_internal(struct tuplehash_hash *tb,

422 const MinimalTuple tuple)

423{

424 TupleHashTable hashtable = (TupleHashTable) tb->private_data;

425 uint32 hashkey;

426 TupleTableSlot *slot;

427 bool isnull;

428

429 if (tuple == NULL)

430 {

431 /* Process the current input tuple for the table */

432 hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;

433 hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,

434 hashtable->exprcontext,

435 &isnull));

436 }

437 else

438 {

439 /*

440 * Process a tuple already stored in the table.

441 *

442 * (this case never actually occurs due to the way simplehash.h is

443 * used, as the hash-value is stored in the entries)

444 */

445 slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;

446 ExecStoreMinimalTuple(tuple, slot, false);

447 hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,

448 hashtable->exprcontext,

449 &isnull));

450 }

451

452 /*

453 * The hashing done above, even with an initial value, doesn't tend to

454 * result in good hash perturbation. Running the value produced above

455 * through murmurhash32 leads to near perfect hash perturbation.

456 */

457 return murmurhash32(hashkey);

458}

459

460/*

461 * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful

462 * so that we can avoid switching the memory context multiple times for

463 * LookupTupleHashEntry.

464 *

465 * NB: This function may or may not change the memory context. Caller is

466 * expected to change it back.

467 */

468static inline TupleHashEntry

469 LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,

470 bool *isnew, uint32 hash)

471{

472 TupleHashEntryData *entry;

473 bool found;

474 MinimalTuple key;

475

476 key = NULL; /* flag to reference inputslot */

477

478 if (isnew)

479 {

480 entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);

481

482 if (found)

483 {

484 /* found pre-existing entry */

485 *isnew = false;

486 }

487 else

488 {

489 /* created new entry */

490 *isnew = true;

491

492 MemoryContextSwitchTo(hashtable->tablecxt);

493

494 /*

495 * Copy the first tuple into the table context, and request

496 * additionalsize extra bytes before the allocation.

497 *

498 * The caller can get a pointer to the additional data with

499 * TupleHashEntryGetAdditional(), and store arbitrary data there.

500 * Placing both the tuple and additional data in the same

501 * allocation avoids the need to store an extra pointer in

502 * TupleHashEntryData or allocate an additional chunk.

503 */

504 entry->firstTuple = ExecCopySlotMinimalTupleExtra(slot,

505 hashtable->additionalsize);

506 }

507 }

508 else

509 {

510 entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);

511 }

512

513 return entry;

514}

515

516/*

517 * See whether two tuples (presumably of the same hash value) match

518 */

519static int

520 TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)

521{

522 TupleTableSlot *slot1;

523 TupleTableSlot *slot2;

524 TupleHashTable hashtable = (TupleHashTable) tb->private_data;

525 ExprContext *econtext = hashtable->exprcontext;

526

527 /*

528 * We assume that simplehash.h will only ever call us with the first

529 * argument being an actual table entry, and the second argument being

530 * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction

531 * could be supported too, but is not currently required.

532 */

533 Assert(tuple1 != NULL);

534 slot1 = hashtable->tableslot;

535 ExecStoreMinimalTuple(tuple1, slot1, false);

536 Assert(tuple2 == NULL);

537 slot2 = hashtable->inputslot;

538

539 /* For crosstype comparisons, the inputslot must be first */

540 econtext->ecxt_innertuple = slot2;

541 econtext->ecxt_outertuple = slot1;

542 return !ExecQualAndReset(hashtable->cur_eq_func, econtext);

543}

AttrNumber

int16 AttrNumber

Definition: attnum.h:21

ParallelWorkerNumber

int ParallelWorkerNumber

Definition: parallel.c:115

MAXALIGN

#define MAXALIGN(LEN)

Definition: c.h:810

uint32

uint32_t uint32

Definition: c.h:538

Size

size_t Size

Definition: c.h:610

ERROR

#define ERROR

Definition: elog.h:39

elog

#define elog(elevel,...)

Definition: elog.h:226

ExecBuildHash32FromAttrs

ExprState * ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops, FmgrInfo *hashfunctions, Oid *collations, int numCols, AttrNumber *keyColIdx, PlanState *parent, uint32 init_value)

Definition: execExpr.c:4141

ExecBuildGroupingEqual

ExprState * ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, const TupleTableSlotOps *lops, const TupleTableSlotOps *rops, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, const Oid *collations, PlanState *parent)

Definition: execExpr.c:4465

execTuplesMatchPrepare

ExprState * execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, const Oid *collations, PlanState *parent)

Definition: execGrouping.c:58

execTuplesHashPrepare

void execTuplesHashPrepare(int numCols, const Oid *eqOperators, Oid **eqFuncOids, FmgrInfo **hashFunctions)

Definition: execGrouping.c:97

LookupTupleHashEntryHash

TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 hash)

Definition: execGrouping.c:356

LookupTupleHashEntry

TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 *hash)

Definition: execGrouping.c:301

TupleHashTableHash_internal

static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, const MinimalTuple tuple)

Definition: execGrouping.c:421

TupleHashTableHash

uint32 TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)

Definition: execGrouping.c:333

FindTupleHashEntry

TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, ExprState *eqcomp, ExprState *hashexpr)

Definition: execGrouping.c:388

TupleHashTableMatch

static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)

Definition: execGrouping.c:520

BuildTupleHashTable

TupleHashTable BuildTupleHashTable(PlanState *parent, TupleDesc inputDesc, const TupleTableSlotOps *inputOps, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, Oid *collations, long nbuckets, Size additionalsize, MemoryContext metacxt, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv)

Definition: execGrouping.c:167

ResetTupleHashTable

void ResetTupleHashTable(TupleHashTable hashtable)

Definition: execGrouping.c:280

LookupTupleHashEntry_internal

static TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 hash)

Definition: execGrouping.c:469

MakeSingleTupleTableSlot

TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)

Definition: execTuples.c:1427

ExecStoreMinimalTuple

TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)