/*-------------------------------------------------------------------------
 *
 * dynahash.c
 *	  dynamic chained hash tables
 *
 * dynahash.c supports both local-to-a-backend hash tables and hash tables in
 * shared memory.  For shared hash tables, it is the caller's responsibility
 * to provide appropriate access interlocking.  The simplest convention is
 * that a single LWLock protects the whole hash table.  Searches (HASH_FIND or
 * hash_seq_search) need only shared lock, but any update requires exclusive
 * lock.  For heavily-used shared tables, the single-lock approach creates a
 * concurrency bottleneck, so we also support "partitioned" locking wherein
 * there are multiple LWLocks guarding distinct subsets of the table.  To use
 * a hash table in partitioned mode, the HASH_PARTITION flag must be given
 * to hash_create.  This prevents any attempt to split buckets on-the-fly.
 * Therefore, each hash bucket chain operates independently, and no fields
 * of the hash header change after init except nentries and freeList.
 * (A partitioned table uses multiple copies of those fields, guarded by
 * spinlocks, for additional concurrency.)
 * This lets any subset of the hash buckets be treated as a separately
 * lockable partition.  We expect callers to use the low-order bits of a
 * lookup key's hash value as a partition number --- this will work because
 * of the way calc_bucket() maps hash values to bucket numbers.
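 *
 * For illustration, a caller of a partitioned table might do something like
 * the following (a sketch only: the lock array and its size are the caller's
 * own arrangements, not anything defined in this file):
 *
 *		uint32		hashcode = get_hash_value(hashp, &key);
 *		LWLock	   *lock = my_partition_locks[hashcode % MY_NUM_PARTITIONS];
 *
 *		LWLockAcquire(lock, LW_EXCLUSIVE);
 *		entry = hash_search_with_hash_value(hashp, &key, hashcode,
 *											HASH_ENTER, &found);
 *		LWLockRelease(lock);
 *
 * where MY_NUM_PARTITIONS matches the num_partitions given to hash_create.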
 *
 * The memory allocator function should match malloc's semantics of returning
 * NULL on failure.  (This is essential for hash tables in shared memory.
 * For hash tables in local memory, we used to use palloc() which will throw
 * error on failure; but we no longer do, so it's untested whether this
 * module will still cope with that behavior.)
 *
 * dynahash.c provides support for these types of lookup keys:
 *
 * 1. Null-terminated C strings (truncated if necessary to fit in keysize),
 * compared as though by strcmp().  This is selected by specifying the
 * HASH_STRINGS flag to hash_create.
 *
 * 2. Arbitrary binary data of size keysize, compared as though by memcmp().
 * (Caller must ensure there are no undefined padding bits in the keys!)
 * This is selected by specifying the HASH_BLOBS flag to hash_create.
 *
 * 3. More complex key behavior can be selected by specifying user-supplied
 * hashing, comparison, and/or key-copying functions.  At least a hashing
 * function must be supplied; comparison defaults to memcmp() and key copying
 * to memcpy() when a user-defined hashing function is selected.
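 *
 * For instance (an illustrative sketch; MyHashFunc stands in for some
 * caller-supplied HashValueFunc), the three cases are selected like so:
 *
 *		flags = HASH_ELEM | HASH_STRINGS;			(case 1)
 *		flags = HASH_ELEM | HASH_BLOBS;				(case 2)
 *		info.hash = MyHashFunc;
 *		flags = HASH_ELEM | HASH_FUNCTION;			(case 3)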
 *
 * Compared to simplehash, dynahash has the following benefits:
 *
 * - It supports partitioning, which is useful for shared memory access using
 *   locks.
 * - Shared memory hashes are allocated in a fixed size area at startup and
 *   are discoverable by name from other processes.
 * - Because entries don't need to be moved in the case of hash conflicts,
 *   dynahash has better performance for large entries.
 * - Guarantees stable pointers to entries.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/hash/dynahash.c
 *
 *-------------------------------------------------------------------------
 */

/*
 * Original comments:
 *
 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
 * Coded into C, with minor code improvements, and with hsearch(3) interface,
 * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
 * also, hcreate/hdestroy routines added to simulate hsearch(3).
 *
 * These routines simulate hsearch(3) and family, with the important
 * difference that the hash table is dynamic - can grow indefinitely
 * beyond its original size (as supplied to hcreate()).
 *
 * Performance appears to be comparable to that of hsearch(3).
 * The 'source-code' options referred to in hsearch(3)'s 'man' page
 * are not implemented; otherwise functionality is identical.
 *
 * Compilation controls:
 * HASH_STATISTICS causes some usage statistics to be maintained, which can be
 * logged by calling hash_stats().
 *
 * Problems & fixes to ejp@ausmelb.oz.  WARNING: relies on pre-processor
 * concatenation property, in probably unnecessary code 'optimization'.
 *
 * Modified margo@postgres.berkeley.edu February 1990
 *		added multiple table interface
 * Modified by sullivan@postgres.berkeley.edu April 1990
 *		changed ctl structure for shared memory
 */

/*
 * A hash table has a top-level "directory", each of whose entries points
 * to a "segment" of ssize bucket headers.  The maximum number of hash
 * buckets is thus dsize * ssize (but dsize may be expansible).  Of course,
 * the number of records in the table can be larger, but we don't want a
 * whole lot of records per bucket or performance goes down.
 *
 * In a hash table allocated in shared memory, the directory cannot be
 * expanded because it must stay at a fixed address.  The directory size
 * should be selected using hash_select_dirsize (and you'd better have
 * a good idea of the maximum number of entries!).  For non-shared hash
 * tables, the initial directory size can be left at the default.
 */
#define DEF_SEGSIZE			   256
#define DEF_SEGSIZE_SHIFT	   8	/* must be log2(DEF_SEGSIZE) */
#define DEF_DIRSIZE			   256

/* Number of freelists to be used for a partitioned hash table. */
#define NUM_FREELISTS			32

/* A hash bucket is a linked list of HASHELEMENTs */
typedef HASHELEMENT *HASHBUCKET;

/* A hash segment is an array of bucket headers */
typedef HASHBUCKET *HASHSEGMENT;

/*
 * Per-freelist data.
 *
 * In a partitioned hash table, each freelist is associated with a specific
 * set of hashcodes, as determined by the FREELIST_IDX() macro below.
 * nentries tracks the number of live hashtable entries having those hashcodes
 * (NOT the number of entries in the freelist, as you might expect).
 *
 * The coverage of a freelist might be more or less than one partition, so it
 * needs its own lock rather than relying on caller locking.  Relying on that
 * wouldn't work even if the coverage was the same, because of the occasional
 * need to "borrow" entries from another freelist; see get_hash_entry().
 *
 * Using an array of FreeListData instead of separate arrays of mutexes,
 * nentries and freeLists helps to reduce sharing of cache lines between
 * different mutexes.
 */
typedef struct
{
	slock_t		mutex;			/* spinlock for this freelist */
	int64		nentries;		/* number of entries in associated buckets */
	HASHELEMENT *freeList;		/* chain of free elements */
} FreeListData;

/*
 * Header structure for a hash table --- contains all changeable info
 *
 * In a shared-memory hash table, the HASHHDR is in shared memory, while
 * each backend has a local HTAB struct.  For a non-shared table, there isn't
 * any functional difference between HASHHDR and HTAB, but we separate them
 * anyway to share code between shared and non-shared tables.
 */
struct HASHHDR
{
	/*
	 * The freelist can become a point of contention in high-concurrency hash
	 * tables, so we use an array of freelists, each with its own mutex and
	 * nentries count, instead of just a single one.  Although the freelists
	 * normally operate independently, we will scavenge entries from freelists
	 * other than a hashcode's default freelist when necessary.
	 *
	 * If the hash table is not partitioned, only freeList[0] is used and its
	 * spinlock is not used at all; callers' locking is assumed sufficient.
	 */
	FreeListData freeList[NUM_FREELISTS];

	/* These fields can change, but not in a partitioned table */
	/* Also, dsize can't change in a shared table, even if unpartitioned */
	int64		dsize;			/* directory size */
	int64		nsegs;			/* number of allocated segments (<= dsize) */
	uint32		max_bucket;		/* ID of maximum bucket in use */
	uint32		high_mask;		/* mask to modulo into entire table */
	uint32		low_mask;		/* mask to modulo into lower half of table */

	/* These fields are fixed at hashtable creation */
	Size		keysize;		/* hash key length in bytes */
	Size		entrysize;		/* total user element size in bytes */
	int			num_partitions; /* # partitions (must be power of 2), or 0 */
	int64		max_dsize;		/* 'dsize' limit if directory is fixed size */
	int64		ssize;			/* segment size --- must be power of 2 */
	int			sshift;			/* segment shift = log2(ssize) */
	int			nelem_alloc;	/* number of entries to allocate at once */
	bool		isfixed;		/* if true, don't enlarge */

#ifdef HASH_STATISTICS

	/*
	 * Count statistics here.  NB: stats code doesn't bother with mutex, so
	 * counts could be corrupted a bit in a partitioned table.
	 */
	uint64		accesses;
	uint64		collisions;
	uint64		expansions;
#endif
};

#define IS_PARTITIONED(hctl)  ((hctl)->num_partitions != 0)

#define FREELIST_IDX(hctl, hashcode) \
	(IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)

/*
 * Top control structure for a hashtable --- in a shared table, each backend
 * has its own copy (OK since no fields change at runtime)
 */
struct HTAB
{
	HASHHDR    *hctl;			/* => shared control information */
	HASHSEGMENT *dir;			/* directory of segment starts */
	HashValueFunc hash;			/* hash function */
	HashCompareFunc match;		/* key comparison function */
	HashCopyFunc keycopy;		/* key copying function */
	HashAllocFunc alloc;		/* memory allocator */
	MemoryContext hcxt;			/* memory context if default allocator used */
	char	   *tabname;		/* table name (for error messages) */
	bool		isshared;		/* true if table is in shared memory */

	/* freezing a shared table isn't allowed, so we can keep state here */
	bool		frozen;			/* true = no more inserts allowed */

	/* We keep local copies of these fixed values to reduce contention */
	Size		keysize;		/* hash key length in bytes */
	int64		ssize;			/* segment size --- must be power of 2 */
	int			sshift;			/* segment shift = log2(ssize) */

	/*
	 * In a USE_VALGRIND build, non-shared hashtables keep an slist chain of
	 * all the element blocks they have allocated.  This pacifies Valgrind,
	 * which would otherwise often claim that the element blocks are "possibly
	 * lost" for lack of any non-interior pointers to their starts.
	 */
#ifdef USE_VALGRIND
	slist_head	element_blocks;
#endif
};

/*
 * Key (also entry) part of a HASHELEMENT
 */
#define ELEMENTKEY(helem)  (((char *) (helem)) + MAXALIGN(sizeof(HASHELEMENT)))

/*
 * Obtain element pointer given pointer to key
 */
#define ELEMENT_FROM_KEY(key)  \
	((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))
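
/*
 * For example (illustrative only; "MyEntry" is hypothetical): callers
 * declare their entry types with the hash key as the first field, so the
 * entry pointer returned by hash_search() is also the key pointer that
 * ELEMENT_FROM_KEY() expects:
 *
 *		typedef struct
 *		{
 *			Oid			key;		-- hash key; must be first
 *			int			count;		-- caller's payload follows
 *		} MyEntry;
 */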

/*
 * Fast MOD arithmetic, assuming that y is a power of 2 !
 */
#define MOD(x,y)			   ((x) & ((y)-1))

/*
 * Private function prototypes
 */

/*
 * memory allocation support
 */

/*
 * HashCompareFunc for string keys
 *
 * Because we copy keys with strlcpy(), they will be truncated at keysize-1
 * bytes, so we can only compare that many ... hence strncmp is almost but
 * not quite the right thing.
 */
static int
string_compare(const char *key1, const char *key2, Size keysize)
{
	return strncmp(key1, key2, keysize - 1);
}

/************************** CREATE ROUTINES **********************/

/*
 * hash_create -- create a new dynamic hash table
 *
 *	tabname: a name for the table (for debugging purposes)
 *	nelem: maximum number of elements expected
 *	*info: additional table parameters, as indicated by flags
 *	flags: bitmask indicating which parameters to take from *info
 *
 * The flags value *must* include HASH_ELEM.  (Formerly, this was nominally
 * optional, but the default keysize and entrysize values were useless.)
 * The flags value must also include exactly one of HASH_STRINGS, HASH_BLOBS,
 * or HASH_FUNCTION, to define the key hashing semantics (C strings,
 * binary blobs, or custom, respectively).  Callers specifying a custom
 * hash function will likely also want to use HASH_COMPARE, and perhaps
 * also HASH_KEYCOPY, to control key comparison and copying.
 * Another often-used flag is HASH_CONTEXT, to allocate the hash table
 * under info->hcxt rather than under TopMemoryContext; the default
 * behavior is only suitable for session-lifespan hash tables.
 * Other flags bits are special-purpose and seldom used, except for those
 * associated with shared-memory hash tables, for which see ShmemInitHash().
 *
 * Fields in *info are read only when the associated flags bit is set.
 * It is not necessary to initialize other fields of *info.
 * Neither tabname nor *info need persist after the hash_create() call.
 *
 * Note: It is deprecated for callers of hash_create() to explicitly specify
 * string_hash, tag_hash, uint32_hash, or oid_hash.  Just set HASH_STRINGS or
 * HASH_BLOBS.  Use HASH_FUNCTION only when you want something other than
 * one of these.
 *
 * Note: for a shared-memory hashtable, nelem needs to be a pretty good
 * estimate, since we can't expand the table on the fly.  But an unshared
 * hashtable can be expanded on-the-fly, so it's better for nelem to be
 * on the small side and let the table grow if it's exceeded.  An overly
 * large nelem will penalize hash_seq_search speed without buying much.
 */
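
/*
 * Typical usage looks like this (a sketch only; "MyEntry" is a
 * hypothetical caller-defined struct whose first field is the key):
 *
 *		HASHCTL		info;
 *		HTAB	   *htab;
 *
 *		info.keysize = sizeof(Oid);
 *		info.entrysize = sizeof(MyEntry);
 *		htab = hash_create("My lookup table",
 *						   256,		-- initial size estimate
 *						   &info,
 *						   HASH_ELEM | HASH_BLOBS);
 */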
HTAB *
hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
{
	/*
	 * Hash tables now allocate space for key and data, but you have to say
	 * how much space to allocate.
	 */

	/*
	 * For shared hash tables, we have a local hash header (HTAB struct) that
	 * we allocate in TopMemoryContext; all else is in shared memory.
	 *
	 * For non-shared hash tables, everything including the hash header is in
	 * a memory context created specially for the hash table --- this makes
	 * hash_destroy very simple.  The memory context is made a child of either
	 * a context specified by the caller, or TopMemoryContext if nothing is
	 * specified.
	 */

	/* Set up to allocate the hash header */

	/* Create the hash table's private memory context */

	/* Initialize the hash header, plus a copy of the table name */
	hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);

	hashp->tabname = (char *) (hashp + 1);
	strcpy(hashp->tabname, tabname);

	/* If we have a private context, label it with hashtable's name */

	/*
	 * Select the appropriate hash function (see comments at head of file).
	 */

	/* We can optimize hashing for common key sizes */

	/*
	 * string_hash used to be considered the default hash method, and in a
	 * non-assert build it effectively still is.  But we now consider it an
	 * assertion error to not say HASH_STRINGS explicitly.  To help catch
	 * mistaken usage of HASH_STRINGS, we also insist on a reasonably long
	 * string length: if the keysize is only 4 or 8 bytes, it's almost
	 * certainly an integer or pointer not a string.
	 */

	/*
	 * If you don't specify a match function, it defaults to string_compare
	 * if you used string_hash, and to memcmp otherwise.
	 *
	 * Note: explicitly specifying string_hash is deprecated, because this
	 * might not work for callers in loadable modules on some platforms due
	 * to referencing a trampoline instead of the string_hash function
	 * proper.  Specify HASH_STRINGS instead.
	 */
		hashp->match = memcmp;

	/*
	 * Similarly, the key-copying function defaults to strlcpy or memcpy.
	 */

	/*
	 * The signature of keycopy is meant for memcpy(), which returns
	 * void *, but strlcpy() returns size_t.  Since we never use the return
	 * value of keycopy, and size_t is pretty much always the same size as
	 * void *, this should be safe.  The extra cast in the middle is to
	 * avoid warnings from -Wcast-function-type.
	 */

	/* And select the entry allocation function, too. */

	/*
	 * ctl structure and directory are preallocated for shared memory
	 * tables.  Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
	 * well.
	 */

	/* hash table already exists, we're just attaching to it */

	/* make local copies of some heavily-used values */

	/* setup hash table defaults */
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* Doesn't make sense to partition a local hash table */

	/*
	 * The number of partitions had better be a power of 2.  Also, it must
	 * be less than INT_MAX (see init_htab()), so call the int version of
	 * next_pow2.
	 */

	/* ssize had better be a power of 2 */

	/*
	 * SHM hash tables have fixed directory size passed by the caller.
	 */

	/* remember the entry sizes, too */

	/* make local copies of heavily-used constant fields */

	/* Build the hash directory structure */

	/*
	 * For a shared hash table, preallocate the requested number of elements.
	 * This reduces problems with run-time out-of-shared-memory conditions.
	 *
	 * For a non-shared hash table, preallocate the requested number of
	 * elements if it's less than our chosen nelem_alloc.  This avoids
	 * wasting space if the caller correctly estimates a small table size.
	 */
	if ((flags & HASH_SHARED_MEM) ||
		nelem < hctl->nelem_alloc)
		/*
		 * If hash table is partitioned, give each freelist an equal share
		 * of the initial allocation.  Otherwise only freeList[0] is used.
		 */
		if (IS_PARTITIONED(hashp->hctl))
			freelist_partitions = NUM_FREELISTS;
		else
			freelist_partitions = 1;

		nelem_alloc = nelem / freelist_partitions;
		if (nelem_alloc <= 0)
			nelem_alloc = 1;

		/*
		 * Make sure we'll allocate all the requested elements; freeList[0]
		 * gets the excess if the request isn't divisible by NUM_FREELISTS.
		 */
		if (nelem_alloc * freelist_partitions < nelem)
			nelem_alloc_first =
				nelem - nelem_alloc * (freelist_partitions - 1);
		else
			nelem_alloc_first = nelem_alloc;
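
		/*
		 * Worked example (illustrative numbers): with nelem = 1000 and
		 * NUM_FREELISTS = 32, nelem_alloc is 31; since 31 * 32 = 992 falls
		 * short of 1000, freeList[0] gets nelem_alloc_first = 1000 - 31 * 31
		 * = 39 entries and each other freelist gets 31.
		 */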

		for (i = 0; i < freelist_partitions; i++)
		{
			int			temp = (i == 0) ? nelem_alloc_first : nelem_alloc;

			if (!element_alloc(hashp, temp, i))
				ereport(ERROR,
						(errcode(ERRCODE_OUT_OF_MEMORY),
						 errmsg("out of memory")));
		}

	/* Set isfixed if requested, but not till after we build initial entries */

/*
 * Set default HASHHDR parameters.
 */

	/* table has no fixed maximum size */

	hctl->isfixed = false;		/* can be enlarged */

#ifdef HASH_STATISTICS
	hctl->accesses = hctl->collisions = hctl->expansions = 0;
#endif

/*
 * Given the user-specified entry size, choose nelem_alloc, ie, how many
 * elements to add to the hash table when we need more.
 */

	/* Each element has a HASHELEMENT header plus user data. */
	/* NB: this had better match element_alloc() */

	/*
	 * The idea here is to choose nelem_alloc at least 32, but round up so
	 * that the allocation request will be a power of 2 or just less.  This
	 * makes little difference for hash tables in shared memory, but for
	 * hash tables managed by palloc, the allocation request will be rounded
	 * up to a power of 2 anyway.  If we fail to take this into account,
	 * we'll waste as much as half the allocated space.
	 */
	allocSize = 32 * 4;			/* assume elementSize at least 8 */
	do
	{
		allocSize <<= 1;
		nelem_alloc = allocSize / elementSize;
	} while (nelem_alloc < 32);
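
	/*
	 * Worked example (illustrative): with elementSize = 56, allocSize grows
	 * 256, 512, 1024, 2048; the loop stops at 2048 because 2048 / 56 = 36 >=
	 * 32, so nelem_alloc is 36 and each allocation request is a power of 2.
	 */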

/*
 * Compute derived fields of hctl and build the initial directory/segment
 * arrays
 */

	/*
	 * initialize mutexes if it's a partitioned table
	 */

	/*
	 * Allocate space for the next greater power of two number of buckets,
	 * assuming a desired maximum load factor of 1.
	 */

	/*
	 * In a partitioned table, nbuckets must be at least equal to
	 * num_partitions; were it less, keys with apparently different partition
	 * numbers would map to the same bucket, breaking partition independence.
	 * (Normally nbuckets will be much bigger; this is just a safety check.)
	 */
	while (nbuckets < hctl->num_partitions)
		nbuckets <<= 1;

	/*
	 * Figure number of directory segments needed, round up to a power of 2
	 */
	nsegs = (nbuckets - 1) / hctl->ssize + 1;

	/*
	 * Make sure directory is big enough.  If pre-allocated directory is too
	 * small, choke (caller screwed up).
	 */
	if (nsegs > hctl->dsize)

	/* Allocate a directory */

	/* Allocate initial segments */
	for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)

	/* Choose number of entries to allocate at a time */

/*
 * Estimate the space needed for a hashtable containing the given number
 * of entries of given size.
 * NOTE: this is used to estimate the footprint of hashtables in shared
 * memory; therefore it does not count HTAB which is in local memory.
 * NB: assumes that all hash structure parameters have default values!
 */
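
/*
 * For example (a sketch of typical shmem-sizing usage; "size" is the
 * caller's running total, not something defined here):
 *
 *		size = add_size(size, hash_estimate_size(1024, sizeof(MyEntry)));
 */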
	/* estimate number of buckets wanted */
	/* # of segments needed for nBuckets */
	/* directory entries */
	while (nDirEntries < nSegments)
		nDirEntries <<= 1;		/* dir_alloc doubles dsize at each call */

	/* fixed control info */

	/* elements --- allocated in groups of choose_nelem_alloc() entries */
	nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
	size = add_size(size,
					mul_size(nElementAllocs,
							 mul_size(elementAllocCnt, elementSize)));

/*
 * Select an appropriate directory size for a hashtable with the given
 * maximum number of entries.
 * This is only needed for hashtables in shared memory, whose directories
 * cannot be expanded dynamically.
 * NB: assumes that all hash structure parameters have default values!
 *
 * XXX this had better agree with the behavior of init_htab()...
 */

	/* estimate number of buckets wanted */
	/* # of segments needed for nBuckets */
	/* directory entries */
	while (nDirEntries < nSegments)
		nDirEntries <<= 1;		/* dir_alloc doubles dsize at each call */

/*
 * Compute the required initial memory allocation for a shared-memory
 * hashtable with the given parameters.  We need space for the HASHHDR
 * and for the (non-expansible) directory.
 */

/********************** DESTROY ROUTINES ************************/

	/* allocation method must be one we know how to free, too */
	/* so this hashtable must have its own context */

	/*
	 * Free everything by destroying the hash table's memory context.
	 */

#ifdef HASH_STATISTICS
		 caller != NULL ? caller : "(unknown)", hashp->tabname, hctl->accesses,

/******************************* SEARCH ROUTINES ****************************/

/*
 * get_hash_value -- exported routine to calculate a key's hash value
 *
 * We export this because for partitioned tables, callers need to compute
 * the partition number (from the low-order bits of the hash value) before
 * searching.
 */

/* Convert a hash value to a bucket number */

/*
 * hash_search -- look up key in table and perform action
 * hash_search_with_hash_value -- same, with key's hash value already computed
 *
 * action is one of:
 *		HASH_FIND: look up key in table
 *		HASH_ENTER: look up key in table, creating entry if not present
 *		HASH_ENTER_NULL: same, but return NULL if out of memory
 *		HASH_REMOVE: look up key in table, remove entry if present
 *
 * Return value is a pointer to the element found/entered/removed if any,
 * or NULL if no match was found.  (NB: in the case of the REMOVE action,
 * the result is a dangling pointer that shouldn't be dereferenced!)
 *
 * HASH_ENTER will normally ereport a generic "out of memory" error if it
 * is unable to create a new entry.  The HASH_ENTER_NULL operation is the
 * same except it will return NULL if out of memory.
 *
 * If foundPtr isn't NULL, then *foundPtr is set true if we found an
 * existing entry in the table, false otherwise.  This is needed in the
 * HASH_ENTER case, but is redundant with the return value otherwise.
 *
 * For hash_search_with_hash_value, the hashvalue parameter must have been
 * calculated with get_hash_value().
 */
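
/*
 * Typical HASH_ENTER usage looks like this (a sketch; "MyEntry" and its
 * fields are hypothetical):
 *
 *		bool		found;
 *		MyEntry    *entry;
 *
 *		entry = (MyEntry *) hash_search(htab, &key, HASH_ENTER, &found);
 *		if (!found)
 *			entry->count = 0;	-- caller initializes the rest of a new entry
 *		entry->count++;
 */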

#ifdef HASH_STATISTICS
	hctl->accesses++;
#endif

	/*
	 * If inserting, check if it is time to split a bucket.
	 *
	 * NOTE: failure to expand table is not a fatal error, it just means we
	 * have to run at higher fill factor than we wanted.  However, if we're
	 * using the palloc allocator then it will throw error anyway on
	 * out-of-memory, so we must do this before modifying the table.
	 */

		/*
		 * Can't split if running in partitioned mode, nor if frozen, nor if
		 * table is the subject of any active hash_seq_search scans.
		 */

	/*
	 * Do the initial lookup
	 */
	currBucket = *prevBucketPtr;

	/*
	 * Follow collision chain looking for matching key
	 */
	match = hashp->match;		/* save one fetch in inner loop */
	keysize = hashp->keysize;	/* ditto */

	while (currBucket != NULL)
	{
		if (currBucket->hashvalue == hashvalue &&
			match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
			break;
		prevBucketPtr = &(currBucket->link);
		currBucket = *prevBucketPtr;
#ifdef HASH_STATISTICS
		hctl->collisions++;
#endif
	}

	if (foundPtr)
		*foundPtr = (bool) (currBucket != NULL);

		case HASH_FIND:
			if (currBucket != NULL)
				return ELEMENTKEY(currBucket);
			return NULL;

		case HASH_REMOVE:
			if (currBucket != NULL)
			{
				/* if partitioned, must lock to touch nentries and freeList */

				/* delete the record from the appropriate nentries counter. */

				/* remove record from hash bucket's chain. */
				*prevBucketPtr = currBucket->link;

				/* add the record to the appropriate freelist. */

				/*
				 * better hope the caller is synchronizing access to this
				 * element, because someone else is going to reuse it the next
				 * time something is added to the table
				 */
			}

		case HASH_ENTER_NULL:
		case HASH_ENTER:
			/* Return existing element if found, else create one */
			if (currBucket != NULL)
				return ELEMENTKEY(currBucket);

			/* disallow inserts if frozen */
			if (hashp->frozen)
				elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
					 hashp->tabname);

			if (currBucket == NULL)
			{
				/* out of memory */
				if (action == HASH_ENTER_NULL)
					return NULL;
				/* report a generic message */
				if (hashp->isshared)
					ereport(ERROR,
							(errcode(ERRCODE_OUT_OF_MEMORY),
							 errmsg("out of shared memory")));
				else
					ereport(ERROR,
							(errcode(ERRCODE_OUT_OF_MEMORY),
							 errmsg("out of memory")));
			}

			/* link into hashbucket chain */
			*prevBucketPtr = currBucket;
			currBucket->link = NULL;

			/* copy key into record */

			/*
			 * Caller is expected to fill the data field on return.  DO NOT
			 * insert any code that could possibly throw error here, as doing
			 * so would leave the table entry incomplete and hence corrupt the
			 * caller's data structure.
			 */

			return ELEMENTKEY(currBucket);
	}

	return NULL;				/* keep compiler quiet */

/*
 * hash_update_hash_key -- change the hash key of an existing table entry
 *
 * This is equivalent to removing the entry, making a new entry, and copying
 * over its data, except that the entry never goes to the table's freelist.
 * Therefore this cannot suffer an out-of-memory failure, even if there are
 * other processes operating in other partitions of the hashtable.
 *
 * Returns true if successful, false if the requested new hash key is already
 * present.  Throws error if the specified entry pointer isn't actually a
 * table member.
 *
 * NB: currently, there is no special case for old and new hash keys being
 * identical, which means we'll report false for that situation.  This is
 * preferable for existing uses.
 *
 * NB: for a partitioned hashtable, caller must hold lock on both relevant
 * partitions, if the new hash key would belong to a different partition.
 */
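
/*
 * A sketch of typical usage ("entry" must point to a live entry of the
 * table):
 *
 *		if (!hash_update_hash_key(htab, entry, &newkey))
 *			elog(ERROR, "duplicate key");
 */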
bool
hash_update_hash_key(HTAB *hashp,
					 void *existingEntry,
					 const void *newKeyPtr)
{
#ifdef HASH_STATISTICS
	hctl->accesses++;
#endif

	/* disallow updates if frozen */
	if (hashp->frozen)
		elog(ERROR, "cannot update in frozen hashtable \"%s\"",
			 hashp->tabname);

	/*
	 * Lookup the existing element using its saved hash value.  We need to do
	 * this to be able to unlink it from its hash chain, but as a side benefit
	 * we can verify the validity of the passed existingEntry pointer.
	 */
	currBucket = *prevBucketPtr;

	while (currBucket != NULL)
	{
		if (currBucket == existingElement)
			break;
		prevBucketPtr = &(currBucket->link);
		currBucket = *prevBucketPtr;
	}

	if (currBucket == NULL)
		elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
			 hashp->tabname);

	oldPrevPtr = prevBucketPtr;

	/*
	 * Now perform the equivalent of a HASH_ENTER operation to locate the hash
	 * chain we want to put the entry into.
	 */
	newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
	newbucket = hash_initial_lookup(hashp, newhashvalue, &prevBucketPtr);
	currBucket = *prevBucketPtr;

	/*
	 * Follow collision chain looking for matching key
	 */
	match = hashp->match;		/* save one fetch in inner loop */
	keysize = hashp->keysize;	/* ditto */

	while (currBucket != NULL)
	{
		if (currBucket->hashvalue == newhashvalue &&
			match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
			break;
		prevBucketPtr = &(currBucket->link);
		currBucket = *prevBucketPtr;
#ifdef HASH_STATISTICS
		hctl->collisions++;
#endif
	}

	if (currBucket != NULL)
		return false;			/* collision with an existing entry */

	currBucket = existingElement;

	/*
	 * If old and new hash values belong to the same bucket, we need not
	 * change any chain links, and indeed should not since this simplistic
	 * update will corrupt the list if currBucket is the last element.  (We
	 * cannot fall out earlier, however, since we need to scan the bucket to
	 * check for duplicate keys.)
	 */
	if (bucket != newbucket)
	{
		/* OK to remove record from old hash bucket's chain. */
		*oldPrevPtr = currBucket->link;

		/* link into new hashbucket chain */
		*prevBucketPtr = currBucket;
		currBucket->link = NULL;
	}

	/* copy new key into record */

	/* rest of record is untouched */

	return true;
}

/*
 * Allocate a new hashtable entry if possible; return NULL if out of memory.
 * (Or, if the underlying space allocator throws error for out-of-memory,
 * we won't return at all.)
 */

	/* if partitioned, must lock to touch nentries and freeList */

	/* try to get an entry from the freelist */

	if (newElement != NULL)
		break;

	/*
	 * No free elements in this freelist.  In a partitioned table, there
	 * might be entries in other freelists, but to reduce contention we
	 * prefer to first try to get another chunk of buckets from the main
	 * shmem allocator.  If that fails, though, we *MUST* root through all
	 * the other freelists before giving up.  There are multiple callers
	 * that assume that they can allocate every element in the initially
	 * requested table size, or that deleting an element guarantees they
	 * can insert a new element, even if shared memory is entirely full.
	 * Failing because the needed element is in a different freelist is
	 * not acceptable.
	 */
		int			borrow_from_idx;

		if (!IS_PARTITIONED(hctl))
			return NULL;		/* out of memory */

		/* try to borrow element from another freelist */
		borrow_from_idx = freelist_idx;
		for (;;)
		{
			borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
			if (borrow_from_idx == freelist_idx)
				break;			/* examined all freelists, fail */

			if (newElement != NULL)
			{
				/* careful: count the new element in its proper freelist */
			}
		}

		/* no elements available to borrow either, so out of memory */
		return NULL;

	/* remove entry from freelist, bump nentries */

/*
 * hash_get_num_entries -- get the number of entries in a hashtable
 */

	/*
	 * We currently don't bother with acquiring the mutexes; it's only
	 * sensible to call this function if you've got lock on all partitions of
	 * the table.
	 */

/*
 * hash_seq_init/_search/_term
 *			Sequentially search through hash table and return
 *			all the elements one by one, return NULL when no more.
 *
 * hash_seq_term should be called if and only if the scan is abandoned before
 * completion; if hash_seq_search returns NULL then it has already done the
 * end-of-scan cleanup.
 *
 * NOTE: caller may delete the returned element before continuing the scan.
 * However, deleting any other element while the scan is in progress is
 * UNDEFINED (it might be the one that curIndex is pointing at!).  Also,
 * if elements are added to the table while the scan is in progress, it is
 * unspecified whether they will be visited by the scan or not.
 *
 * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
 * worry about hash_seq_term cleanup, if the hashtable is first locked against
 * further insertions by calling hash_freeze.
 *
 * NOTE: to use this with a partitioned hashtable, caller had better hold
 * at least shared lock on all partitions of the table throughout the scan!
 * We can cope with insertions or deletions by our own backend, but *not*
 * with concurrent insertions or deletions by another.
 */
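
/*
 * A complete scan therefore usually looks like this (a sketch; "MyEntry"
 * and process() are hypothetical):
 *
 *		HASH_SEQ_STATUS status;
 *		MyEntry    *entry;
 *
 *		hash_seq_init(&status, htab);
 *		while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
 *			process(entry);		-- may delete "entry" itself if desired
 *
 * No hash_seq_term() call is needed when the loop runs to completion.
 */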
	status->hashp = hashp;

/*
 * Same as above but scan by the given hash value.
 * See also hash_seq_search().
 *
 * NOTE: the default hash function doesn't match the syscache hash function.
 * Thus, if you're going to use this function in a syscache callback, make
 * sure you're using a custom hash function.  See relatt_cache_syshash()
 * for an example.
 */

	/*
	 * Scan entries only in the current bucket because only this bucket can
	 * contain entries with the given hash value.
	 */
	while ((curElem = status->curEntry) != NULL)

	if ((curElem = status->curEntry) != NULL)
	{
		/* Continuing scan of curBucket... */
		status->curEntry = curElem->link;
		if (status->curEntry == NULL)	/* end of this bucket */
			++status->curBucket;
		return ELEMENTKEY(curElem);
	}

	/*
	 * Search for next nonempty bucket starting at curBucket.
	 */
	hashp = status->hashp;
	ssize = hashp->ssize;

	if (curBucket > max_bucket)
	{
		hash_seq_term(status);
		return NULL;			/* search is done */
	}

	/*
	 * first find the right segment in the table directory.
	 */
	segment_num = curBucket >> hashp->sshift;
	segment_ndx = MOD(curBucket, ssize);

	segp = hashp->dir[segment_num];

	/*
	 * Pick up the first item in this bucket's chain.  If chain is not empty
	 * we can begin searching it.  Otherwise we have to advance to find the
	 * next nonempty bucket.  We try to optimize that case since searching a
	 * near-empty hashtable has to iterate this loop a lot.
	 */
	while ((curElem = segp[segment_ndx]) == NULL)
	{
		/* empty bucket, advance to next */
		if (++curBucket > max_bucket)
		{
			hash_seq_term(status);
			return NULL;		/* search is done */
		}
		if (++segment_ndx >= ssize)
		{
			segment_num++;
			segment_ndx = 0;
			segp = hashp->dir[segment_num];
		}
	}

	/* Begin scan of curBucket... */
	status->curEntry = curElem->link;
	if (status->curEntry == NULL)	/* end of this bucket */
		++curBucket;
	status->curBucket = curBucket;
	return ELEMENTKEY(curElem);

/*
 * hash_freeze
 *			Freeze a hashtable against future insertions (deletions are
 *			still allowed)
 *
 * The reason for doing this is that by preventing any more bucket splits,
 * we no longer need to worry about registering hash_seq_search scans,
 * and thus caller need not be careful about ensuring hash_seq_term gets
 * called at the right times.
 *
 * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
 * with active scans (since hash_seq_term would then do the wrong thing).
 */

	if (!hashp->frozen && has_seq_scans(hashp))
		elog(ERROR, "cannot freeze hashtable \"%s\" because it has active scans",
			 hashp->tabname);
	hashp->frozen = true;

/********************************* UTILITIES ************************/

/*
 * Expand the table by adding one more hash bucket.
 */

#ifdef HASH_STATISTICS
	hctl->expansions++;
#endif

	new_segnum = new_bucket >> hashp->sshift;
	new_segndx = MOD(new_bucket, hashp->ssize);

	if (new_segnum >= hctl->nsegs)
	{
		/* Allocate new segment if necessary -- could fail if dir full */
		if (new_segnum >= hctl->dsize)
			if (!dir_realloc(hashp))
				return false;
		if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
			return false;
		hctl->nsegs++;
	}

	/* OK, we created a new bucket */

	/*
	 * *Before* changing masks, find old bucket corresponding to same hash
	 * values; values in that bucket may need to be relocated to new bucket.
	 * Note that new_bucket is certainly larger than low_mask at this point,
	 * so we can skip the first step of the regular hash mask calc.
	 */
	old_bucket = (new_bucket & hctl->low_mask);
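
	/*
	 * Illustrative example: when new_bucket is 5 (binary 101) and low_mask
	 * is still 3 (binary 011), the entries that may need to move are all in
	 * old bucket 5 & 3 = 1 (binary 001).
	 */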

	/*
	 * If we crossed a power of 2, readjust masks.
	 */

	/*
	 * Relocate records to the new bucket.  NOTE: because of the way the hash
	 * masking is done in calc_bucket, only one old bucket can need to be
	 * split at this point.  With a different way of reducing the hash value,
	 * that might not be true!
	 */
	old_segnum = old_bucket >> hashp->sshift;
	old_segndx = MOD(old_bucket, hashp->ssize);

	old_seg = hashp->dir[old_segnum];
	new_seg = hashp->dir[new_segnum];

	oldlink = &old_seg[old_segndx];
	newlink = &new_seg[new_segndx];

	for (currElement = *oldlink;
		 currElement != NULL;
		 currElement = nextElement)
	{
		nextElement = currElement->link;
		if (calc_bucket(hctl, currElement->hashvalue) == old_bucket)
		{
			*oldlink = currElement;
			oldlink = &currElement->link;
		}
		else
		{
			*newlink = currElement;
			newlink = &currElement->link;
		}
	}

	/* don't forget to terminate the rebuilt hash chains... */
	*oldlink = NULL;
	*newlink = NULL;

	/* Reallocate directory */

		memcpy(p, old_p, old_dirsize);
		MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);

		/* XXX assume the allocator is palloc, so we know how to free */

/*
 * allocate some new elements and link them into the indicated free list
 */

	/* Each element has a HASHELEMENT header plus user data. */
	requestSize = nelem * elementSize;

	/* Add space for slist_node list link if we need one. */

	/* Allocate the memory. */
	allocedBlock = hashp->alloc(requestSize);

	/*
	 * If USE_VALGRIND, each allocated block of elements of a non-shared
	 * hashtable is chained into a list, so that Valgrind won't think it's
	 * been leaked.
	 */

	/* prepare to link all the new entries into the freelist */
	prevElement = NULL;
	tmpElement = firstElement;
	for (i = 0; i < nelem; i++)
	{
		tmpElement->link = prevElement;
		prevElement = tmpElement;
		tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
	}

	/* if partitioned, must lock to touch freeList */

	/* freelist could be nonempty if two backends did this concurrently */

/*
 * Do initial lookup of a bucket for the given hash value, retrieving its
 * bucket number and its hash bucket.
 */

	segment_num = bucket >> hashp->sshift;
	segment_ndx = MOD(bucket, hashp->ssize);

	segp = hashp->dir[segment_num];

	*bucketptr = &segp[segment_ndx];

/* complain when we have detected a corrupted hashtable */

	/*
	 * If the corruption is in a shared hashtable, we'd better force a
	 * systemwide restart.  Otherwise, just shut down this one backend.
	 */

/* calculate ceil(log base 2) of num */

	/*
	 * guard against too-large input, which would be invalid for
	 * pg_ceil_log2_64()
	 */

/* calculate first power of 2 >= num, bounded to what will fit in an int64 */

	/* my_log2's internal range check is sufficient */

/* calculate first power of 2 >= num, bounded to what will fit in an int */

	if (num > INT_MAX / 2)
		num = INT_MAX / 2;
	return (int) next_pow2_int64(num);

/************************* SEQ SCAN TRACKING ************************/

/*
 * We track active hash_seq_search scans here.  The need for this mechanism
 * comes from the fact that a scan will get confused if a bucket split occurs
 * while it's in progress: it might visit entries twice, or even miss some
 * entirely (if it's partway through the same bucket that splits).  Hence
 * we want to inhibit bucket splits if there are any active scans on the
 * table being inserted into.  This is a fairly rare case in current usage,
 * so just postponing the split until the next insertion seems sufficient.
 *
 * Given present usages of the function, only a few scans are likely to be
 * open concurrently; so a finite-size stack of open scans seems sufficient,
 * and we don't worry that linear search is too slow.  Note that we do
 * allow multiple scans of the same hashtable to be open concurrently.
 *
 * This mechanism can support concurrent scan and insertion in a shared
 * hashtable if it's the same backend doing both.  It would fail otherwise,
 * but locking reasons seem to preclude any such scenario anyway, so we don't
 * worry about it.
 *
 * This arrangement is reasonably robust if a transient hashtable is deleted
 * without notifying us.  The absolute worst case is we might inhibit splits
 * in another table created later at exactly the same address.  We will give
 * a warning at transaction end for reference leaks, so any bugs leading to
 * lack of notification should be easy to catch.
 */

#define MAX_SEQ_SCANS 100

/* Register a table as having an active hash_seq_search scan */

	if (num_seq_scans >= MAX_SEQ_SCANS)
		elog(ERROR, "too many active hash_seq_search scans, cannot start one on \"%s\"",
			 hashp->tabname);

/* Deregister an active scan */

	/* Search backward since it's most likely at the stack top */

	elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
		 hashp->tabname);

/* Check if a table has any active scan */

/* Clean up any open scans at end of transaction */

	/*
	 * During abort cleanup, open scans are expected; just silently clean 'em
	 * out.  An open scan at commit means someone forgot a hash_seq_term()
	 * call, so complain.
	 *
	 * Note: it's tempting to try to print the tabname here, but refrain for
	 * fear of touching deallocated memory.  This isn't a user-facing message
	 * anyway, so it needn't be pretty.
	 */

		if (isCommit)
			elog(WARNING, "leaked hash_seq_search scan for hash table %p",
				 seq_scan_tables[i]);

/* Clean up any open scans at end of subtransaction */

	/*
	 * Search backward to make cleanup easy.  Note we must check all entries,
	 * not only those at the end of the array, because deletion technique
	 * doesn't keep them in order.
	 */

			if (isCommit)
				elog(WARNING, "leaked hash_seq_search scan for hash table %p",
					 seq_scan_tables[i]);