[フレーム]

hashfn.c

Go to the documentation of this file.

1/*-------------------------------------------------------------------------

2 *

3 * hashfn.c

4 * Generic hashing functions, and hash functions for use in dynahash.c

5 * hashtables

6 *

7 *

10 *

11 *

12 * IDENTIFICATION

13 * src/common/hashfn.c

14 *

15 * NOTES

16 * It is expected that every bit of a hash function's 32-bit result is

17 * as random as every other; failure to ensure this is likely to lead

18 * to poor performance of hash tables. In most cases a hash

19 * function should use hash_bytes() or its variant hash_bytes_uint32(),

20 * or the wrappers hash_any() and hash_uint32 defined in hashfn.h.

21 *

22 *-------------------------------------------------------------------------

23 */

24#include "postgres.h"

25

26#include "common/hashfn.h"

27#include "port/pg_bitutils.h"

28

29

30/*

31 * This hash function was written by Bob Jenkins

32 * (bob_jenkins@burtleburtle.net), and superficially adapted

33 * for PostgreSQL by Neil Conway. For more information on this

34 * hash function, see http://burtleburtle.net/bob/hash/doobs.html,

35 * or Bob's article in Dr. Dobb's Journal, Sept. 1997.

36 *

37 * In the current code, we have adopted Bob's 2006 update of his hash

38 * function to fetch the data a word at a time when it is suitably aligned.

39 * This makes for a useful speedup, at the cost of having to maintain

40 * four code paths (aligned vs unaligned, and little-endian vs big-endian).

41 * It also uses two separate mixing functions mix() and final(), instead

42 * of a slower multi-purpose function.

43 */

44

45/* Get a bit mask of the bits set in non-uint32 aligned addresses */

46 #define UINT32_ALIGN_MASK (sizeof(uint32) - 1)

47

48 #define rot(x,k) pg_rotate_left32(x, k)

49

50/*----------

51 * mix -- mix 3 32-bit values reversibly.

52 *

53 * This is reversible, so any information in (a,b,c) before mix() is

54 * still in (a,b,c) after mix().

55 *

56 * If four pairs of (a,b,c) inputs are run through mix(), or through

57 * mix() in reverse, there are at least 32 bits of the output that

58 * are sometimes the same for one pair and different for another pair.

59 * This was tested for:

60 * * pairs that differed by one bit, by two bits, in any combination

61 * of top bits of (a,b,c), or in any combination of bottom bits of

62 * (a,b,c).

63 * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed

64 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as

65 * is commonly produced by subtraction) look like a single 1-bit

66 * difference.

67 * * the base values were pseudorandom, all zero but one bit set, or

68 * all zero plus a counter that starts at zero.

69 *

70 * This does not achieve avalanche. There are input bits of (a,b,c)

71 * that fail to affect some output bits of (a,b,c), especially of a. The

72 * most thoroughly mixed value is c, but it doesn't really even achieve

73 * avalanche in c.

74 *

75 * This allows some parallelism. Read-after-writes are good at doubling

76 * the number of bits affected, so the goal of mixing pulls in the opposite

77 * direction from the goal of parallelism. I did what I could. Rotates

78 * seem to cost as much as shifts on every machine I could lay my hands on,

79 * and rotates are much kinder to the top and bottom bits, so I used rotates.

80 *----------

81 */

82 #define mix(a,b,c) \

83{ \

84 a -= c; a ^= rot(c, 4); c += b; \

85 b -= a; b ^= rot(a, 6); a += c; \

86 c -= b; c ^= rot(b, 8); b += a; \

87 a -= c; a ^= rot(c,16); c += b; \

88 b -= a; b ^= rot(a,19); a += c; \

89 c -= b; c ^= rot(b, 4); b += a; \

90}

91

92/*----------

93 * final -- final mixing of 3 32-bit values (a,b,c) into c

94 *

95 * Pairs of (a,b,c) values differing in only a few bits will usually

96 * produce values of c that look totally different. This was tested for

97 * * pairs that differed by one bit, by two bits, in any combination

98 * of top bits of (a,b,c), or in any combination of bottom bits of

99 * (a,b,c).

100 * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed

101 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as

102 * is commonly produced by subtraction) look like a single 1-bit

103 * difference.

104 * * the base values were pseudorandom, all zero but one bit set, or

105 * all zero plus a counter that starts at zero.

106 *

107 * The use of separate functions for mix() and final() allow for a

108 * substantial performance increase since final() does not need to

109 * do well in reverse, but is does need to affect all output bits.

110 * mix(), on the other hand, does not need to affect all output

111 * bits (affecting 32 bits is enough). The original hash function had

112 * a single mixing operation that had to satisfy both sets of requirements

113 * and was slower as a result.

114 *----------

115 */

116 #define final(a,b,c) \

117{ \

118 c ^= b; c -= rot(b,14); \

119 a ^= c; a -= rot(c,11); \

120 b ^= a; b -= rot(a,25); \

121 c ^= b; c -= rot(b,16); \

122 a ^= c; a -= rot(c, 4); \

123 b ^= a; b -= rot(a,14); \

124 c ^= b; c -= rot(b,24); \

125}

126

127/*

128 * hash_bytes() -- hash a variable-length key into a 32-bit value

129 * k : the key (the unaligned variable-length array of bytes)

130 * len : the length of the key, counting by bytes

131 *

132 * Returns a uint32 value. Every bit of the key affects every bit of

133 * the return value. Every 1-bit and 2-bit delta achieves avalanche.

134 * About 6*len+35 instructions. The best hash table sizes are powers

135 * of 2. There is no need to do mod a prime (mod is sooo slow!).

136 * If you need less than 32 bits, use a bitmask.

137 *

138 * This procedure must never throw elog(ERROR); the ResourceOwner code

139 * relies on this not to fail.

140 *

141 * Note: we could easily change this function to return a 64-bit hash value

142 * by using the final values of both b and c. b is perhaps a little less

143 * well mixed than c, however.

144 */

145uint32

146 hash_bytes(const unsigned char *k, int keylen)

147{

148 uint32 a,

149 b,

150 c,

151 len;

152

153 /* Set up the internal state */

154 len = keylen;

155 a = b = c = 0x9e3779b9 + len + 3923095;

156

157 /* If the source pointer is word-aligned, we use word-wide fetches */

158 if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)

159 {

160 /* Code path for aligned source data */

161 const uint32 *ka = (const uint32 *) k;

162

163 /* handle most of the key */

164 while (len >= 12)

165 {

166 a += ka[0];

167 b += ka[1];

168 c += ka[2];

169 mix(a, b, c);

170 ka += 3;

171 len -= 12;

172 }

173

174 /* handle the last 11 bytes */

175 k = (const unsigned char *) ka;

176#ifdef WORDS_BIGENDIAN

177 switch (len)

178 {

179 case 11:

180 c += ((uint32) k[10] << 8);

181 /* fall through */

182 case 10:

183 c += ((uint32) k[9] << 16);

184 /* fall through */

185 case 9:

186 c += ((uint32) k[8] << 24);

187 /* fall through */

188 case 8:

189 /* the lowest byte of c is reserved for the length */

190 b += ka[1];

191 a += ka[0];

192 break;

193 case 7:

194 b += ((uint32) k[6] << 8);

195 /* fall through */

196 case 6:

197 b += ((uint32) k[5] << 16);

198 /* fall through */

199 case 5:

200 b += ((uint32) k[4] << 24);

201 /* fall through */

202 case 4:

203 a += ka[0];

204 break;

205 case 3:

206 a += ((uint32) k[2] << 8);

207 /* fall through */

208 case 2:

209 a += ((uint32) k[1] << 16);

210 /* fall through */

211 case 1:

212 a += ((uint32) k[0] << 24);

213 /* case 0: nothing left to add */

214 }

215#else /* !WORDS_BIGENDIAN */

216 switch (len)

217 {

218 case 11:

219 c += ((uint32) k[10] << 24);

220 /* fall through */

221 case 10:

222 c += ((uint32) k[9] << 16);

223 /* fall through */

224 case 9:

225 c += ((uint32) k[8] << 8);

226 /* fall through */

227 case 8:

228 /* the lowest byte of c is reserved for the length */

229 b += ka[1];

230 a += ka[0];

231 break;

232 case 7:

233 b += ((uint32) k[6] << 16);

234 /* fall through */

235 case 6:

236 b += ((uint32) k[5] << 8);

237 /* fall through */

238 case 5:

239 b += k[4];

240 /* fall through */

241 case 4:

242 a += ka[0];

243 break;

244 case 3:

245 a += ((uint32) k[2] << 16);

246 /* fall through */

247 case 2:

248 a += ((uint32) k[1] << 8);

249 /* fall through */

250 case 1:

251 a += k[0];

252 /* case 0: nothing left to add */

253 }

254#endif /* WORDS_BIGENDIAN */

255 }

256 else

257 {

258 /* Code path for non-aligned source data */

259

260 /* handle most of the key */

261 while (len >= 12)

262 {

263#ifdef WORDS_BIGENDIAN

264 a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));

265 b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));

266 c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));

267#else /* !WORDS_BIGENDIAN */

268 a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));

269 b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));

270 c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));

271#endif /* WORDS_BIGENDIAN */

272 mix(a, b, c);

273 k += 12;

274 len -= 12;

275 }

276

277 /* handle the last 11 bytes */

278#ifdef WORDS_BIGENDIAN

279 switch (len)

280 {

281 case 11:

282 c += ((uint32) k[10] << 8);

283 /* fall through */

284 case 10:

285 c += ((uint32) k[9] << 16);

286 /* fall through */

287 case 9:

288 c += ((uint32) k[8] << 24);

289 /* fall through */

290 case 8:

291 /* the lowest byte of c is reserved for the length */

292 b += k[7];

293 /* fall through */

294 case 7:

295 b += ((uint32) k[6] << 8);

296 /* fall through */

297 case 6:

298 b += ((uint32) k[5] << 16);

299 /* fall through */

300 case 5:

301 b += ((uint32) k[4] << 24);

302 /* fall through */

303 case 4:

304 a += k[3];

305 /* fall through */

306 case 3:

307 a += ((uint32) k[2] << 8);

308 /* fall through */

309 case 2:

310 a += ((uint32) k[1] << 16);

311 /* fall through */

312 case 1:

313 a += ((uint32) k[0] << 24);

314 /* case 0: nothing left to add */

315 }

316#else /* !WORDS_BIGENDIAN */

317 switch (len)

318 {

319 case 11:

320 c += ((uint32) k[10] << 24);

321 /* fall through */

322 case 10:

323 c += ((uint32) k[9] << 16);

324 /* fall through */

325 case 9:

326 c += ((uint32) k[8] << 8);

327 /* fall through */

328 case 8:

329 /* the lowest byte of c is reserved for the length */

330 b += ((uint32) k[7] << 24);

331 /* fall through */

332 case 7:

333 b += ((uint32) k[6] << 16);

334 /* fall through */

335 case 6:

336 b += ((uint32) k[5] << 8);

337 /* fall through */

338 case 5:

339 b += k[4];

340 /* fall through */

341 case 4:

342 a += ((uint32) k[3] << 24);

343 /* fall through */

344 case 3:

345 a += ((uint32) k[2] << 16);

346 /* fall through */

347 case 2:

348 a += ((uint32) k[1] << 8);

349 /* fall through */

350 case 1:

351 a += k[0];

352 /* case 0: nothing left to add */

353 }

354#endif /* WORDS_BIGENDIAN */

355 }

356

357 final(a, b, c);

358

359 /* report the result */

360 return c;

361}

362

363/*

364 * hash_bytes_extended() -- hash into a 64-bit value, using an optional seed

365 * k : the key (the unaligned variable-length array of bytes)

366 * len : the length of the key, counting by bytes

367 * seed : a 64-bit seed (0 means no seed)

368 *

369 * Returns a uint64 value. Otherwise similar to hash_bytes.

370 */

371uint64

372 hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)

373{

374 uint32 a,

375 b,

376 c,

377 len;

378

379 /* Set up the internal state */

380 len = keylen;

381 a = b = c = 0x9e3779b9 + len + 3923095;

382

383 /* If the seed is non-zero, use it to perturb the internal state. */

384 if (seed != 0)

385 {

386 /*

387 * In essence, the seed is treated as part of the data being hashed,

388 * but for simplicity, we pretend that it's padded with four bytes of

389 * zeroes so that the seed constitutes a 12-byte chunk.

390 */

391 a += (uint32) (seed >> 32);

392 b += (uint32) seed;

393 mix(a, b, c);

394 }

395

396 /* If the source pointer is word-aligned, we use word-wide fetches */

397 if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)

398 {

399 /* Code path for aligned source data */

400 const uint32 *ka = (const uint32 *) k;

401

402 /* handle most of the key */

403 while (len >= 12)

404 {

405 a += ka[0];

406 b += ka[1];

407 c += ka[2];

408 mix(a, b, c);

409 ka += 3;

410 len -= 12;

411 }

412

413 /* handle the last 11 bytes */

414 k = (const unsigned char *) ka;

415#ifdef WORDS_BIGENDIAN

416 switch (len)

417 {

418 case 11:

419 c += ((uint32) k[10] << 8);

420 /* fall through */

421 case 10:

422 c += ((uint32) k[9] << 16);

423 /* fall through */

424 case 9:

425 c += ((uint32) k[8] << 24);

426 /* fall through */

427 case 8:

428 /* the lowest byte of c is reserved for the length */

429 b += ka[1];

430 a += ka[0];

431 break;

432 case 7:

433 b += ((uint32) k[6] << 8);

434 /* fall through */

435 case 6:

436 b += ((uint32) k[5] << 16);

437 /* fall through */

438 case 5:

439 b += ((uint32) k[4] << 24);

440 /* fall through */

441 case 4:

442 a += ka[0];

443 break;

444 case 3:

445 a += ((uint32) k[2] << 8);

446 /* fall through */

447 case 2:

448 a += ((uint32) k[1] << 16);

449 /* fall through */

450 case 1:

451 a += ((uint32) k[0] << 24);

452 /* case 0: nothing left to add */

453 }

454#else /* !WORDS_BIGENDIAN */

455 switch (len)

456 {

457 case 11:

458 c += ((uint32) k[10] << 24);

459 /* fall through */

460 case 10:

461 c += ((uint32) k[9] << 16);

462 /* fall through */

463 case 9:

464 c += ((uint32) k[8] << 8);

465 /* fall through */

466 case 8:

467 /* the lowest byte of c is reserved for the length */

468 b += ka[1];

469 a += ka[0];

470 break;

471 case 7:

472 b += ((uint32) k[6] << 16);

473 /* fall through */

474 case 6:

475 b += ((uint32) k[5] << 8);

476 /* fall through */

477 case 5:

478 b += k[4];

479 /* fall through */

480 case 4:

481 a += ka[0];

482 break;

483 case 3:

484 a += ((uint32) k[2] << 16);

485 /* fall through */

486 case 2:

487 a += ((uint32) k[1] << 8);

488 /* fall through */

489 case 1:

490 a += k[0];

491 /* case 0: nothing left to add */

492 }

493#endif /* WORDS_BIGENDIAN */

494 }

495 else

496 {

497 /* Code path for non-aligned source data */

498

499 /* handle most of the key */

500 while (len >= 12)

501 {

502#ifdef WORDS_BIGENDIAN

503 a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));

504 b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));

505 c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));

506#else /* !WORDS_BIGENDIAN */

507 a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));

508 b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));

509 c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));

510#endif /* WORDS_BIGENDIAN */

511 mix(a, b, c);

512 k += 12;

513 len -= 12;

514 }

515

516 /* handle the last 11 bytes */

517#ifdef WORDS_BIGENDIAN

518 switch (len)

519 {

520 case 11:

521 c += ((uint32) k[10] << 8);

522 /* fall through */

523 case 10:

524 c += ((uint32) k[9] << 16);

525 /* fall through */

526 case 9:

527 c += ((uint32) k[8] << 24);

528 /* fall through */

529 case 8:

530 /* the lowest byte of c is reserved for the length */

531 b += k[7];

532 /* fall through */

533 case 7:

534 b += ((uint32) k[6] << 8);

535 /* fall through */

536 case 6:

537 b += ((uint32) k[5] << 16);

538 /* fall through */

539 case 5:

540 b += ((uint32) k[4] << 24);

541 /* fall through */

542 case 4:

543 a += k[3];

544 /* fall through */

545 case 3:

546 a += ((uint32) k[2] << 8);

547 /* fall through */

548 case 2:

549 a += ((uint32) k[1] << 16);

550 /* fall through */

551 case 1:

552 a += ((uint32) k[0] << 24);

553 /* case 0: nothing left to add */

554 }

555#else /* !WORDS_BIGENDIAN */

556 switch (len)

557 {

558 case 11:

559 c += ((uint32) k[10] << 24);

560 /* fall through */

561 case 10:

562 c += ((uint32) k[9] << 16);

563 /* fall through */

564 case 9:

565 c += ((uint32) k[8] << 8);

566 /* fall through */

567 case 8:

568 /* the lowest byte of c is reserved for the length */

569 b += ((uint32) k[7] << 24);

570 /* fall through */

571 case 7:

572 b += ((uint32) k[6] << 16);

573 /* fall through */

574 case 6:

575 b += ((uint32) k[5] << 8);

576 /* fall through */

577 case 5:

578 b += k[4];

579 /* fall through */

580 case 4:

581 a += ((uint32) k[3] << 24);

582 /* fall through */

583 case 3:

584 a += ((uint32) k[2] << 16);

585 /* fall through */

586 case 2:

587 a += ((uint32) k[1] << 8);

588 /* fall through */

589 case 1:

590 a += k[0];

591 /* case 0: nothing left to add */

592 }

593#endif /* WORDS_BIGENDIAN */

594 }

595

596 final(a, b, c);

597

598 /* report the result */

599 return ((uint64) b << 32) | c;

600}

601

602/*

603 * hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value

604 *

605 * This has the same result as

606 * hash_bytes(&k, sizeof(uint32))

607 * but is faster and doesn't force the caller to store k into memory.

608 */

609uint32

610 hash_bytes_uint32(uint32 k)

611{

612 uint32 a,

613 b,

614 c;

615

616 a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;

617 a += k;

618

619 final(a, b, c);

620

621 /* report the result */

622 return c;

623}

624

625/*

626 * hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed

627 *

628 * Like hash_bytes_uint32, this is a convenience function.

629 */

630uint64

631 hash_bytes_uint32_extended(uint32 k, uint64 seed)

632{

633 uint32 a,

634 b,

635 c;

636

637 a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;

638

639 if (seed != 0)

640 {

641 a += (uint32) (seed >> 32);

642 b += (uint32) seed;

643 mix(a, b, c);

644 }

645

646 a += k;

647

648 final(a, b, c);

649

650 /* report the result */

651 return ((uint64) b << 32) | c;

652}

653

654/*

655 * string_hash: hash function for keys that are NUL-terminated strings.

656 *

657 * NOTE: this is the default hash function if none is specified.

658 */

659uint32

660 string_hash(const void *key, Size keysize)

661{

662 /*

663 * If the string exceeds keysize-1 bytes, we want to hash only that many,

664 * because when it is copied into the hash table it will be truncated at

665 * that length.

666 */

667 Size s_len = strlen((const char *) key);

668

669 s_len = Min(s_len, keysize - 1);

670 return hash_bytes((const unsigned char *) key, (int) s_len);

671}

672

673/*

674 * tag_hash: hash function for fixed-size tag values

675 */

676uint32

677 tag_hash(const void *key, Size keysize)

678{

679 return hash_bytes((const unsigned char *) key, (int) keysize);

680}

681

682/*

683 * uint32_hash: hash function for keys that are uint32 or int32

684 *

685 * (tag_hash works for this case too, but is slower)

686 */

687uint32

688 uint32_hash(const void *key, Size keysize)

689{

690 Assert(keysize == sizeof(uint32));

691 return hash_bytes_uint32(*((const uint32 *) key));

692}

Min

#define Min(x, y)

Definition: c.h:1003

uint64

uint64_t uint64

Definition: c.h:539

uint32

uint32_t uint32

Definition: c.h:538

Size

size_t Size

Definition: c.h:610

hash_bytes_uint32

uint32 hash_bytes_uint32(uint32 k)

Definition: hashfn.c:610

hash_bytes_extended

uint64 hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)

Definition: hashfn.c:372

hash_bytes

uint32 hash_bytes(const unsigned char *k, int keylen)

Definition: hashfn.c:146

tag_hash

uint32 tag_hash(const void *key, Size keysize)

Definition: hashfn.c:677

hash_bytes_uint32_extended

uint64 hash_bytes_uint32_extended(uint32 k, uint64 seed)

Definition: hashfn.c:631

UINT32_ALIGN_MASK

#define UINT32_ALIGN_MASK

Definition: hashfn.c:46

uint32_hash

uint32 uint32_hash(const void *key, Size keysize)

Definition: hashfn.c:688

string_hash

uint32 string_hash(const void *key, Size keysize)

Definition: hashfn.c:660

mix

#define mix(a, b, c)

Definition: hashfn.c:82

hashfn.h

Assert

Assert(PointerIsAligned(start, uint64))

b

int b

Definition: isn.c:74

a

int a

Definition: isn.c:73

sort-test.key

key

Definition: sort-test.py:19

pg_bitutils.h

len

const void size_t len

Definition: pg_crc32c_sse42.c:28

postgres.h

c

char * c

Definition: preproc-cursor.c:31

PostgreSQL Source Code: src/common/hashfn.c Source File