4 * Building blocks for creating fast inlineable hash functions. The
5 * functions in this file are not guaranteed to be stable between versions,
6 * and may differ by hardware platform. Hence they must not be used in
7 * indexes or other on-disk structures. See hashfn.h if you need stability.
10 * Portions Copyright (c) 2024-2025, PostgreSQL Global Development Group
12 * src/include/common/hashfn_unstable.h
14#ifndef HASHFN_UNSTABLE_H
15#define HASHFN_UNSTABLE_H
19 * fasthash is a modification of code taken from
20 * https://code.google.com/archive/p/fast-hash/source/default/source
21 * under the terms of the MIT license. The original copyright
27 Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
29 Permission is hereby granted, free of charge, to any person
30 obtaining a copy of this software and associated documentation
31 files (the "Software"), to deal in the Software without
32 restriction, including without limitation the rights to use, copy,
33 modify, merge, publish, distribute, sublicense, and/or sell copies
34 of the Software, and to permit persons to whom the Software is
35 furnished to do so, subject to the following conditions:
37 The above copyright notice and this permission notice shall be
38 included in all copies or substantial portions of the Software.
40 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
41 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
42 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
43 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
44 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
45 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
46 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
51 * fasthash as implemented here has two interfaces:
53 * 1) Standalone functions, e.g. fasthash32() for a single value with a
54 * known length. These return the same hash code as the original, at
55 * least on little-endian machines.
57 * 2) Incremental interface. This can used for incorporating multiple
58 * inputs. First, initialize the hash state (here with a zero seed):
61 * fasthash_init(&hs, 0);
63 * If the inputs are of types that can be trivially cast to uint64, it's
67 * fasthash_combine(&hs);
69 * fasthash_combine(&hs);
72 * For longer or variable-length input, fasthash_accum() is a more
73 * flexible, but more verbose method. The standalone functions use this
74 * internally, so see fasthash64() for an example of this.
76 * After all inputs have been mixed in, finalize the hash:
78 * hashcode = fasthash_final32(&hs, 0);
80 * The incremental interface allows an optimization for NUL-terminated
83 * len = fasthash_accum_cstring(&hs, str);
84 * hashcode = fasthash_final32(&hs, len);
86 * By handling the terminator on-the-fly, we can avoid needing a strlen()
87 * call to tell us how many bytes to hash. Experimentation has found that
88 * SMHasher fails unless we incorporate the length, so it is passed to
89 * the finalizer as a tweak.
95 /* staging area for chunks of input */
101 #define FH_SIZEOF_ACCUM sizeof(uint64)
105 * Initialize the hash state.
107 * 'seed' can be zero.
113 hs->
hash = seed ^ 0x880355f21e6d1965;
116/* both the finalizer and part of the combining step */
120 h ^= (h >> 23) + tweak;
121 h *= 0x2127599bf4325c37;
126/* combine one chunk of input into the hash */
131 hs->
hash *= 0x880355f21e6d1965;
134/* accumulate up to 8 bytes of input and combine it into the hash */
144 * For consistency, bytewise loads must match the platform's endianness.
146#ifdef WORDS_BIGENDIAN
150 memcpy(&hs->
accum, k, 8);
162 memcpy(&lower_four, k,
sizeof(lower_four));
181 memcpy(&hs->
accum, k, 8);
193 memcpy(&lower_four, k,
sizeof(lower_four));
194 hs->
accum |= lower_four;
214 * Set high bit in lowest byte where the input is zero, from:
215 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
217 #define haszero64(v) \
218 (((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
221 * all-purpose workhorse for fasthash_accum_cstring
230 size_t chunk_len = 0;
243 * specialized workhorse for fasthash_accum_cstring
245 * With an aligned pointer, we consume the string a word at a time.
246 * Loading the word containing the NUL terminator cannot segfault since
247 * allocation boundaries are suitably aligned. To keep from setting
248 * off alarms with address sanitizers, exclude this function from
262 * For every chunk of input, check for zero bytes before mixing into the
263 * hash. The chunk with zeros must contain the NUL terminator.
278 /* mix in remaining bytes */
286 * Mix 'str' into the hash state and return the length of the string.
291#if SIZEOF_VOID_P >= 8
294#ifdef USE_ASSERT_CHECKING
303 len = fasthash_accum_cstring_aligned(hs,
str);
308#endif /* SIZEOF_VOID_P */
311 * It's not worth it to try to make the word-at-a-time optimization work
312 * on 32-bit platforms.
320 * 'tweak' is intended to be the input length when the caller doesn't know
321 * the length ahead of time, such as for NUL-terminated strings, otherwise
331 * Reduce a 64-bit hash to a 32-bit hash.
333 * This optional step provides a bit more additional mixing compared to
334 * just taking the lower 32-bits.
340 * Convert the 64-bit hashcode to Fermat residue, which shall retain
341 * information from both the higher and lower parts of hashcode.
343 return h - (h >> 32);
346/* finalize and reduce */
354 * The original fasthash64 function, re-implemented using the incremental
355 * interface. Returns a 64-bit hashcode. 'len' controls not only how
356 * many bytes to hash, but also modifies the internal seed.
357 * 'seed' can be zero.
366 /* re-initialize the seed according to input length */
367 hs.
hash = seed ^ (
len * 0x880355f21e6d1965);
380/* like fasthash64, but returns a 32-bit hashcode */
388 * Convenience function for hashing NUL-terminated strings
399 * Combine string into the hash and save the length for tweaking the final
407#endif /* HASHFN_UNSTABLE_H */
#define PointerIsAligned(pointer, type)
Assert(PointerIsAligned(start, uint64))
static uint32 fasthash_reduce32(uint64 h)
static uint32 fasthash32(const char *k, size_t len, uint64 seed)
pg_attribute_no_sanitize_address() static inline size_t fasthash_accum_cstring_aligned(fasthash_state *hs
static size_t fasthash_accum_cstring(fasthash_state *hs, const char *str)
static uint64 fasthash64(const char *k, size_t len, uint64 seed)
static uint64 fasthash_mix(uint64 h, uint64 tweak)
static uint32 fasthash_final32(fasthash_state *hs, uint64 tweak)
struct fasthash_state fasthash_state
static void fasthash_combine(fasthash_state *hs)
static void fasthash_accum(fasthash_state *hs, const char *k, size_t len)
static uint64 fasthash_final64(fasthash_state *hs, uint64 tweak)
static uint32 hash_string(const char *s)
static size_t fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
static void fasthash_init(fasthash_state *hs, uint64 seed)