1/*-------------------------------------------------------------------------
4 * Miscellaneous functions for bit-wise operations.
7 * Copyright (c) 2019-2025, PostgreSQL Global Development Group
9 * src/include/port/pg_bitutils.h
11 *-------------------------------------------------------------------------
18#define HAVE_BITSCAN_FORWARD
19#define HAVE_BITSCAN_REVERSE
22#if defined(HAVE__BUILTIN_CTZ)
23#define HAVE_BITSCAN_FORWARD
26#if defined(HAVE__BUILTIN_CLZ)
27#define HAVE_BITSCAN_REVERSE
36 * pg_leftmost_one_pos32
37 * Returns the position of the most significant set bit in "word",
38 * measured from the least significant bit. word must not be 0.
43#ifdef HAVE__BUILTIN_CLZ
46 return 31 - __builtin_clz(
word);
47#elif defined(_MSC_VER)
53 non_zero = _BitScanReverse(&result,
word);
60 while ((
word >> shift) == 0)
64#endif /* HAVE__BUILTIN_CLZ */
68 * pg_leftmost_one_pos64
69 * As above, but for a 64-bit word.
74#ifdef HAVE__BUILTIN_CLZ
78 return 63 - __builtin_clzl(
word);
79#elif SIZEOF_LONG_LONG == 8
80 return 63 - __builtin_clzll(
word);
82#error "cannot find integer type of the same size as uint64_t"
85#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
91 non_zero = _BitScanReverse64(&result,
word);
98 while ((
word >> shift) == 0)
102#endif /* HAVE__BUILTIN_CLZ */
106 * pg_rightmost_one_pos32
107 * Returns the position of the least significant set bit in "word",
108 * measured from the least significant bit. word must not be 0.
113#ifdef HAVE__BUILTIN_CTZ
116 return __builtin_ctz(
word);
117#elif defined(_MSC_VER)
118 unsigned long result;
123 non_zero = _BitScanForward(&result,
word);
130 while ((
word & 255) == 0)
137#endif /* HAVE__BUILTIN_CTZ */
141 * pg_rightmost_one_pos64
142 * As above, but for a 64-bit word.
147#ifdef HAVE__BUILTIN_CTZ
151 return __builtin_ctzl(
word);
152#elif SIZEOF_LONG_LONG == 8
153 return __builtin_ctzll(
word);
155#error "cannot find integer type of the same size as uint64_t"
158#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
159 unsigned long result;
164 non_zero = _BitScanForward64(&result,
word);
171 while ((
word & 255) == 0)
178#endif /* HAVE__BUILTIN_CTZ */
183 * Returns the next higher power of 2 above 'num', or 'num' if it's
184 * already a power of 2.
186 * 'num' mustn't be 0 or be above PG_UINT32_MAX / 2 + 1.
194 * A power 2 number has only 1 bit set. Subtracting 1 from such a number
195 * will turn on all previous bits resulting in no common bits being set
196 * between num and num-1.
198 if ((num & (num - 1)) == 0)
199 return num;
/* already power 2 */
206 * Returns the next higher power of 2 above 'num', or 'num' if it's
207 * already a power of 2.
209 * 'num' mustn't be 0 or be above PG_UINT64_MAX / 2 + 1.
217 * A power 2 number has only 1 bit set. Subtracting 1 from such a number
218 * will turn on all previous bits resulting in no common bits being set
219 * between num and num-1.
221 if ((num & (num - 1)) == 0)
222 return num;
/* already power 2 */
229 * Returns the next lower power of 2 below 'num', or 'num' if it's
230 * already a power of 2.
232 * 'num' mustn't be 0.
242 * Returns the next lower power of 2 below 'num', or 'num' if it's
243 * already a power of 2.
245 * 'num' mustn't be 0.
255 * Returns equivalent of ceil(log2(num))
268 * Returns equivalent of ceil(log2(num))
280 * With MSVC on x86_64 builds, try using native popcnt instructions via the
281 * __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's
282 * __builtin_popcount* intrinsic functions as they always emit popcnt
285#if defined(_MSC_VER) && defined(_M_AMD64)
286#define HAVE_X86_64_POPCNTQ
290 * On x86_64, we can use the hardware popcount instruction, but only if
291 * we can verify that the CPU supports it via the cpuid instruction.
293 * Otherwise, we fall back to a hand-rolled implementation.
295#ifdef HAVE_X86_64_POPCNTQ
296#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
297#define TRY_POPCNT_X86_64 1
302 * On AArch64, we can use Neon instructions if the compiler provides access to
303 * them (as indicated by __ARM_NEON). As in simd.h, we assume that all
304 * available 64-bit hardware has Neon support.
306#if defined(__aarch64__) && defined(__ARM_NEON)
307#define POPCNT_AARCH64 1
310#ifdef TRY_POPCNT_X86_64
311/* Attempt to use the POPCNT instruction, but perform a runtime check first */
318 * We can also try to use the AVX-512 popcount instruction on some systems.
319 * The implementation of that is located in its own file.
321#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
322extern bool pg_popcount_avx512_available(
void);
323extern uint64 pg_popcount_avx512(
const char *
buf,
int bytes);
324extern uint64 pg_popcount_masked_avx512(
const char *
buf,
int bytes,
bits8 mask);
328/* Use the Neon version of pg_popcount{32,64} without function pointer. */
333 * We can try to use an SVE-optimized pg_popcount() on some systems For that,
334 * we do use a function pointer.
336#ifdef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
345/* Use a portable implementation -- no need for a function pointer. */
351#endif /* TRY_POPCNT_X86_64 */
354 * Returns the number of 1-bits in buf.
356 * If there aren't many bytes to process, the function call overhead of the
357 * optimized versions isn't worth taking, so we inline a loop that consults
358 * pg_number_of_ones in that case. If there are many bytes to process, we
359 * accept the function call overhead because the optimized versions are likely
366 * We set the threshold to the point at which we'll first use special
367 * instructions in the optimized version.
369#if SIZEOF_VOID_P >= 8
375 if (bytes < threshold)
388 * Returns the number of 1-bits in buf after applying the mask to each byte.
390 * Similar to pg_popcount(), we only take on the function pointer overhead when
391 * it's likely to be faster.
397 * We set the threshold to the point at which we'll first use special
398 * instructions in the optimized version.
400#if SIZEOF_VOID_P >= 8
406 if (bytes < threshold)
419 * Rotate the bits of "word" to the right/left by n bits.
424 return (
word >> n) | (
word << (32 - n));
430 return (
word << n) | (
word >> (32 - n));
433/* size_t variants of the above, as required */
435#if SIZEOF_SIZE_T == 4
436#define pg_leftmost_one_pos_size_t pg_leftmost_one_pos32
437#define pg_nextpower2_size_t pg_nextpower2_32
438#define pg_prevpower2_size_t pg_prevpower2_32
440 #define pg_leftmost_one_pos_size_t pg_leftmost_one_pos64
441 #define pg_nextpower2_size_t pg_nextpower2_64
442 #define pg_prevpower2_size_t pg_prevpower2_64
445#endif /* PG_BITUTILS_H */
Assert(PointerIsAligned(start, uint64))
uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
PGDLLIMPORT const uint8 pg_number_of_ones[256]
static uint32 pg_rotate_left32(uint32 word, int n)
static int pg_rightmost_one_pos64(uint64 word)
static uint32 pg_nextpower2_32(uint32 num)
static uint64 pg_ceil_log2_64(uint64 num)
static uint32 pg_rotate_right32(uint32 word, int n)
uint64 pg_popcount_optimized(const char *buf, int bytes)
int pg_popcount64(uint64 word)
int pg_popcount32(uint32 word)
static int pg_rightmost_one_pos32(uint32 word)
static uint64 pg_nextpower2_64(uint64 num)
static int pg_leftmost_one_pos32(uint32 word)
PGDLLIMPORT const uint8 pg_rightmost_one_pos[256]
static uint32 pg_ceil_log2_32(uint32 num)
static uint64 pg_popcount(const char *buf, int bytes)
static uint32 pg_prevpower2_32(uint32 num)
static uint64 pg_prevpower2_64(uint64 num)
static uint64 pg_popcount_masked(const char *buf, int bytes, bits8 mask)
PGDLLIMPORT const uint8 pg_leftmost_one_pos[256]
static int pg_leftmost_one_pos64(uint64 word)
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)