1/*-------------------------------------------------------------------------
4 * Miscellaneous functions for bit-wise operations.
6 * Copyright (c) 2019-2025, PostgreSQL Global Development Group
9 * src/port/pg_bitutils.c
11 *-------------------------------------------------------------------------
26 * Array giving the position of the left-most set bit for each possible
27 * byte value. We count the right-most position as the 0th bit, and the
28 * left-most the 7th bit. The 0th entry of the array should not be used.
30 * Note: this is not used by the functions in pg_bitutils.h when
31 * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32 * extensions possibly compiled with a different compiler can use it.
35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
54 * Array giving the position of the right-most set bit for each possible
55 * byte value. We count the right-most position as the 0th bit, and the
56 * left-most the 7th bit. The 0th entry of the array should not be used.
58 * Note: this is not used by the functions in pg_bitutils.h when
59 * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60 * extensions possibly compiled with a different compiler can use it.
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
82 * Array giving the number of 1-bits in each possible byte value.
84 * Note: we export this for use by functions in which explicit use
85 * of the popcount functions seems unlikely to be a win.
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
107 * If we are building the Neon versions, we don't need the "slow" fallbacks.
109#ifndef POPCNT_AARCH64
116#ifdef TRY_POPCNT_X86_64
117static bool pg_popcount_available(
void);
120static uint64 pg_popcount_choose(
const char *
buf,
int bytes);
121static uint64 pg_popcount_masked_choose(
const char *
buf,
int bytes,
bits8 mask);
122static inline int pg_popcount32_fast(
uint32 word);
123static inline int pg_popcount64_fast(
uint64 word);
124static uint64 pg_popcount_fast(
const char *
buf,
int bytes);
125static uint64 pg_popcount_masked_fast(
const char *
buf,
int bytes,
bits8 mask);
131#endif /* TRY_POPCNT_X86_64 */
133#ifdef TRY_POPCNT_X86_64
136 * Return true if CPUID indicates that the POPCNT instruction is available.
139pg_popcount_available(
void)
141 unsigned int exx[4] = {0, 0, 0, 0};
143#if defined(HAVE__GET_CPUID)
144 __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
145#elif defined(HAVE__CPUID)
148#error cpuid instruction not available
151 return (exx[2] & (1 << 23)) != 0;
/* POPCNT */
155 * These functions get called on the first call to pg_popcount32 etc.
156 * They detect whether we can use the asm implementations, and replace
157 * the function pointers so that subsequent calls are routed directly to
158 * the chosen implementation.
161choose_popcount_functions(
void)
163 if (pg_popcount_available())
178#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
179 if (pg_popcount_avx512_available())
190 choose_popcount_functions();
197 choose_popcount_functions();
202pg_popcount_choose(
const char *
buf,
int bytes)
204 choose_popcount_functions();
209pg_popcount_masked_choose(
const char *
buf,
int bytes,
bits8 mask)
211 choose_popcount_functions();
217 * Return the number of 1 bits set in word
223 return __popcnt(
word);
227__asm__ __volatile__(
" popcntl %1,%0\n":
"=q"(res):
"rm"(
word):
"cc");
234 * Return the number of 1 bits set in word
240 return __popcnt64(
word);
244__asm__ __volatile__(
" popcntq %1,%0\n":
"=q"(res):
"rm"(
word):
"cc");
251 * Returns the number of 1-bits in buf
254pg_popcount_fast(
const char *
buf,
int bytes)
258#if SIZEOF_VOID_P >= 8
259 /* Process in 64-bit chunks if the buffer is aligned. */
266 popcnt += pg_popcount64_fast(*words++);
270 buf = (
const char *) words;
273 /* Process in 32-bit chunks if the buffer is aligned. */
280 popcnt += pg_popcount32_fast(*words++);
284 buf = (
const char *) words;
288 /* Process any remaining bytes */
296 * pg_popcount_masked_fast
297 * Returns the number of 1-bits in buf after applying the mask to each byte
300pg_popcount_masked_fast(
const char *
buf,
int bytes,
bits8 mask)
304#if SIZEOF_VOID_P >= 8
305 /* Process in 64-bit chunks if the buffer is aligned */
306 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
314 popcnt += pg_popcount64_fast(*words++ & maskv);
318 buf = (
const char *) words;
321 /* Process in 32-bit chunks if the buffer is aligned. */
330 popcnt += pg_popcount32_fast(*words++ & maskv);
334 buf = (
const char *) words;
338 /* Process any remaining bytes */
345#endif /* TRY_POPCNT_X86_64 */
348 * If we are building the Neon versions, we don't need the "slow" fallbacks.
350#ifndef POPCNT_AARCH64
354 * Return the number of 1 bits set in word
359#ifdef HAVE__BUILTIN_POPCOUNT
360 return __builtin_popcount(
word);
361#else /* !HAVE__BUILTIN_POPCOUNT */
371#endif /* HAVE__BUILTIN_POPCOUNT */
376 * Return the number of 1 bits set in word
381#ifdef HAVE__BUILTIN_POPCOUNT
383 return __builtin_popcountl(
word);
384#elif SIZEOF_LONG_LONG == 8
385 return __builtin_popcountll(
word);
387#error "cannot find integer of the same size as uint64_t"
389#else /* !HAVE__BUILTIN_POPCOUNT */
399#endif /* HAVE__BUILTIN_POPCOUNT */
404 * Returns the number of 1-bits in buf
411#if SIZEOF_VOID_P >= 8
412 /* Process in 64-bit chunks if the buffer is aligned. */
423 buf = (
const char *) words;
426 /* Process in 32-bit chunks if the buffer is aligned. */
437 buf = (
const char *) words;
441 /* Process any remaining bytes */
449 * pg_popcount_masked_slow
450 * Returns the number of 1-bits in buf after applying the mask to each byte
457#if SIZEOF_VOID_P >= 8
458 /* Process in 64-bit chunks if the buffer is aligned */
459 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
471 buf = (
const char *) words;
474 /* Process in 32-bit chunks if the buffer is aligned. */
487 buf = (
const char *) words;
491 /* Process any remaining bytes */
498#endif /* ! POPCNT_AARCH64 */
500#if !defined(TRY_POPCNT_X86_64) && !defined(POPCNT_AARCH64)
503 * When special CPU instructions are not available, there's no point in using
504 * function pointers to vary the implementation between the fast and slow
505 * method. We instead just make these actual external functions. The compiler
506 * should be able to inline the slow versions here.
521 * pg_popcount_optimized
522 * Returns the number of 1-bits in buf
531 * pg_popcount_masked_optimized
532 * Returns the number of 1-bits in buf after applying the mask to each byte
540#endif /* ! TRY_POPCNT_X86_64 && ! POPCNT_AARCH64 */
#define TYPEALIGN(ALIGNVAL, LEN)
const uint8 pg_number_of_ones[256]
uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
static int pg_popcount32_slow(uint32 word)
uint64 pg_popcount_optimized(const char *buf, int bytes)
int pg_popcount64(uint64 word)
int pg_popcount32(uint32 word)
const uint8 pg_rightmost_one_pos[256]
static int pg_popcount64_slow(uint64 word)
const uint8 pg_leftmost_one_pos[256]
static uint64 pg_popcount_slow(const char *buf, int bytes)
static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
static uint64 pg_popcount_masked(const char *buf, int bytes, bits8 mask)
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)