1/*-------------------------------------------------------------------------
4 * PostgreSQL generic bitmap set package
6 * A bitmap set can represent any set of nonnegative integers, although
7 * it is mainly intended for sets where the maximum value is not large,
8 * say at most a few hundred. By convention, we always represent a set with
9 * the minimum possible number of words, i.e, there are never any trailing
10 * zero words. Enforcing this requires that an empty set is represented as
11 * NULL. Because an empty Bitmapset is represented as NULL, a non-NULL
12 * Bitmapset always has at least 1 Bitmapword. We can exploit this fact to
13 * speed up various loops over the Bitmapset's words array by using "do while"
14 * loops instead of "for" loops. This means the code does not waste time
15 * checking the loop condition before the first iteration. For Bitmapsets
16 * containing only a single word (likely the majority of them) this halves the
17 * number of loop condition checks.
19 * Callers must ensure that the set returned by functions in this file which
20 * adjust the members of an existing set is assigned to all pointers pointing
21 * to that existing set. No guarantees are made that we'll ever modify the
22 * existing set in-place and return it.
24 * To help find bugs caused by callers failing to record the return value of
25 * the function which manipulates an existing set, we support building with
26 * REALLOCATE_BITMAPSETS. This results in the set being reallocated each time
27 * the set is altered and the existing being pfreed. This is useful as if any
28 * references still exist to the old set, we're more likely to notice as
29 * any users of the old set will be accessing pfree'd memory. This option is
30 * only intended to be used for debugging.
32 * Copyright (c) 2003-2025, PostgreSQL Global Development Group
35 * src/backend/nodes/bitmapset.c
37 *-------------------------------------------------------------------------
47 #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
48 #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
50 #define BITMAPSET_SIZE(nwords) \
51 (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
54 * This is a well-known cute trick for isolating the rightmost one-bit
55 * in a word. It assumes two's complement arithmetic. Consider any
56 * nonzero value, and focus attention on the rightmost one. The value is
59 * where x's are unspecified bits. The two's complement negative is formed
60 * by inverting all the bits and adding one. Inversion gives
62 * where each y is the inverse of the corresponding x. Incrementing gives
64 * and then ANDing with the original value gives
66 * This works for all cases except original value = zero, where of course
70 #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
72 #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
74#ifdef USE_ASSERT_CHECKING
76 * bms_is_valid_set - for cassert builds to check for valid sets
81 /* NULL is the correct representation of an empty set */
85 /* check the node tag is set correctly. pfree'd pointer, maybe? */
89 /* trailing zero words are not allowed */
90 if (
a->words[
a->nwords - 1] == 0)
97#ifdef REALLOCATE_BITMAPSETS
100 * Only required in REALLOCATE_BITMAPSETS builds. Provide a simple way
101 * to return a freshly allocated set and pfree the original.
103 * Note: callers which accept multiple sets must be careful when calling this
104 * function to clone one parameter as other parameters may point to the same
105 * set. A good option is to call this just before returning the resulting
119 * bms_copy - make a palloc'd copy of a bitmapset
134 memcpy(result,
a, size);
139 * bms_equal - are two bitmapsets equal? or both NULL?
149 /* Handle cases where either input is NULL */
159 /* can't be equal if the word counts don't match */
160 if (
a->nwords !=
b->nwords)
163 /* check each word matches */
167 if (
a->words[
i] !=
b->words[
i])
169 }
while (++i < a->nwords);
175 * bms_compare - qsort-style comparator for bitmapsets
177 * This guarantees to report values as equal iff bms_equal would say they are
178 * equal. Otherwise, the highest-numbered bit that is set in one value but
179 * not the other determines the result. (This rule means that, for example,
180 * {6} is greater than {5}, which seems plausible.)
190 /* Handle cases where either input is NULL */
192 return (
b == NULL) ? 0 : -1;
196 /* the set with the most words must be greater */
197 if (
a->nwords !=
b->nwords)
198 return (
a->nwords >
b->nwords) ? +1 : -1;
207 return (aw > bw) ? +1 : -1;
213 * bms_make_singleton - build a bitmapset containing a single member
223 elog(
ERROR,
"negative bitmapset member not allowed");
227 result->type = T_Bitmapset;
228 result->
nwords = wordnum + 1;
234 * bms_free - free a bitmapset
236 * Same as pfree except for allowing NULL input
247 * bms_union - create and return a new set containing all members from both
248 * input sets. Both inputs are left unmodified.
261 /* Handle cases where either input is NULL */
266 /* Identify shorter and longer input; copy the longer one */
267 if (
a->nwords <=
b->nwords)
277 /* And union the shorter input into the result */
283 }
while (++
i < otherlen);
288 * bms_intersect - create and return a new set containing members which both
289 * input sets have in common. Both inputs are left unmodified.
303 /* Handle cases where either input is NULL */
304 if (
a == NULL ||
b == NULL)
307 /* Identify shorter and longer input; copy the shorter one */
308 if (
a->nwords <=
b->nwords)
318 /* And intersect the longer input with the result */
319 resultlen = result->
nwords;
326 if (result->
words[
i] != 0)
328 }
while (++
i < resultlen);
329 /* If we computed an empty result, we must return NULL */
330 if (lastnonzero == -1)
336 /* get rid of trailing zero words */
337 result->
nwords = lastnonzero + 1;
342 * bms_difference - create and return a new set containing all the members of
343 * 'a' without the members of 'b'.
354 /* Handle cases where either input is NULL */
361 * In Postgres' usage, an empty result is a very common case, so it's
362 * worth optimizing for that by testing bms_nonempty_difference(). This
363 * saves us a palloc/pfree cycle compared to checking after-the-fact.
368 /* Copy the left input */
371 /* And remove b's bits from result */
372 if (result->
nwords >
b->nwords)
375 * We'll never need to remove trailing zero words when 'a' has more
376 * words than 'b' as the additional words must be non-zero.
381 result->
words[
i] &= ~b->words[
i];
382 }
while (++i < b->nwords);
386 int lastnonzero = -1;
388 /* we may need to remove trailing zero words from the result. */
392 result->
words[
i] &= ~b->words[
i];
394 /* remember the last non-zero word */
395 if (result->
words[
i] != 0)
397 }
while (++i < result->nwords);
399 /* trim off trailing zero words */
400 result->
nwords = lastnonzero + 1;
404 /* Need not check for empty result, since we handled that case above */
409 * bms_is_subset - is A a subset of B?
419 /* Handle cases where either input is NULL */
421 return true;
/* empty set is a subset of anything */
425 /* 'a' can't be a subset of 'b' if it contains more words */
426 if (
a->nwords >
b->nwords)
429 /* Check all 'a' members are set in 'b' */
433 if ((
a->words[
i] & ~
b->words[
i]) != 0)
435 }
while (++i < a->nwords);
440 * bms_subset_compare - compare A and B for equality/subset relationships
442 * This is more efficient than testing bms_is_subset in both directions.
454 /* Handle cases where either input is NULL */
464 /* Check common words */
466 shortlen =
Min(
a->nwords,
b->nwords);
473 if ((aword & ~bword) != 0)
475 /* a is not a subset of b */
480 if ((bword & ~aword) != 0)
482 /* b is not a subset of a */
487 }
while (++
i < shortlen);
488 /* Check extra words */
489 if (
a->nwords >
b->nwords)
491 /* if a has more words then a is not a subset of b */
496 else if (
a->nwords <
b->nwords)
498 /* if b has more words then b is not a subset of a */
507 * bms_is_member - is X a member of A?
517 /* XXX better to just return false for x<0 ? */
519 elog(
ERROR,
"negative bitmapset member not allowed");
525 if (wordnum >=
a->nwords)
527 if ((
a->words[wordnum] & ((
bitmapword) 1 << bitnum)) != 0)
534 * determine 0-based index of member x in the bitmap
536 * Returns (-1) when x is not a member.
549 /* return -1 if not a member of the bitmap */
556 /* count bits in preceding words */
557 for (
i = 0;
i < wordnum;
i++)
561 /* No need to count the bits in a zero word */
567 * Now add bits of the last word, but only those before the item. We can
568 * do that by applying a mask and then using popcount again. To get
569 * 0-based index, we want to count only preceding bits, not the item
570 * itself, so we subtract 1.
579 * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
590 /* Handle cases where either input is NULL */
591 if (
a == NULL ||
b == NULL)
593 /* Check words in common */
594 shortlen =
Min(
a->nwords,
b->nwords);
598 if ((
a->words[
i] &
b->words[
i]) != 0)
600 }
while (++
i < shortlen);
605 * bms_overlap_list - does a set overlap an integer list?
616 if (
a == NULL ||
b ==
NIL)
624 elog(
ERROR,
"negative bitmapset member not allowed");
627 if (wordnum < a->nwords)
628 if ((
a->words[wordnum] & ((
bitmapword) 1 << bitnum)) != 0)
636 * bms_nonempty_difference - do sets have a nonempty difference?
638 * i.e., are any members set in 'a' that are not also set in 'b'.
648 /* Handle cases where either input is NULL */
653 /* if 'a' has more words then it must contain additional members */
654 if (
a->nwords >
b->nwords)
656 /* Check all 'a' members are set in 'b' */
660 if ((
a->words[
i] & ~
b->words[
i]) != 0)
662 }
while (++i < a->nwords);
667 * bms_singleton_member - return the sole integer member of set
669 * Raises error if |a| is not 1.
692 elog(
ERROR,
"bitmapset has multiple members");
696 }
while (++wordnum < nwords);
698 /* we don't expect non-NULL sets to be empty */
704 * bms_get_singleton_member
706 * Test whether the given set is a singleton.
707 * If so, set *member to the value of its sole member, and return true.
708 * If not, return false, without changing *member.
710 * This is more convenient and faster than calling bms_membership() and then
711 * bms_singleton_member(), if we don't care about distinguishing empty sets
712 * from multiple-member sets.
739 }
while (++wordnum < nwords);
741 /* we don't expect non-NULL sets to be empty */
748 * bms_num_members - count members of set
768 /* No need to count the bits in a zero word */
771 }
while (++wordnum < nwords);
776 * bms_membership - does a set have zero, one, or multiple members?
778 * This is faster than making an exact count with bms_num_members().
804 }
while (++wordnum < nwords);
810 * bms_add_member - add a specified member to set
812 * 'a' is recycled when possible.
823 elog(
ERROR,
"negative bitmapset member not allowed");
830 /* enlarge the set if necessary */
831 if (wordnum >=
a->nwords)
833 int oldnwords =
a->nwords;
837 a->nwords = wordnum + 1;
838 /* zero out the enlarged portion */
843 }
while (++i < a->nwords);
848#ifdef REALLOCATE_BITMAPSETS
851 * There's no guarantee that the repalloc returned a new pointer, so copy
852 * and free unconditionally here.
854 a = bms_copy_and_free(
a);
861 * bms_del_member - remove a specified member from set
863 * No error if x is not currently a member of set
865 * 'a' is recycled when possible.
876 elog(
ERROR,
"negative bitmapset member not allowed");
883#ifdef REALLOCATE_BITMAPSETS
884 a = bms_copy_and_free(
a);
887 /* member can't exist. Return 'a' unmodified */
893 /* when last word becomes empty, trim off all trailing empty words */
894 if (
a->words[wordnum] == 0 && wordnum ==
a->nwords - 1)
896 /* find the last non-empty word and make that the new final word */
897 for (
int i = wordnum - 1;
i >= 0;
i--)
899 if (
a->words[
i] != 0)
906 /* the set is now empty */
914 * bms_add_members - like bms_union, but left input is recycled when possible
927 /* Handle cases where either input is NULL */
932#ifdef REALLOCATE_BITMAPSETS
933 a = bms_copy_and_free(
a);
938 /* Identify shorter and longer input; copy the longer one if needed */
939 if (
a->nwords <
b->nwords)
949 /* And union the shorter input into the result */
955 }
while (++
i < otherlen);
958#ifdef REALLOCATE_BITMAPSETS
960 result = bms_copy_and_free(result);
967 * bms_replace_members
968 * Remove all existing members from 'a' and repopulate the set with members
969 * from 'b', recycling 'a', when possible.
987 if (
a->nwords <
b->nwords)
993 a->words[
i] =
b->words[
i];
994 }
while (++i < b->nwords);
996 a->nwords =
b->nwords;
998#ifdef REALLOCATE_BITMAPSETS
1001 * There's no guarantee that the repalloc returned a new pointer, so copy
1002 * and free unconditionally here.
1004 a = bms_copy_and_free(
a);
1012 * Add members in the range of 'lower' to 'upper' to the set.
1014 * Note this could also be done by calling bms_add_member in a loop, however,
1015 * using this function will be faster when the range is large as we work at
1016 * the bitmapword level rather than at bit level.
1029 /* do nothing if nothing is called for, without further checking */
1032#ifdef REALLOCATE_BITMAPSETS
1033 a = bms_copy_and_free(
a);
1040 elog(
ERROR,
"negative bitmapset member not allowed");
1046 a->type = T_Bitmapset;
1047 a->nwords = uwordnum + 1;
1049 else if (uwordnum >=
a->nwords)
1051 int oldnwords =
a->nwords;
1054 /* ensure we have enough words to store the upper bit */
1056 a->nwords = uwordnum + 1;
1057 /* zero out the enlarged portion */
1062 }
while (++i < a->nwords);
1071 * Special case when lwordnum is the same as uwordnum we must perform the
1072 * upper and lower masking on the word.
1074 if (lwordnum == uwordnum)
1081 /* turn on lbitnum and all bits left of it */
1084 /* turn on all bits for any intermediate words */
1085 while (wordnum < uwordnum)
1088 /* turn on upper's bit and all bits right of it. */
1089 a->words[uwordnum] |= (~(
bitmapword) 0) >> ushiftbits;
1092#ifdef REALLOCATE_BITMAPSETS
1095 * There's no guarantee that the repalloc returned a new pointer, so copy
1096 * and free unconditionally here.
1098 a = bms_copy_and_free(
a);
1105 * bms_int_members - like bms_intersect, but left input is recycled when
1118 /* Handle cases where either input is NULL */
1127 /* Intersect b into a; we need never copy */
1128 shortlen =
Min(
a->nwords,
b->nwords);
1133 a->words[
i] &=
b->words[
i];
1135 if (
a->words[
i] != 0)
1137 }
while (++
i < shortlen);
1139 /* If we computed an empty result, we must return NULL */
1140 if (lastnonzero == -1)
1146 /* get rid of trailing zero words */
1147 a->nwords = lastnonzero + 1;
1149#ifdef REALLOCATE_BITMAPSETS
1150 a = bms_copy_and_free(
a);
1157 * bms_del_members - delete members in 'a' that are set in 'b'. 'a' is
1158 * recycled when possible.
1168 /* Handle cases where either input is NULL */
1173#ifdef REALLOCATE_BITMAPSETS
1174 a = bms_copy_and_free(
a);
1180 /* Remove b's bits from a; we need never copy */
1181 if (
a->nwords >
b->nwords)
1184 * We'll never need to remove trailing zero words when 'a' has more
1190 a->words[
i] &= ~b->words[
i];
1191 }
while (++i < b->nwords);
1195 int lastnonzero = -1;
1197 /* we may need to remove trailing zero words from the result. */
1201 a->words[
i] &= ~b->words[
i];
1203 /* remember the last non-zero word */
1204 if (
a->words[
i] != 0)
1206 }
while (++i < a->nwords);
1208 /* check if 'a' has become empty */
1209 if (lastnonzero == -1)
1215 /* trim off any trailing zero words */
1216 a->nwords = lastnonzero + 1;
1219#ifdef REALLOCATE_BITMAPSETS
1220 a = bms_copy_and_free(
a);
1227 * bms_join - like bms_union, but *either* input *may* be recycled
1240 /* Handle cases where either input is NULL */
1243#ifdef REALLOCATE_BITMAPSETS
1244 b = bms_copy_and_free(
b);
1251#ifdef REALLOCATE_BITMAPSETS
1252 a = bms_copy_and_free(
a);
1258 /* Identify shorter and longer input; use longer one as result */
1259 if (
a->nwords <
b->nwords)
1269 /* And union the shorter input into the result */
1270 otherlen = other->
nwords;
1275 }
while (++
i < otherlen);
1276 if (other != result)
/* pure paranoia */
1279#ifdef REALLOCATE_BITMAPSETS
1280 result = bms_copy_and_free(result);
1287 * bms_next_member - find next member of a set
1289 * Returns smallest member greater than "prevbit", or -2 if there is none.
1290 * "prevbit" must NOT be less than -1, or the behavior is unpredictable.
1292 * This is intended as support for iterating through the members of a set.
1293 * The typical pattern is
1296 * while ((x = bms_next_member(inputset, x)) >= 0)
1299 * Notice that when there are no more members, we return -2, not -1 as you
1300 * might expect. The rationale for that is to allow distinguishing the
1301 * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1302 * It makes no difference in simple loop usage, but complex iteration logic
1303 * might need such an ability.
1319 for (wordnum =
WORDNUM(prevbit); wordnum < nwords; wordnum++)
1323 /* ignore bits before prevbit */
1335 /* in subsequent words, consider all bits */
1342 * bms_prev_member - find prev member of a set
1344 * Returns largest member less than "prevbit", or -2 if there is none.
1345 * "prevbit" must NOT be more than one above the highest possible bit that can
1346 * be set in the Bitmapset at its current size.
1348 * To ease finding the highest set bit for the initial loop, the special
1349 * prevbit value of -1 can be passed to have the function find the highest
1350 * valued member in the set.
1352 * This is intended as support for iterating through the members of a set in
1353 * reverse. The typical pattern is
1356 * while ((x = bms_prev_member(inputset, x)) >= 0)
1359 * Notice that when there are no more members, we return -2, not -1 as you
1360 * might expect. The rationale for that is to allow distinguishing the
1361 * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1362 * It makes no difference in simple loop usage, but complex iteration logic
1363 * might need such an ability.
1376 * If set is NULL or if there are no more bits to the right then we've
1379 if (
a == NULL || prevbit == 0)
1382 /* Validate callers didn't give us something out of range */
1386 /* transform -1 to the highest possible bit we could have set */
1394 for (wordnum =
WORDNUM(prevbit); wordnum >= 0; wordnum--)
1398 /* mask out bits left of prevbit */
1410 /* in subsequent words, consider all bits */
1417 * bms_hash_value - compute a hash key for a Bitmapset
1425 return 0;
/* All empty sets hash to 0 */
1431 * bitmap_hash - hash function for keys that are (pointers to) Bitmapsets
1433 * Note: don't forget to specify bitmap_match as the match function!
1443 * bitmap_match - match function to use with bitmap_hash
#define BITMAPSET_SIZE(nwords)
Bitmapset * bms_replace_members(Bitmapset *a, const Bitmapset *b)
Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b)
int bms_prev_member(const Bitmapset *a, int prevbit)
Bitmapset * bms_make_singleton(int x)
Bitmapset * bms_int_members(Bitmapset *a, const Bitmapset *b)
Bitmapset * bms_intersect(const Bitmapset *a, const Bitmapset *b)
uint32 bitmap_hash(const void *key, Size keysize)
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
int bms_next_member(const Bitmapset *a, int prevbit)
uint32 bms_hash_value(const Bitmapset *a)
Bitmapset * bms_del_members(Bitmapset *a, const Bitmapset *b)
Bitmapset * bms_add_range(Bitmapset *a, int lower, int upper)
Bitmapset * bms_del_member(Bitmapset *a, int x)
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
int bms_singleton_member(const Bitmapset *a)
void bms_free(Bitmapset *a)
int bms_num_members(const Bitmapset *a)
bool bms_is_member(int x, const Bitmapset *a)
Bitmapset * bms_add_member(Bitmapset *a, int x)
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b)
#define HAS_MULTIPLE_ONES(x)
int bitmap_match(const void *key1, const void *key2, Size keysize)
BMS_Membership bms_membership(const Bitmapset *a)
int bms_member_index(Bitmapset *a, int x)
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
int bms_compare(const Bitmapset *a, const Bitmapset *b)
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Bitmapset * bms_join(Bitmapset *a, Bitmapset *b)
bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
Bitmapset * bms_copy(const Bitmapset *a)
bool bms_overlap_list(const Bitmapset *a, const List *b)
#define bmw_rightmost_one_pos(w)
#define bmw_leftmost_one_pos(w)
#define BITS_PER_BITMAPWORD
static Datum hash_any(const unsigned char *k, int keylen)
Assert(PointerIsAligned(start, uint64))
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
void * palloc0(Size size)
#define IsA(nodeptr, _type_)
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
static uint32 DatumGetUInt32(Datum X)
bitmapword words[FLEXIBLE_ARRAY_MEMBER]