I am looking for opinion on this bitset implementation. It only has 4 functions and they are
set - set a bit/clear - clear a bit/check - test a bit /zero - zero out all bits.
There is another one as well but that isn't meant to be used by the users of bitset.
I have used macro to make the underlying type that bitset uses generic as well as the size. I am very new to macros for generic programming. I have been using them here and there in C++ but it had templates and constexpr so there was never really a need. So I want the emphasis of this review to be on use of macros with a little bit on the actual implementation on the side.
First is some test code that shows how bitset can be used.
#include "cim_generic_bitset.h"
#include <stdio.h>
enum { n_bits = 150 };
//CIM_GENERIC_BITSET_DECL(state, uint8_t, CIM_BITSET_OF_SIZE(uint8_t, n_bits));
CIM_GENERIC_BITSET_DEF(state, uint8_t, CIM_BITSET_OF_SIZE(uint8_t, n_bits));
int main()
{
uint8_t state[CIM_BITSET_OF_SIZE(uint8_t, n_bits)];
cim_state_bitset_zero(state);
cim_state_bitset_set(state, 0);
cim_state_bitset_set(state,22);
cim_state_bitset_set(state, 49);
cim_state_bitset_set(state, 93);
cim_state_bitset_set(state, 141);
cim_state_bitset_set(state, 149);
assert(cim_state_bitset_check(state, 0));
assert(cim_state_bitset_check(state, 22));
assert(cim_state_bitset_check(state, 49));
assert(cim_state_bitset_check(state, 93));
assert(cim_state_bitset_check(state, 141));
assert(cim_state_bitset_check(state, 149));
for (int i = 0; i < n_bits; i++)
{
if (i != 0 && i != 22 && i != 49 && i != 93 && i != 141 && i != 149)
assert(!cim_state_bitset_check(state, i));
}
for (int i = 0; i < n_bits; i++)
cim_state_bitset_clear(state, i);
assert(!cim_state_bitset_check(state, 0));
assert(!cim_state_bitset_check(state, 22));
assert(!cim_state_bitset_check(state, 49));
assert(!cim_state_bitset_check(state, 93));
assert(!cim_state_bitset_check(state, 141));
assert(!cim_state_bitset_check(state, 149));
return 0;
}
This is the bitset implementation.
#pragma once
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
typedef struct cim_byte_bit_index
{
size_t byte;
size_t bit;
} cim_byte_bit_index;
//1 extra sizeof(type) will be allocated if n_bits / sizeof(type) happens to be a whole number
//but idk how to implement a constexpr ceil soo
#define CIM_BITSET_OF_SIZE(type, n_bits) (size_t)( (n_bits / (sizeof(type) * 8)) + 1)
#define CIM_GENERIC_BITSET_ZERO_DECL(name, type, size)\
void cim_##name##_bitset_zero(type* bitfield);
#define CIM_GENERIC_BITSET_ZERO_DEF(name, type, size)\
void cim_##name##_bitset_zero(type* bitfield)\
{\
memset(bitfield, 0, size * sizeof(type));\
}
#define CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DECL(name, type, size)\
cim_byte_bit_index cim_get_##name##_byte_bit_index(size_t field_size, size_t index);
#define CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DEF(name, type, size)\
cim_byte_bit_index cim_get_##name##_byte_bit_index(size_t field_size, size_t index)\
{\
size_t word_size = sizeof(type) * 8;\
size_t which_byte = (size_t)(index / (float)word_size);\
size_t which_bit = (size_t)(index - (which_byte * word_size));\
cim_byte_bit_index byte_bit_index;\
byte_bit_index.byte = which_byte;\
byte_bit_index.bit = which_bit;\
return byte_bit_index;\
}
#define CIM_GENERIC_BITSET_SET_BIT_DECL(name, type, size)\
void cim_##name##_bitset_set(type* bitfield, size_t index);
#define CIM_GENERIC_BITSET_SET_BIT_DEF(name, type, size)\
void cim_##name##_bitset_set(type* bitfield, size_t index)\
{\
assert(index < size * sizeof(type) * 8 && "Bitset index out of range");\
cim_byte_bit_index byte_bit_index = cim_get_##name##_byte_bit_index(size, index);\
bitfield[byte_bit_index.byte] |= 1 << byte_bit_index.bit;\
}
#define CIM_GENERIC_BITSET_CLEAR_BIT_DECL(name, type, size)\
void cim_##name##_bitset_clear(type* bitfield, size_t index);
#define CIM_GENERIC_BITSET_CLEAR_BIT_DEF(name, type, size)\
void cim_##name##_bitset_clear(type* bitfield, size_t index)\
{\
assert(index < size * sizeof(type) * 8 && "Bitset index out of range");\
cim_byte_bit_index byte_bit_index = cim_get_##name##_byte_bit_index(size, index);\
bitfield[byte_bit_index.byte] &= ~(1 << byte_bit_index.bit);\
}
#define CIM_GENERIC_BITSET_CHECK_BIT_DECL(name, type, size)\
bool cim_##name##_bitset_check(type* bitfield, size_t index);
#define CIM_GENERIC_BITSET_CHECK_BIT_DEF(name, type, size)\
bool cim_##name##_bitset_check(type* bitfield, size_t index)\
{\
assert(index < size * sizeof(type) * 8 && "Bitset index out of range");\
cim_byte_bit_index byte_bit_index = cim_get_##name##_byte_bit_index(size, index);\
return bitfield[byte_bit_index.byte] & (1 << byte_bit_index.bit);\
}
#define CIM_GENERIC_BITSET_DECL(name, type, size)\
CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DECL(name, type, size)\
CIM_GENERIC_BITSET_SET_BIT_DECL(name, type, size)\
CIM_GENERIC_BITSET_CHECK_BIT_DECL(name, type, size)\
CIM_GENERIC_BITSET_ZERO_DECL(name, type, size)\
CIM_GENERIC_BITSET_CLEAR_BIT_DECL(name, type, size)
#define CIM_GENERIC_BITSET_DEF(name, type, size)\
CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DEF(name, type, size)\
CIM_GENERIC_BITSET_SET_BIT_DEF(name, type, size)\
CIM_GENERIC_BITSET_CHECK_BIT_DEF(name, type, size)\
CIM_GENERIC_BITSET_ZERO_DEF(name, type, size)\
CIM_GENERIC_BITSET_CLEAR_BIT_DEF(name, type, size)
2 Answers 2
size_t word_size = sizeof(type) * 8;\
That magic number 8
looks like it ought to be CHAR_BIT
. The same number crops up in other places where it looks like CHAR_BIT
is intended.
//1 extra sizeof(type) will be allocated if n_bits / sizeof(type) happens to be a whole number //but idk how to implement a constexpr ceil soo #define CIM_BITSET_OF_SIZE(type, n_bits) (size_t)( (n_bits / (sizeof(type) * 8)) + 1)
Oops - we need to ensure precedence rules in the expansion, by parenthesising (n_bits)
.
We should probably avoid the over-allocation by adjusting before we divide:
#define CIM_BITSET_OF_SIZE(type, n_bits) \
((size_t)((n_bits) - 1) / sizeof (type) / CHAR_BIT) + 1)
#define CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DEF(name, type, size)\ cim_byte_bit_index cim_get_##name##_byte_bit_index(size_t field_size, size_t index)\ {\ size_t word_size = sizeof(type) * 8;\
That should probably be a static const
Instead of having separate ⋯_DECL()
and ⋯_DEF()
macros, perhaps just define functions with file scope (i.e. static
)? That will increase opportunities for inlining, too.
-
\$\begingroup\$ Thank you for the review. My intention with
(size_t)( (n_bits / (sizeof(type) * 8)) + 1)
was for it be be kind of similar toceil((float)n_bits / sizeof(type * 8))
.((size_t)((n_bits) - 1) / sizeof (type) / CHAR_BIT) + 1)
doesn't look quite right. did you mistype something? \$\endgroup\$sOmEonE– sOmEonE2023年01月06日 13:12:24 +00:00Commented Jan 6, 2023 at 13:12 -
1\$\begingroup\$ I only inspected that, and haven't unit-tested. The standard technique for ceiling integer division adds one less than the denominator to the numerator before dividing (i.e.
(p + (q - 1)) / q
. I refactored that to((p - 1) + q) / q
≡(p - 1) / q + 1
. That's all assumingp ≥ 1
, of course. \$\endgroup\$Toby Speight– Toby Speight2023年01月06日 13:21:36 +00:00Commented Jan 6, 2023 at 13:21 -
\$\begingroup\$ Actually yes that was correct. I just changed the second division in your version to
*
i.e.((size_t)(((n_bits) - 1)) / (sizeof (type) * 8) + 1)
and it now avoids the extra allocation. \$\endgroup\$sOmEonE– sOmEonE2023年01月06日 13:27:48 +00:00Commented Jan 6, 2023 at 13:27 -
\$\begingroup\$ Yes,
a / (b * c)
is equivalent toa / b / c
- I was just trying to keep the parens to a minimum. \$\endgroup\$Toby Speight– Toby Speight2023年01月06日 13:33:14 +00:00Commented Jan 6, 2023 at 13:33 -
\$\begingroup\$ Nothing about
assert
being optimized away when the code is not compiled in debug mode? There is at least 1 assert in the macros. \$\endgroup\$2023年01月06日 17:21:40 +00:00Commented Jan 6, 2023 at 17:21
No need for float
math.
float
is simply not needed. Conversions to/from float add unnecessary overhead, implementation defined rounding and loses precision when word_size
is very large.
//size_t which_byte = (size_t)(index / (float)word_size);\
//size_t which_bit = (size_t)(index - (which_byte * word_size));\
size_t which_byte = index / word_size;\
size_t which_bit = index % word_size;\
Good compilers see nearby a/b
and a%b
and emit efficient code, often calculating both in one step.
-
\$\begingroup\$ Thanks. So I removed all casts like you mentioned. But for calculating
which_bit
wouldn'tbyte_bit_index.bit = index - byte_bit_index.byte * word_size
be faster than mod? Is there some edge case that it misses or something? All implementations of bitset that I have seen seem to use %. Or would performance just be the same \$\endgroup\$sOmEonE– sOmEonE2023年01月09日 20:29:12 +00:00Commented Jan 9, 2023 at 20:29 -
\$\begingroup\$ @sOmEonE Review answer's "Good compilers see nearby a/b and a%b and emit efficient code, often calculating both in one step.". After
a/b
, cost ofa%b
is often nil. With such linear coding "improvements" concerns, code for clarity and let the compiler do its job and you focus on the big issues. Review Is premature optimization really the root of all evil. \$\endgroup\$chux– chux2023年01月09日 20:39:25 +00:00Commented Jan 9, 2023 at 20:39 -
1\$\begingroup\$ Ok got it. Thank you \$\endgroup\$sOmEonE– sOmEonE2023年01月09日 20:45:45 +00:00Commented Jan 9, 2023 at 20:45
-
\$\begingroup\$ @sOmEonE IMO, understanding evil and good of premature optimization is a most valuable lesson. It will save you "Acres of time" (my phrase). \$\endgroup\$chux– chux2023年01月09日 20:49:10 +00:00Commented Jan 9, 2023 at 20:49
size_t which_byte = (size_t)(index / (float)word_size);
is trying to do. Using floating point with an integer problem is not a good thing. \$\endgroup\$