Something like std::bitset in C

Question 1

I am looking for opinion on this bitset implementation. It only has 4 functions and they are

set - set a bit/clear - clear a bit/check - test a bit /zero - zero out all bits.

There is another one as well but that isn't meant to be used by the users of bitset.

I have used macro to make the underlying type that bitset uses generic as well as the size. I am very new to macros for generic programming. I have been using them here and there in C++ but it had templates and constexpr so there was never really a need. So I want the emphasis of this review to be on use of macros with a little bit on the actual implementation on the side.

First is some test code that shows how bitset can be used.

#include "cim_generic_bitset.h"
#include <stdio.h>
 
enum { n_bits = 150 };
//CIM_GENERIC_BITSET_DECL(state, uint8_t, CIM_BITSET_OF_SIZE(uint8_t, n_bits));
CIM_GENERIC_BITSET_DEF(state, uint8_t, CIM_BITSET_OF_SIZE(uint8_t, n_bits));
 
int main()
{
 uint8_t state[CIM_BITSET_OF_SIZE(uint8_t, n_bits)];
 
 cim_state_bitset_zero(state);
 
 cim_state_bitset_set(state, 0);
 cim_state_bitset_set(state,22);
 cim_state_bitset_set(state, 49);
 cim_state_bitset_set(state, 93);
 cim_state_bitset_set(state, 141);
 cim_state_bitset_set(state, 149);
 
 assert(cim_state_bitset_check(state, 0));
 assert(cim_state_bitset_check(state, 22));
 assert(cim_state_bitset_check(state, 49));
 assert(cim_state_bitset_check(state, 93));
 assert(cim_state_bitset_check(state, 141));
 assert(cim_state_bitset_check(state, 149));
 
 for (int i = 0; i < n_bits; i++)
 {
 if (i != 0 && i != 22 && i != 49 && i != 93 && i != 141 && i != 149)
 assert(!cim_state_bitset_check(state, i));
 }
 
 for (int i = 0; i < n_bits; i++)
 cim_state_bitset_clear(state, i);
 
 assert(!cim_state_bitset_check(state, 0));
 assert(!cim_state_bitset_check(state, 22));
 assert(!cim_state_bitset_check(state, 49));
 assert(!cim_state_bitset_check(state, 93));
 assert(!cim_state_bitset_check(state, 141));
 assert(!cim_state_bitset_check(state, 149));
 
 return 0;
}

This is the bitset implementation.

#pragma once
 
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
 
typedef struct cim_byte_bit_index
{
 size_t byte;
 size_t bit;
} cim_byte_bit_index;
 
//1 extra sizeof(type) will be allocated if n_bits / sizeof(type) happens to be a whole number 
//but idk how to implement a constexpr ceil soo
#define CIM_BITSET_OF_SIZE(type, n_bits) (size_t)( (n_bits / (sizeof(type) * 8)) + 1)
 
#define CIM_GENERIC_BITSET_ZERO_DECL(name, type, size)\
void cim_##name##_bitset_zero(type* bitfield);
 
#define CIM_GENERIC_BITSET_ZERO_DEF(name, type, size)\
void cim_##name##_bitset_zero(type* bitfield)\
{\
 memset(bitfield, 0, size * sizeof(type));\
}
 
#define CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DECL(name, type, size)\
cim_byte_bit_index cim_get_##name##_byte_bit_index(size_t field_size, size_t index);
 
#define CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DEF(name, type, size)\
cim_byte_bit_index cim_get_##name##_byte_bit_index(size_t field_size, size_t index)\
{\
 size_t word_size = sizeof(type) * 8;\
 size_t which_byte = (size_t)(index / (float)word_size);\
 size_t which_bit = (size_t)(index - (which_byte * word_size));\
 cim_byte_bit_index byte_bit_index;\
 byte_bit_index.byte = which_byte;\
 byte_bit_index.bit = which_bit;\
 return byte_bit_index;\
}
 
#define CIM_GENERIC_BITSET_SET_BIT_DECL(name, type, size)\
void cim_##name##_bitset_set(type* bitfield, size_t index);
 
#define CIM_GENERIC_BITSET_SET_BIT_DEF(name, type, size)\
void cim_##name##_bitset_set(type* bitfield, size_t index)\
{\
 assert(index < size * sizeof(type) * 8 && "Bitset index out of range");\
 cim_byte_bit_index byte_bit_index = cim_get_##name##_byte_bit_index(size, index);\
 bitfield[byte_bit_index.byte] |= 1 << byte_bit_index.bit;\
}
 
 
#define CIM_GENERIC_BITSET_CLEAR_BIT_DECL(name, type, size)\
void cim_##name##_bitset_clear(type* bitfield, size_t index);
 
#define CIM_GENERIC_BITSET_CLEAR_BIT_DEF(name, type, size)\
void cim_##name##_bitset_clear(type* bitfield, size_t index)\
{\
 assert(index < size * sizeof(type) * 8 && "Bitset index out of range");\
 cim_byte_bit_index byte_bit_index = cim_get_##name##_byte_bit_index(size, index);\
 bitfield[byte_bit_index.byte] &= ~(1 << byte_bit_index.bit);\
}
 
 
#define CIM_GENERIC_BITSET_CHECK_BIT_DECL(name, type, size)\
bool cim_##name##_bitset_check(type* bitfield, size_t index);
 
#define CIM_GENERIC_BITSET_CHECK_BIT_DEF(name, type, size)\
bool cim_##name##_bitset_check(type* bitfield, size_t index)\
{\
 assert(index < size * sizeof(type) * 8 && "Bitset index out of range");\
 cim_byte_bit_index byte_bit_index = cim_get_##name##_byte_bit_index(size, index);\
 return bitfield[byte_bit_index.byte] & (1 << byte_bit_index.bit);\
}
 
#define CIM_GENERIC_BITSET_DECL(name, type, size)\
CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DECL(name, type, size)\
CIM_GENERIC_BITSET_SET_BIT_DECL(name, type, size)\
CIM_GENERIC_BITSET_CHECK_BIT_DECL(name, type, size)\
CIM_GENERIC_BITSET_ZERO_DECL(name, type, size)\
CIM_GENERIC_BITSET_CLEAR_BIT_DECL(name, type, size)
 
#define CIM_GENERIC_BITSET_DEF(name, type, size)\
CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DEF(name, type, size)\
CIM_GENERIC_BITSET_SET_BIT_DEF(name, type, size)\
CIM_GENERIC_BITSET_CHECK_BIT_DEF(name, type, size)\
CIM_GENERIC_BITSET_ZERO_DEF(name, type, size)\
CIM_GENERIC_BITSET_CLEAR_BIT_DEF(name, type, size)

Question 2

Please explain what size_t which_byte = (size_t)(index / (float)word_size); is trying to do. Using floating point with an integer problem is not a good thing.

Question 3

@chux-ReinstateMonica It is supposed to calculate which byte in the array is the given index in. Index is the bit in the whole array. How about if I convert index to float as well? Is float / float bad as well? Why is it bad? What would be the correct way to divide them?

Question 4

sOmEon, Answer added.

Question 5

 size_t word_size = sizeof(type) * 8;\

That magic number 8 looks like it ought to be CHAR_BIT. The same number crops up in other places where it looks like CHAR_BIT is intended.

//1 extra sizeof(type) will be allocated if n_bits / sizeof(type) happens to be a whole number 
//but idk how to implement a constexpr ceil soo
#define CIM_BITSET_OF_SIZE(type, n_bits) (size_t)( (n_bits / (sizeof(type) * 8)) + 1)

Oops - we need to ensure precedence rules in the expansion, by parenthesising (n_bits).

We should probably avoid the over-allocation by adjusting before we divide:

#define CIM_BITSET_OF_SIZE(type, n_bits) \
 ((size_t)((n_bits) - 1) / sizeof (type) / CHAR_BIT) + 1)

#define CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DEF(name, type, size)\
cim_byte_bit_index cim_get_##name##_byte_bit_index(size_t field_size, size_t index)\
{\
 size_t word_size = sizeof(type) * 8;\

That should probably be a static const

Instead of having separate ⋯_DECL() and ⋯_DEF() macros, perhaps just define functions with file scope (i.e. static)? That will increase opportunities for inlining, too.

Question 6

Thank you for the review. My intention with (size_t)( (n_bits / (sizeof(type) * 8)) + 1) was for it be be kind of similar to ceil((float)n_bits / sizeof(type * 8)) . ((size_t)((n_bits) - 1) / sizeof (type) / CHAR_BIT) + 1) doesn't look quite right. did you mistype something?

Question 7

I only inspected that, and haven't unit-tested. The standard technique for ceiling integer division adds one less than the denominator to the numerator before dividing (i.e. (p + (q - 1)) / q. I refactored that to ((p - 1) + q) / q ≡ (p - 1) / q + 1. That's all assuming p ≥ 1, of course.

Question 8

Actually yes that was correct. I just changed the second division in your version to * i.e. ((size_t)(((n_bits) - 1)) / (sizeof (type) * 8) + 1) and it now avoids the extra allocation.

Question 9

Yes, a / (b * c) is equivalent to a / b / c - I was just trying to keep the parens to a minimum.

Question 10

Nothing about assert being optimized away when the code is not compiled in debug mode? There is at least 1 assert in the macros.

Question 11

No need for float math.

float is simply not needed. Conversions to/from float add unnecessary overhead, implementation defined rounding and loses precision when word_size is very large.

//size_t which_byte = (size_t)(index / (float)word_size);\
//size_t which_bit = (size_t)(index - (which_byte * word_size));\
size_t which_byte = index / word_size;\
size_t which_bit = index % word_size;\

Good compilers see nearby a/b and a%b and emit efficient code, often calculating both in one step.

Question 12

Thanks. So I removed all casts like you mentioned. But for calculating which_bit wouldn't byte_bit_index.bit = index - byte_bit_index.byte * word_size be faster than mod? Is there some edge case that it misses or something? All implementations of bitset that I have seen seem to use %. Or would performance just be the same

Question 13

@sOmEonE Review answer's "Good compilers see nearby a/b and a%b and emit efficient code, often calculating both in one step.". After a/b, cost of a%b is often nil. With such linear coding "improvements" concerns, code for clarity and let the compiler do its job and you focus on the big issues. Review Is premature optimization really the root of all evil.

Question 14

Ok got it. Thank you

Question 15

@sOmEonE IMO, understanding evil and good of premature optimization is a most valuable lesson. It will save you "Acres of time" (my phrase).

Toby Speight Toby Speight 87.8k14 gold badges104 silver badges325 bronze badges · Accepted Answer · 2023-01-06 12:30:34Z

 size_t word_size = sizeof(type) * 8;\

That magic number 8 looks like it ought to be CHAR_BIT. The same number crops up in other places where it looks like CHAR_BIT is intended.

//1 extra sizeof(type) will be allocated if n_bits / sizeof(type) happens to be a whole number 
//but idk how to implement a constexpr ceil soo
#define CIM_BITSET_OF_SIZE(type, n_bits) (size_t)( (n_bits / (sizeof(type) * 8)) + 1)

Oops - we need to ensure precedence rules in the expansion, by parenthesising (n_bits).

We should probably avoid the over-allocation by adjusting before we divide:

#define CIM_BITSET_OF_SIZE(type, n_bits) \
 ((size_t)((n_bits) - 1) / sizeof (type) / CHAR_BIT) + 1)

#define CIM_GENERIC_BITSET_GET_BIT_BYTE_INDEX_DEF(name, type, size)\
cim_byte_bit_index cim_get_##name##_byte_bit_index(size_t field_size, size_t index)\
{\
 size_t word_size = sizeof(type) * 8;\

That should probably be a static const

Instead of having separate ⋯_DECL() and ⋯_DEF() macros, perhaps just define functions with file scope (i.e. static)? That will increase opportunities for inlining, too.

Thank you for the review. My intention with (size_t)( (n_bits / (sizeof(type) * 8)) + 1) was for it be be kind of similar to ceil((float)n_bits / sizeof(type * 8)) . ((size_t)((n_bits) - 1) / sizeof (type) / CHAR_BIT) + 1) doesn't look quite right. did you mistype something?
I only inspected that, and haven't unit-tested. The standard technique for ceiling integer division adds one less than the denominator to the numerator before dividing (i.e. (p + (q - 1)) / q. I refactored that to ((p - 1) + q) / q ≡ (p - 1) / q + 1. That's all assuming p ≥ 1, of course.
Actually yes that was correct. I just changed the second division in your version to * i.e. ((size_t)(((n_bits) - 1)) / (sizeof (type) * 8) + 1) and it now avoids the extra allocation.
Yes, a / (b * c) is equivalent to a / b / c - I was just trying to keep the parens to a minimum.
Nothing about assert being optimized away when the code is not compiled in debug mode? There is at least 1 assert in the macros.

Stack Exchange Network

Something like std::bitset in C

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Something like std::bitset in C

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions