I have two functions to count the number of set '1' bits in the first offset
bits of either a 64 bit number, or a bitmap struct.
After some profiling, particularly the latter is quite a bottleneck in our system.
What can we do to make it faster?
#define POPCOUNT16(x) __builtin_popcount(x)
#define POPCOUNT64(x) __builtin_popcountl(x)
static __always_inline __constant int32 count64(int64 const bitmap, int32 const offset) {
return offset == 0 ? 0 : POPCOUNT64(bitmap & (0xffffffffffffffffUL << (64 - offset)));
}
typedef struct {
int64 hi;
int16 lo;
} __packed int80;
static __always_inline __constant int32 countBitmap(int80 const bitmap, int32 const offset) {
int32 count = 0;
if (offset > 0) {
count += POPCOUNT64(bitmap.hi & (0xffffffffffffffffUL << (sizeof (bitmap.hi)*8 - MIN(offset, sizeof (bitmap.hi)*8))));
if (offset > sizeof (bitmap.hi)*8)
count += POPCOUNT16(bitmap.lo & ((int16)0xffff << (sizeof (bitmap.hi)*8+sizeof (bitmap.lo)*8 - offset)));
}
return count;
}
1 Answer 1
first issue: you hardcode 8
as the number of bits per sizeof
unit: portability says it should be CHAR_BIT
which may be something else on some architectures
second issue: both members of int80 are signed, only hi
should be while lo
can be unsigned
if you can guarantee that offset is between 0 and 80 then you don't need to test for 0, and just document that exceeding the bounds is undefined behavior
static __always_inline __constant int32 countBitmap(int80 const bitmap, uint32 const offset) {
int32 count = 0;
if (offset <= sizeof (bitmap.hi)*CHAR_BIT) {
count = POPCOUNT64(bitmap.hi >>> (sizeof (bitmap.hi)*CHAR_BIT - offset));
// logical right shift to do away with the AND
}else{
count = POPCOUNT64(bitmap.hi)+
POPCOUNT16(bitmap.lo >>> (sizeof (bitmap)*CHAR_BIT - offset));
}
return count;
}
here there is only 1 branch compared to the 2 you had before (possibly 3 depending on how MIN
was implemented)
count64()
takes anint64 bitmap
— how can there be abitmap.hi
? \$\endgroup\$inline __attribute__((always_inline))
, I figured it would make no difference which, so I put it in the function for simplicity. \$\endgroup\$