Move an unaligned bit-addressed range into another bitmap

Question 1

Context

I have an array of maximum-fixed-size, size, containers of items; these items are either pointers to objects or an index of the same array, (but no loops.) When the current container is full, to add an object, a new container will be created and a contiguous block of some of the items will be moved to the container; the new container will have it's index recorded where the void was left in the old container.

Implementation

Since space is an issue, I have made a bitmap of size to tell whether an individual item is a pointer to object (0) or an index (1) to store with each element of the array of containers. This is the move operation, applied to the bitmap.

TDD

I created tests and stopped when it didn't work. I added a clause and then tried again. Eventually I got all of the size-bit combinations working. What I'm really interested in is reducing the size of move, especially the spot where I move bits back in a; I feel that it has too many special cases.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <time.h>
/** Moves and overwrites `bmp_b` size `bmp_b_size` with `bit_offset` to
 `bit_range` from `bmp_a` size `bmp_a_size`. `bmp_a` has the moved part
 replaced with a single bit, '1'. `bit_range` cannot be zero. */
static void move(unsigned char *const bmp_a, const unsigned bmp_a_size,
 const unsigned bit_offset, const unsigned bit_range,
 unsigned char *const bmp_b, const unsigned bmp_b_size) {
 assert(bmp_a && bmp_b);
 assert(bit_range && bit_offset + bit_range <= bmp_a_size << 3);
 { /* Copy a contiguous subset of bits from `a` into the new array, `b`. */
 const unsigned a = bit_offset >> 3, a_bit = bit_offset & 7;
 unsigned b, rest;
 for(b = 0, rest = bit_range; rest > 8; b++, rest -= 8) bmp_b[b]
 = (bmp_a[a + b] << a_bit) | (bmp_a[a + b + 1] >> (8 - a_bit));
 bmp_b[b] = bmp_a[a + b] << a_bit;
 if(a + b < (bit_offset + bit_range) >> 3)
 bmp_b[b] |= (bmp_a[a + b + 1] >> (8 - a_bit));
 bmp_b[b++] &= ~(255 >> rest);
 memset(bmp_b + b, 0, bmp_b_size - b);
 }
 { /* Replace copied bits from `a` with '1'. */
 const unsigned a = bit_offset >> 3, a_bit = bit_offset & 7;
 bmp_a[a] |= 128 >> a_bit;
 }
 { /* Move bits back in `a`.
 <https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge> */
 unsigned a0 = (bit_offset + 1) >> 3, a1 = (bit_offset + bit_range) >> 3;
 const unsigned a0_bit = (bit_offset + 1) & 7,
 a1_bit = (bit_offset + bit_range) & 7;
 assert(a0 <= bmp_a_size && a1 <= bmp_a_size);
 if(a1 == bmp_a_size) { /* On the trailing edge. */
 assert(!a1_bit);
 if(a0 == bmp_a_size) assert(!a0_bit); /* Extreme right. */
 else bmp_a[a0++] &= 255 << 8-a0_bit;
 } else if(a1_bit < a0_bit) { /* Inversion of shift. */
 const unsigned shift = a0_bit - a1_bit;
 assert(a0 < a1);
 {
 const unsigned char bmp_a_a0 = bmp_a[a0],
 bmp_a_a1 = bmp_a[a1] >> shift,
 mask = 255 >> a0_bit;
 bmp_a[a0] = bmp_a_a0 ^ ((bmp_a_a0 ^ bmp_a_a1) & mask);
 }
 while(++a0, ++a1 < bmp_a_size)
 bmp_a[a0] = bmp_a[a1 - 1] << 8-shift | bmp_a[a1] >> shift;
 bmp_a[a0++] = bmp_a[a1 - 1] << 8-shift;
 } else { /* Shift right or zero. */
 const unsigned shift = a1_bit - a0_bit;
 assert(a0 <= a1);
 {
 const unsigned char bmp_a_a0 = bmp_a[a0],
 bmp_a_a1 = bmp_a[a1] << shift,
 mask = 255 >> a0_bit;
 bmp_a[a0] = bmp_a_a0 ^ ((bmp_a_a0 ^ bmp_a_a1) & mask);
 }
 while(++a0, ++a1 < bmp_a_size)
 bmp_a[a0 - 1] |= bmp_a[a1] >> 8-shift,
 bmp_a[a0] = bmp_a[a1] << shift;
 }
 memset(bmp_a + a0, 0, bmp_a_size - a0);
 }
}
/** Move `byte_offset` to `byte_range` from string `bmp_a` size `bmp_a_size` to
 `bmp_b` size `bmp_b_size`, replacing it with '1'. Ensure that the strings have
 enough space. This is for testing against the bit vector version. */
static void move_string(char *const bmp_a, const unsigned bmp_a_size,
 const unsigned byte_offset, const unsigned byte_range,
 char *const bmp_b, const unsigned bmp_b_size) {
 assert(bmp_a && bmp_b);
 assert(byte_offset + byte_range <= bmp_a_size << 3);
 assert(byte_range && byte_range <= bmp_b_size << 3);
 /* Copy select `a` to `b`. */
 memcpy(bmp_b, bmp_a + byte_offset, byte_range);
 memset(bmp_b + byte_range, '0', (bmp_b_size << 3) - byte_range);
 bmp_b[bmp_b_size << 3] = '0円';
 /* Move `a` selection to '1'. */
 bmp_a[byte_offset] = '1';
 memmove(bmp_a + byte_offset + 1, bmp_a + byte_offset + byte_range,
 (bmp_a_size << 3) - byte_offset - byte_range);
 memset(bmp_a + (bmp_a_size << 3) - byte_range + 1, '0', byte_range - 1);
 bmp_a[bmp_a_size << 3] = '0円';
}
/** `bmp` of `bmp_size` is written in `text` for use in testing. Ensure that
 `text` has enough space. */
static void bitmap_to_string(char *text, const unsigned char *const bmp,
 const size_t bmp_size) {
 unsigned i, j;
 assert(text && bmp && bmp_size);
 for(i = 0; i < bmp_size; i++)
 for(j = 0; j < 8; j++) *text++ = bmp[i] & 128 >> j ? '1' : '0';
 *text = '0円';
}
/** Adds spaces into `s` and adds parentheses between `a` and `b` and sends it
 to `stdout`. */
static void adorn(const char *const s,
 const unsigned a, const unsigned b) {
 size_t i;
 for(i = 0; ; i++) {
 if(i == b) fputc(']', stdout);
 if(s[i] == '0円') break;
 if(!(i & 3) && i) fputc(i & 7 ? ':' : ' ', stdout);
 if(i == a) fputc('[', stdout);
 fputc(s[i], stdout);
 }
}
/** Tests all legal substrings of `a0` size `a0_size`. */
static void test(const unsigned char *const a0, const unsigned a0_size) {
 /* `a` size: practical limit for testing `a0_size <= sizeof a`. */
 unsigned char a[32], b[sizeof a];
 char a_ver_str[(sizeof a << 3) + 1], b_ver_str[(sizeof b << 3) + 1],
 a_str[(sizeof a << 3) + 1], b_str[(sizeof b << 3) + 1];
 unsigned offset, range;
 assert(a0 && a0_size && a0_size <= sizeof a && a0_size <= sizeof b);
 for(offset = 0; offset < a0_size << 3; offset++) {
 for(range = 1; offset + range <= a0_size << 3; range++) {
 memcpy(a, a0, a0_size); /* Copy `a0` to `a` for modification. */
 bitmap_to_string(a_ver_str, a, a0_size);
 adorn(a_ver_str, offset, offset + range), fputs(" <- a_0\n", stdout);
 move_string(a_ver_str, a0_size, offset, range, b_ver_str, a0_size);
 move(a, a0_size, offset, range, b, sizeof b);
 bitmap_to_string(a_str, a, a0_size);
 bitmap_to_string(b_str, b, a0_size);
 adorn(a_str, offset, offset + 1), fputs(" <- a_1\n", stdout);
 adorn(b_str, 0, range), fputs(" <- b_1\n", stdout);
 assert(!strcmp(a_str, a_ver_str));
 assert(!strcmp(b_str, b_ver_str));
 fputc('\n', stdout);
 }
 }
}
int main(void) {
 /* Test run time is maybe `O(sizeof bmp^3)`? Changing the size will
 increase the bits of the problem up to `sizeof a` in <fn:test>. */
 unsigned char bmp[(256 >> 3) / 7];
 unsigned seed = (unsigned)clock();
 size_t i;
 srand(seed), rand(), printf("Seed %u.\n", seed);
 /* <http://c-faq.com/lib/randrange.html> */
 for(i = 0; i < sizeof bmp; i++) bmp[i] = rand() / (RAND_MAX / 255 + 1);
 test(bmp, sizeof bmp);
 return EXIT_SUCCESS;
}

Example

(Edit) Offset 5. Range 9.

Seed 0.
0000:0[011 0110:00]01 0110:1001 0001:0001 <- a_0
0000:0[1]01 0110:1001 0001:0001 0000:0000 <- a_1
[0110:1100 0]000:0000 0000:0000 0000:0000 <- b_1

The bit addressed [offset, offset + range], is moved from a_0 to b_1. a_1 is the replacement of [offset, offset + range], in this case [011 0110:00], with [1] (moving the all the right bits of a_0 from position offset + range = 14 to offset + 1 = 6). This corresponds to splitting the container a_0 (full 32 entries), into a_1 (23 entries + 1 entry for the new bit 1) and b_1 (9 entries).

Question 2

I'm not a fan of random testing in unit tests. They should give repeatable results to be useful as a build step.

A tip for creating a good unit-test suite, given that we can't test every possible input, is to start with the obvious error cases (verify that they correctly report errors), then move on to the likely problem inputs (extreme values, boundaries between different behaviours, etc.)

In this case the error tests would include passing bit_range values of 0 and one past the end of input array size. The boundary cases include 1 and exactly the end of input array for the same argument.

As a module test, random runs are great, but you've only done half the job:

srand(seed), rand(), printf("Seed %u.\n", seed);

It's no use printing the seed that was used if there's no option to re-use that seed in a future run. We need to add a way (command-line option, environment variable, whatever) to pass a specific seed into the test program if we want to reproduce and diagnose any failures we uncover this way.

assert() is problematic, as most implementations won't show the evaluation of the expression, and it terminates the program at the first failure. A test suite is more useful if it records all the tests that fail. Consider using one of the many available unit-test libraries to give more useful results than this.

Question 3

Since testing it with all bitsets is probably unviable, I test on a random bitset, but I see what you mean. I really like your answer because it focuses on how I can verify that this works as expected.

Question 4

so from my understanding, in the move function you want to:

move a continuous set of bits from bit_map_a to bit_map_b
set any moved bits in bit_map_a to 1
set any unwritten bits in bit_map_b to 0

To clean this up I would break the problem into smaller chunks. The first chunk should deal with the first n bits that are not byte aligned. The second chunk deals with the last n bits that are not byte aligned. The third chunk deals with the all other bits. The last chunk clears any unused bits in b. The code below follows that format.

#define roundDown(x, y) ((x/y) * y)
static void move(unsigned char *const bmp_a, const unsigned bmp_a_size,
 const unsigned bit_offset, const unsigned bit_range,
 unsigned char *const bmp_b, const unsigned bmp_b_size)
{
 unsigned aStartIdx = bit_offset / 8;
 unsigned bStartIdx = 0;
 const unsigned endBit = bit_offset + bit_range;
 const unsigned startRuntNumBits = (bit_offset - roundDown(bit_offset, 8));
 assert(bmp_a && bmp_b);
 assert(bit_range && bit_offset + bit_range <= bmp_a_size << 3);
 
 // Copy and clear first byte
 {
 unsigned char mask = 0xFF << startRuntNumBits;
 // cover case where first and last bit are in the same byte
 if (endBit / 8 == aStartIdx) {
 mask = mask >> (8 * (aStartIdx + 1) - endBit);
 }
 bmp_b[bStartIdx] = mask & bmp_a[aStartIdx];
 bmp_a[aStartIdx] |= mask;
 aStartIdx++; 
 bStartIdx++;
 }
 // copy and clear non-runt bytes
 if(endBit > aStartIdx * 8)
 {
 const unsigned numNonRuntBytes = (bit_range - startRuntNumBits) / 8;
 memcpy(&bmp_b[bStartIdx], &bmp_a[aStartIdx], numNonRuntBytes);
 memset(&bmp_a[aStartIdx], 0xFF, numNonRuntBytes);
 aStartIdx += numNonRuntBytes;
 bStartIdx += numNonRuntBytes;
 }
 // copy last runt byte
 if(endBit > aStartIdx * 8)
 {
 const unsigned char endRuntBits = endBit - (bStartIdx * 8);
 const unsigned char mask = 0xFF >> (8 - endRuntBits);
 bmp_b[bStartIdx] = mask & bmp_a[aStartIdx];
 bmp_a[aStartIdx] |= mask;
 bStartIdx++; 
 }
 // zero out rest of b
 memset(&bmp_b[bStartIdx], 0, bmp_b_size - bStartIdx);
}

Question 5

I would leave the assert statements as the first 2 statements, there really isn't any reason to do any assignments or calculations if the assert statements fail.

Question 6

Your code is pretty good, clean, and close to what I want. The '2 set any moved bits to 1' should be '2 replace any moved bits by the single bit 1.' This (generally) has to move all the items offset + range left, and replace them with 0. (This shrinks the size from the maximum to max - range + 1, so the new entries can fit.)

Question 7

Be Careful With Assert Usage

If you are using the assert() macro in a debug environment that is great, however, if this becomes production code and the code is optimized and not compiled with the debug option then the assert() statements are optimized away. Only expect assert() statements to work when the program is compiled in debug mode. If you need to always check at runtime then use if instead with error messages.

Question 8

Since I may be exporting move to a bigger project, do you have any specific advice on assert(bmp_a && bmp_b && bit_range && bit_offset + bit_range <= bmp_a_size * 8 && bit_range <= bmp_b_size * 8); is it clear from the comments that this is considered a programming error?

Question 9

@Neil The assert statement terminates the program with no error message. Error messages provide the user with feedback about what is wrong. Assert statements are useful when debugging and possibly unit testing, they are not good for production code. Your comments are clear, but a user doesn't look at the comments in the code.

Question 10

@pacmaninbw "No error message"??! Only if your C implementation is broken! It's supposed to produce implementation-specific diagnostic information on the standard error output before terminating. The diagnostic information is required to include the text of expression, as well as the values of [(since C99) the predefined variable __func__ and of] the predefined macros __FILE__ and __LINE__.

Question 11

The storage is inefficient on machines whose CHAR_BIT is larger than 8. It wouldn't be hard to use all the available bits in each char. For example:

{ /* Copy a contiguous subset of bits from `a` into the new array, `b`. */
 const unsigned a = bit_offset / CHAR_BIT;
 const unsigned a_bit = bit_offset % CHAR_BIT;
 unsigned b, rest;
 for (b = 0, rest = bit_range; rest > CHAR_BIT; b++, rest -= CHAR_BIT) {
 bmp_b[b] = (bmp_a[a + b] << a_bit) | (bmp_a[a + b + 1] >> (CHAR_BIT - a_bit));
 }
 bmp_b[b] = bmp_a[a + b] << a_bit;
 if (a + b < (bit_offset + bit_range) / CHAR_BIT) {
 bmp_b[b] |= (bmp_a[a + b + 1] >> (CHAR_BIT - a_bit));
 }
 bmp_b[b++] &= ~(UCHAR_MAX >> rest);
 memset(bmp_b + b, 0, bmp_b_size - b);
}

Note that I've used plain division and modulo rather than shifting, to be clear what we mean - we can trust any reasonable compiler to implement those with bit operations if they are more efficient. There's no need to do that kind of micro-optimisation by hand.

Question 12

That's bit-for-bit what my code looks like now. I was worried that UCHAR_MAX might be something crazy, but it's both >= 255 and 2^n-1.

Toby Speight Toby Speight 87.1k14 gold badges104 silver badges322 bronze badges · Accepted Answer · 2021-09-01 12:48:47Z

I'm not a fan of random testing in unit tests. They should give repeatable results to be useful as a build step.

A tip for creating a good unit-test suite, given that we can't test every possible input, is to start with the obvious error cases (verify that they correctly report errors), then move on to the likely problem inputs (extreme values, boundaries between different behaviours, etc.)

In this case the error tests would include passing bit_range values of 0 and one past the end of input array size. The boundary cases include 1 and exactly the end of input array for the same argument.

As a module test, random runs are great, but you've only done half the job:

srand(seed), rand(), printf("Seed %u.\n", seed);

It's no use printing the seed that was used if there's no option to re-use that seed in a future run. We need to add a way (command-line option, environment variable, whatever) to pass a specific seed into the test program if we want to reproduce and diagnose any failures we uncover this way.

assert() is problematic, as most implementations won't show the evaluation of the expression, and it terminates the program at the first failure. A test suite is more useful if it records all the tests that fail. Consider using one of the many available unit-test libraries to give more useful results than this.

Since testing it with all bitsets is probably unviable, I test on a random bitset, but I see what you mean. I really like your answer because it focuses on how I can verify that this works as expected.

Stack Exchange Network

Move an unaligned bit-addressed range into another bitmap

Context

Implementation

TDD

Example

4 Answers 4

Be Careful With Assert Usage

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Move an unaligned bit-addressed range into another bitmap

Context

Implementation

TDD

Example

4 Answers 4

Be Careful With Assert Usage

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions