Context
I have an array of maximum-fixed-size, size
, containers of items; these items are either pointers to objects or an index of the same array, (but no loops.) When the current container is full, to add an object, a new container will be created and a contiguous block of some of the items will be moved to the container; the new container will have it's index recorded where the void was left in the old container.
Implementation
Since space is an issue, I have made a bitmap of size
to tell whether an individual item is a pointer to object (0) or an index (1) to store with each element of the array of containers. This is the move
operation, applied to the bitmap.
TDD
I created tests and stopped when it didn't work. I added a clause and then tried again. Eventually I got all of the size
-bit combinations working. What I'm really interested in is reducing the size of move
, especially the spot where I move bits back in a
; I feel that it has too many special cases.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <time.h>
/** Moves and overwrites `bmp_b` size `bmp_b_size` with `bit_offset` to
`bit_range` from `bmp_a` size `bmp_a_size`. `bmp_a` has the moved part
replaced with a single bit, '1'. `bit_range` cannot be zero. */
static void move(unsigned char *const bmp_a, const unsigned bmp_a_size,
const unsigned bit_offset, const unsigned bit_range,
unsigned char *const bmp_b, const unsigned bmp_b_size) {
assert(bmp_a && bmp_b);
assert(bit_range && bit_offset + bit_range <= bmp_a_size << 3);
{ /* Copy a contiguous subset of bits from `a` into the new array, `b`. */
const unsigned a = bit_offset >> 3, a_bit = bit_offset & 7;
unsigned b, rest;
for(b = 0, rest = bit_range; rest > 8; b++, rest -= 8) bmp_b[b]
= (bmp_a[a + b] << a_bit) | (bmp_a[a + b + 1] >> (8 - a_bit));
bmp_b[b] = bmp_a[a + b] << a_bit;
if(a + b < (bit_offset + bit_range) >> 3)
bmp_b[b] |= (bmp_a[a + b + 1] >> (8 - a_bit));
bmp_b[b++] &= ~(255 >> rest);
memset(bmp_b + b, 0, bmp_b_size - b);
}
{ /* Replace copied bits from `a` with '1'. */
const unsigned a = bit_offset >> 3, a_bit = bit_offset & 7;
bmp_a[a] |= 128 >> a_bit;
}
{ /* Move bits back in `a`.
<https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge> */
unsigned a0 = (bit_offset + 1) >> 3, a1 = (bit_offset + bit_range) >> 3;
const unsigned a0_bit = (bit_offset + 1) & 7,
a1_bit = (bit_offset + bit_range) & 7;
assert(a0 <= bmp_a_size && a1 <= bmp_a_size);
if(a1 == bmp_a_size) { /* On the trailing edge. */
assert(!a1_bit);
if(a0 == bmp_a_size) assert(!a0_bit); /* Extreme right. */
else bmp_a[a0++] &= 255 << 8-a0_bit;
} else if(a1_bit < a0_bit) { /* Inversion of shift. */
const unsigned shift = a0_bit - a1_bit;
assert(a0 < a1);
{
const unsigned char bmp_a_a0 = bmp_a[a0],
bmp_a_a1 = bmp_a[a1] >> shift,
mask = 255 >> a0_bit;
bmp_a[a0] = bmp_a_a0 ^ ((bmp_a_a0 ^ bmp_a_a1) & mask);
}
while(++a0, ++a1 < bmp_a_size)
bmp_a[a0] = bmp_a[a1 - 1] << 8-shift | bmp_a[a1] >> shift;
bmp_a[a0++] = bmp_a[a1 - 1] << 8-shift;
} else { /* Shift right or zero. */
const unsigned shift = a1_bit - a0_bit;
assert(a0 <= a1);
{
const unsigned char bmp_a_a0 = bmp_a[a0],
bmp_a_a1 = bmp_a[a1] << shift,
mask = 255 >> a0_bit;
bmp_a[a0] = bmp_a_a0 ^ ((bmp_a_a0 ^ bmp_a_a1) & mask);
}
while(++a0, ++a1 < bmp_a_size)
bmp_a[a0 - 1] |= bmp_a[a1] >> 8-shift,
bmp_a[a0] = bmp_a[a1] << shift;
}
memset(bmp_a + a0, 0, bmp_a_size - a0);
}
}
/** Move `byte_offset` to `byte_range` from string `bmp_a` size `bmp_a_size` to
`bmp_b` size `bmp_b_size`, replacing it with '1'. Ensure that the strings have
enough space. This is for testing against the bit vector version. */
static void move_string(char *const bmp_a, const unsigned bmp_a_size,
const unsigned byte_offset, const unsigned byte_range,
char *const bmp_b, const unsigned bmp_b_size) {
assert(bmp_a && bmp_b);
assert(byte_offset + byte_range <= bmp_a_size << 3);
assert(byte_range && byte_range <= bmp_b_size << 3);
/* Copy select `a` to `b`. */
memcpy(bmp_b, bmp_a + byte_offset, byte_range);
memset(bmp_b + byte_range, '0', (bmp_b_size << 3) - byte_range);
bmp_b[bmp_b_size << 3] = '0円';
/* Move `a` selection to '1'. */
bmp_a[byte_offset] = '1';
memmove(bmp_a + byte_offset + 1, bmp_a + byte_offset + byte_range,
(bmp_a_size << 3) - byte_offset - byte_range);
memset(bmp_a + (bmp_a_size << 3) - byte_range + 1, '0', byte_range - 1);
bmp_a[bmp_a_size << 3] = '0円';
}
/** `bmp` of `bmp_size` is written in `text` for use in testing. Ensure that
`text` has enough space. */
static void bitmap_to_string(char *text, const unsigned char *const bmp,
const size_t bmp_size) {
unsigned i, j;
assert(text && bmp && bmp_size);
for(i = 0; i < bmp_size; i++)
for(j = 0; j < 8; j++) *text++ = bmp[i] & 128 >> j ? '1' : '0';
*text = '0円';
}
/** Adds spaces into `s` and adds parentheses between `a` and `b` and sends it
to `stdout`. */
static void adorn(const char *const s,
const unsigned a, const unsigned b) {
size_t i;
for(i = 0; ; i++) {
if(i == b) fputc(']', stdout);
if(s[i] == '0円') break;
if(!(i & 3) && i) fputc(i & 7 ? ':' : ' ', stdout);
if(i == a) fputc('[', stdout);
fputc(s[i], stdout);
}
}
/** Tests all legal substrings of `a0` size `a0_size`. */
static void test(const unsigned char *const a0, const unsigned a0_size) {
/* `a` size: practical limit for testing `a0_size <= sizeof a`. */
unsigned char a[32], b[sizeof a];
char a_ver_str[(sizeof a << 3) + 1], b_ver_str[(sizeof b << 3) + 1],
a_str[(sizeof a << 3) + 1], b_str[(sizeof b << 3) + 1];
unsigned offset, range;
assert(a0 && a0_size && a0_size <= sizeof a && a0_size <= sizeof b);
for(offset = 0; offset < a0_size << 3; offset++) {
for(range = 1; offset + range <= a0_size << 3; range++) {
memcpy(a, a0, a0_size); /* Copy `a0` to `a` for modification. */
bitmap_to_string(a_ver_str, a, a0_size);
adorn(a_ver_str, offset, offset + range), fputs(" <- a_0\n", stdout);
move_string(a_ver_str, a0_size, offset, range, b_ver_str, a0_size);
move(a, a0_size, offset, range, b, sizeof b);
bitmap_to_string(a_str, a, a0_size);
bitmap_to_string(b_str, b, a0_size);
adorn(a_str, offset, offset + 1), fputs(" <- a_1\n", stdout);
adorn(b_str, 0, range), fputs(" <- b_1\n", stdout);
assert(!strcmp(a_str, a_ver_str));
assert(!strcmp(b_str, b_ver_str));
fputc('\n', stdout);
}
}
}
int main(void) {
/* Test run time is maybe `O(sizeof bmp^3)`? Changing the size will
increase the bits of the problem up to `sizeof a` in <fn:test>. */
unsigned char bmp[(256 >> 3) / 7];
unsigned seed = (unsigned)clock();
size_t i;
srand(seed), rand(), printf("Seed %u.\n", seed);
/* <http://c-faq.com/lib/randrange.html> */
for(i = 0; i < sizeof bmp; i++) bmp[i] = rand() / (RAND_MAX / 255 + 1);
test(bmp, sizeof bmp);
return EXIT_SUCCESS;
}
Example
(Edit) Offset 5. Range 9.
Seed 0.
0000:0[011 0110:00]01 0110:1001 0001:0001 <- a_0
0000:0[1]01 0110:1001 0001:0001 0000:0000 <- a_1
[0110:1100 0]000:0000 0000:0000 0000:0000 <- b_1
The bit addressed [offset, offset + range]
, is moved from a_0
to b_1
. a_1
is the replacement of [offset, offset + range]
, in this case [011 0110:00]
, with [1]
(moving the all the right bits of a_0
from position offset + range = 14
to offset + 1 = 6
). This corresponds to splitting the container a_0
(full 32 entries), into a_1
(23 entries + 1 entry for the new bit 1) and b_1
(9 entries).
4 Answers 4
I'm not a fan of random testing in unit tests. They should give repeatable results to be useful as a build step.
A tip for creating a good unit-test suite, given that we can't test every possible input, is to start with the obvious error cases (verify that they correctly report errors), then move on to the likely problem inputs (extreme values, boundaries between different behaviours, etc.)
In this case the error tests would include passing bit_range
values of 0
and one past the end of input array size. The boundary cases include 1
and exactly the end of input array for the same argument.
As a module test, random runs are great, but you've only done half the job:
srand(seed), rand(), printf("Seed %u.\n", seed);
It's no use printing the seed that was used if there's no option to re-use that seed in a future run. We need to add a way (command-line option, environment variable, whatever) to pass a specific seed into the test program if we want to reproduce and diagnose any failures we uncover this way.
assert()
is problematic, as most implementations won't show the evaluation of the expression, and it terminates the program at the first failure. A test suite is more useful if it records all the tests that fail. Consider using one of the many available unit-test libraries to give more useful results than this.
-
\$\begingroup\$ Since testing it with all bitsets is probably unviable, I test on a random bitset, but I see what you mean. I really like your answer because it focuses on how I can verify that this works as expected. \$\endgroup\$Neil– Neil2021年09月01日 22:42:33 +00:00Commented Sep 1, 2021 at 22:42
so from my understanding, in the move function you want to:
- move a continuous set of bits from bit_map_a to bit_map_b
- set any moved bits in bit_map_a to 1
- set any unwritten bits in bit_map_b to 0
To clean this up I would break the problem into smaller chunks. The first chunk should deal with the first n bits that are not byte aligned. The second chunk deals with the last n bits that are not byte aligned. The third chunk deals with the all other bits. The last chunk clears any unused bits in b. The code below follows that format.
#define roundDown(x, y) ((x/y) * y)
static void move(unsigned char *const bmp_a, const unsigned bmp_a_size,
const unsigned bit_offset, const unsigned bit_range,
unsigned char *const bmp_b, const unsigned bmp_b_size)
{
unsigned aStartIdx = bit_offset / 8;
unsigned bStartIdx = 0;
const unsigned endBit = bit_offset + bit_range;
const unsigned startRuntNumBits = (bit_offset - roundDown(bit_offset, 8));
assert(bmp_a && bmp_b);
assert(bit_range && bit_offset + bit_range <= bmp_a_size << 3);
// Copy and clear first byte
{
unsigned char mask = 0xFF << startRuntNumBits;
// cover case where first and last bit are in the same byte
if (endBit / 8 == aStartIdx) {
mask = mask >> (8 * (aStartIdx + 1) - endBit);
}
bmp_b[bStartIdx] = mask & bmp_a[aStartIdx];
bmp_a[aStartIdx] |= mask;
aStartIdx++;
bStartIdx++;
}
// copy and clear non-runt bytes
if(endBit > aStartIdx * 8)
{
const unsigned numNonRuntBytes = (bit_range - startRuntNumBits) / 8;
memcpy(&bmp_b[bStartIdx], &bmp_a[aStartIdx], numNonRuntBytes);
memset(&bmp_a[aStartIdx], 0xFF, numNonRuntBytes);
aStartIdx += numNonRuntBytes;
bStartIdx += numNonRuntBytes;
}
// copy last runt byte
if(endBit > aStartIdx * 8)
{
const unsigned char endRuntBits = endBit - (bStartIdx * 8);
const unsigned char mask = 0xFF >> (8 - endRuntBits);
bmp_b[bStartIdx] = mask & bmp_a[aStartIdx];
bmp_a[aStartIdx] |= mask;
bStartIdx++;
}
// zero out rest of b
memset(&bmp_b[bStartIdx], 0, bmp_b_size - bStartIdx);
}
-
\$\begingroup\$ I would leave the assert statements as the first 2 statements, there really isn't any reason to do any assignments or calculations if the assert statements fail. \$\endgroup\$2021年05月15日 23:32:53 +00:00Commented May 15, 2021 at 23:32
-
\$\begingroup\$ Your code is pretty good, clean, and close to what I want. The '2 set any moved bits to 1' should be '2 replace any moved bits by the single bit 1.' This (generally) has to move all the items
offset + range
left, and replace them with 0. (This shrinks the size from the maximum tomax - range + 1
, so the new entries can fit.) \$\endgroup\$Neil– Neil2021年05月16日 02:56:48 +00:00Commented May 16, 2021 at 2:56
Be Careful With Assert Usage
If you are using the assert()
macro in a debug environment that is great, however, if this becomes production code and the code is optimized and not compiled with the debug option then the assert()
statements are optimized away. Only expect assert()
statements to work when the program is compiled in debug mode. If you need to always check at runtime then use if
instead with error messages.
-
\$\begingroup\$ Since I may be exporting
move
to a bigger project, do you have any specific advice onassert(bmp_a && bmp_b && bit_range && bit_offset + bit_range <= bmp_a_size * 8 && bit_range <= bmp_b_size * 8)
; is it clear from the comments that this is considered a programming error? \$\endgroup\$Neil– Neil2021年05月16日 03:13:48 +00:00Commented May 16, 2021 at 3:13 -
1\$\begingroup\$ @Neil The assert statement terminates the program with no error message. Error messages provide the user with feedback about what is wrong. Assert statements are useful when debugging and possibly unit testing, they are not good for production code. Your comments are clear, but a user doesn't look at the comments in the code. \$\endgroup\$2021年05月16日 13:33:22 +00:00Commented May 16, 2021 at 13:33
-
1\$\begingroup\$ @pacmaninbw "No error message"??! Only if your C implementation is broken! It's supposed to produce implementation-specific diagnostic information on the standard error output before terminating. The diagnostic information is required to include the text of
expression
, as well as the values of [(since C99) the predefined variable__func__
and of] the predefined macros__FILE__
and__LINE__
. \$\endgroup\$Toby Speight– Toby Speight2021年09月01日 12:36:42 +00:00Commented Sep 1, 2021 at 12:36
The storage is inefficient on machines whose CHAR_BIT
is larger than 8. It wouldn't be hard to use all the available bits in each char. For example:
{ /* Copy a contiguous subset of bits from `a` into the new array, `b`. */
const unsigned a = bit_offset / CHAR_BIT;
const unsigned a_bit = bit_offset % CHAR_BIT;
unsigned b, rest;
for (b = 0, rest = bit_range; rest > CHAR_BIT; b++, rest -= CHAR_BIT) {
bmp_b[b] = (bmp_a[a + b] << a_bit) | (bmp_a[a + b + 1] >> (CHAR_BIT - a_bit));
}
bmp_b[b] = bmp_a[a + b] << a_bit;
if (a + b < (bit_offset + bit_range) / CHAR_BIT) {
bmp_b[b] |= (bmp_a[a + b + 1] >> (CHAR_BIT - a_bit));
}
bmp_b[b++] &= ~(UCHAR_MAX >> rest);
memset(bmp_b + b, 0, bmp_b_size - b);
}
Note that I've used plain division and modulo rather than shifting, to be clear what we mean - we can trust any reasonable compiler to implement those with bit operations if they are more efficient. There's no need to do that kind of micro-optimisation by hand.
-
\$\begingroup\$ That's bit-for-bit what my code looks like now. I was worried that
UCHAR_MAX
might be something crazy, but it's both>= 255
and2^n-1
. \$\endgroup\$Neil– Neil2021年09月02日 14:40:59 +00:00Commented Sep 2, 2021 at 14:40