I wanted a polyfill of flexible array members in C89. So I made something similar, and made a toy std::vector with limited functionality on top of it. Here is the code:
vector.h
/* Flexible array member example in C89 - Just because we can!
* It's demonstrated by vectors, but can be generalized.
* MIT License; see below for the license text.
* By Scorbie, 2019. */
#ifndef VECTOR_H
#define VECTOR_H
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* We will use a plain array!
* We just store the header/metadata/other-fields/whatever
* in the front few fields, the number of which determined below.
* So the overall picture is the following:
* v-- The start of the allocated memory.
* v v-- The pointer returned.
* | | | | | | |*| | | | | ...
* |<- header->|<- array-- ...
* If we don't know the header size in advance, we can use another
* size_t for specifying the header size:
* | | | | | | | | | | | | |*| | | | | ...
* |<- header->|<- size_t->|<- array-- ...
*/
/* We will going to use this fixed header for our vector. */
struct header {
size_t size;
size_t capacity;
};
/* We define SIZE_MAX, because C89 doesn't define it for us. */
#ifndef SIZE_MAX
#define SIZE_MAX ((size_t)(-1))
#endif
/* If needed, increase the capacity of the given header.
* On success, return the new header.
* On failure, return {0, 0} to signal an error.
* I am not sure whether I should raise an error. (errno=EDOM) */
static inline struct header header_increase_capacity(struct header head) {
assert(head.size <= head.capacity);
if (head.size == SIZE_MAX) {
head.size = 0; head.capacity = 0;
} else if (head.size == head.capacity) {
head.capacity = (head.capacity > SIZE_MAX/2) ? SIZE_MAX : 2 * head.capacity;
}
return head;
}
/* Byte, the unit of all data in C.
* Just gave it a better name to be explicit.
* I'm going to use memcpy to copy data byte by byte.
* This was the only way I found to be C89-conformant. */
typedef unsigned char byte;
#define VECTOR_INITIAL_CAPACITY 32
/* Get the size needed to completely cover the header by items of size item_size. */
static inline size_t header_get_offset(size_t item_size) {
const size_t header_size = sizeof(struct header);
/* header_len: how many items needed to cover the header. */
const size_t header_len = (header_size/item_size) + ((header_size%item_size) ? 1 : 0);
const size_t offset = (item_size * header_len);
return offset;
}
static inline struct header vector_get_header(void* vec, size_t item_size) {
struct header head;
byte* src;
byte* dest;
assert(vec);
src = (byte*)vec - header_get_offset(item_size);
dest = (byte*)(&head); /* To make my linter quiet and be explicit */
memcpy(dest, src, sizeof(struct header));
return head;
}
static inline void vector_set_header(void* vec, size_t item_size, struct header head) {
byte* src;
byte* dest;
assert(vec);
src = (byte*)(&head);
dest = (byte*)vec - header_get_offset(item_size);
memcpy(dest, src, sizeof(struct header));
}
static inline void* vector_init(size_t item_size) {
struct header head = {0, VECTOR_INITIAL_CAPACITY};
assert(item_size < SIZE_MAX / VECTOR_INITIAL_CAPACITY);
void* vec = malloc(item_size * head.capacity);
if (!vec) {
return NULL;
} else {
vec = (byte*)vec + header_get_offset(item_size);
vector_set_header(vec, item_size, head);
return vec;
}
}
static inline void* vector_increase_capacity(void* vec, size_t item_size) {
struct header head;
const size_t header_offset = header_get_offset(item_size);
byte* vec_mem_start = NULL;
byte* new_vec_mem_start = NULL;
void* new_vec = NULL;
assert(vec);
/* Try to allocate more memory with overflow checks. */
head = header_increase_capacity(vector_get_header(vec, item_size));
if ( head.capacity > (SIZE_MAX-header_offset)/(item_size) ) {
errno = EDOM;
} else if (head.capacity == 0) {
errno = EDOM;
} else {
vec_mem_start = (byte*)vec - header_offset;
new_vec_mem_start = realloc(vec_mem_start, header_offset + sizeof item_size * head.capacity);
}
/* Check for failure. */
if (!new_vec_mem_start) {
perror("Error: vector_increase_capacity failed");
return NULL;
} else {
new_vec = new_vec_mem_start + header_offset;
vector_set_header(new_vec, item_size, head);
return new_vec;
}
}
static inline size_t vector_get_size(void* vec, size_t item_size) {
assert(vec);
return vector_get_header(vec, item_size).size;
}
static inline int vector_bounds_check(void* vec, size_t item_size, size_t i) {
return (i >= 0 && i < vector_get_size(vec, item_size)) ? 0 : -1;
}
#endif /* VECTOR_H */
/* MIT License
* Copyright (c) 2019 Scorbie
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
Here's a small example of a vector<int>
using it.
main.c
#include <assert.h>
#include <stdio.h>
#include "vector.h"
int* stack_init() {
return vector_init(sizeof(int));
}
int* stack_push(int* stack, int item) {
struct header head = vector_get_header(stack, sizeof *stack);
int* new_stack = vector_increase_capacity(stack, sizeof *stack);
if(!new_stack) { return NULL; }
assert(head.size < head.capacity);
new_stack[head.size++] = item;
vector_set_header(new_stack, sizeof *stack, head);
return new_stack;
}
int stack_peek(int* stack) {
size_t size = vector_get_size(stack, sizeof *stack);
return stack[size-1];
}
int stack_pop(int* stack) {
struct header head = vector_get_header(stack, sizeof *stack);
head.size--;
vector_set_header(stack, sizeof *stack, head);
return stack_peek(stack);
}
int stack_size(int* stack) {
return vector_get_size(stack, sizeof *stack);
}
int stack_at(int* stack, size_t i) {
int status = vector_bounds_check(stack, sizeof *stack, i);
if (status == -1) {
errno = ERANGE;
perror("Error while indexing stack");
return -1;
} else {
return stack[i];
}
}
int main(void) {
size_t i;
int* a = stack_init();
stack_push(a, 3);
stack_push(a, 4);
printf("%d\n", stack_peek(a));
for (i=0; i<stack_size(a); ++i) {
printf("%d\n", stack_at(a, i));
}
stack_pop(a);
stack_push(a, 5);
for (i=0; i<stack_size(a); ++i) {
printf("%d\n", stack_at(a, i));
}
return 0;
}
It would be especially thankful if any of these points are answered:
- Standard C89 conformance (Are some parts not in C89/result in UB etc?)
- Performance (e.g. Is it slower than the sane approach; i.e. using double indirection by struct with pointers?)
- Readability (Refactoring / Idiomatic C etc.)
- Maintainance (My current expectation: This would get you fired if you use this in production, but only a little later than the one using only macros.)
2 Answers 2
A very well done effort even though I am not a fan of loading .h files with so much inline code.
- Standard C89 conformance (Are some parts not in C89/result in UB etc?)
Good to have used unsigned char
.
Only UB I see is with pathological size == 0
leads to /0
- Performance (e.g. Is it slower than the sane approach; i.e. using double indirection by struct with pointers?)
Consider space performance
An VECTOR_INITIAL_CAPACITY 32
would be fairly piggish if code had a largest array of vectors. I'd start with 0 or pass into vector_init()
the initial size.
Correct function?
In vector_increase_capacity()
, I have doubts that after code sets errno = EDOM;
the rest of code is correct. Should not code avoid the following { new_vec = new_vec_mem_start + header_offset; vector_set_header(new_vec, item_size, head); ... }
?
- Readability (Refactoring / Idiomatic C etc.)
why dest
Unclear why extra code. Suggested simplification.
// byte* dest;
// dest = (byte*)(&head); /* To make my linter quiet and be explicit */
// memcpy(dest, src, sizeof(struct header));
memcpy(&head, src, sizeof *head);
unneeded else
if (!vec) {
return NULL;
// } else {
}
vec = (byte*)vec + header_get_offset(item_size);
vector_set_header(vec, item_size, head);
return vec;
// }
- Maintenance (My current expectation: This would get you fired if you use this in production, but only a little later than the one using only macros.)
Post C89
Good this code tests for prior SIZE_MAX
as perhaps another .h file made it or maybe code is now using C99
Collisions
Avoid using name space in an unexpected fashion.
Inside vector.h
, I would not expect to find a struct
named header
. I recommend to use vector
or vector_header
.
This include function names like header_get_offset()
. Better to uniformly start with vector
.
To define byte
as typedef unsigned char byte;
in a .h file is fairly brazen to assume some other .h and application did not define it, perhaps a bit differently. I'd recommend simple using unsigned char
.
Does #include "vector.h"
stand on its own?
As a test to insure "vector.h"
includes itself, needed include files, try "vector.h"
first.
#include "vector.h"
#include <assert.h>
#include <stdio.h>
// #include "vector.h"
IANAL but
Copyright notices need to be obvious, not buried at the end of code.
No free
I'd a function to call to free all allocations. Perhaps void stack_uninit(void* vec)
?
-
\$\begingroup\$ Wow! A big THANK YOU for the detailed and clear explanation! Could you elaborate on static inline, whether it's bad style or personal preference and why? \$\endgroup\$Scorbie– Scorbie2019年12月24日 00:34:56 +00:00Commented Dec 24, 2019 at 0:34
-
1\$\begingroup\$ I see long
inline
functions as repetitive code, rarely improving performance that much (and certainly not big - 0). For performance improvements, focusing on high level simplifications is more worth my time. \$\endgroup\$chux– chux2019年12月24日 02:03:43 +00:00Commented Dec 24, 2019 at 2:03
Update: here's the edited code thanks to the feedback!
/* Flexible array member example in C89 - Just because we can!
* It's demonstrated by vectors, but can be generalized.
* By Scorbie, 2019.
* Reviewed by StackExchange users "chux - Reinstate Monica" and "Björn Lindqvist":
* https://codereview.stackexchange.com/questions/234514
*
* MIT License
* Copyright (c) 2019 Scorbie
* Copyright (c) 2019 chux - Reinstate Monica
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef VECTOR_H
#define VECTOR_H
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* We will use a plain array!
* We just store the header/metadata/other-fields/whatever
* in the front few fields, the number of which determined below.
* So the overall picture is the following:
* v-- The start of the allocated memory.
* v v-- The pointer returned.
* | | | | | | |*| | | | | ...
* |<- header->|<- array-- ...
* If we don't know the header size in advance, we can use another
* size_t for specifying the header size:
* | | | | | | | | | | | | |*| | | | | ...
* |<- header->|<- size_t->|<- array-- ...
* */
/* We will going to use this fixed header for our vector. */
struct vector_header {
size_t size;
size_t capacity;
};
/* We define SIZE_MAX, because C89 doesn't define it for us. */
#ifndef SIZE_MAX
#define SIZE_MAX ((size_t)(-1))
#endif
/* Get the size needed to completely cover the header by items of size item_size. */
size_t vector_header_get_offset(size_t item_size) {
const size_t header_size = sizeof(struct vector_header);
/* header_len: how many items needed to cover the header. */
const size_t header_len = (header_size/item_size) + ((header_size%item_size) ? 1 : 0);
const size_t offset = (item_size * header_len);
return offset;
}
void* vector_get_mem_start(void* vec, size_t item_size) {
assert(vec);
return (unsigned char*)vec - vector_header_get_offset(item_size);
}
struct vector_header vector_get_header(void* vec, size_t item_size) {
struct vector_header header;
unsigned char* src = vector_get_mem_start(vec, item_size);
memcpy(&header, src, sizeof header);
return header;
}
void vector_set_header(void* vec, size_t item_size, struct vector_header header) {
unsigned char* dest = vector_get_mem_start(vec, item_size);
memcpy(dest, &header, sizeof header);
}
/* Initialize vector with the given initial_capacity.
* Initial capacity should be >= 1. */
void* vector_init(size_t item_size, size_t initial_capacity) {
struct vector_header header = {0, initial_capacity};
void* vec;
/* Assertions to prevent divide-by-0 and overflows */
assert(item_size);
assert(initial_capacity != 0 && initial_capacity < (SIZE_MAX/item_size));
vec = malloc(initial_capacity * item_size);
if (!vec) { return NULL; }
vec = (unsigned char*)vec + vector_header_get_offset(item_size);
vector_set_header(vec, item_size, header);
return vec;
}
/* If needed, increase the capacity of the given header.
* On success, return the new header.
* On failure, return {0, 0} to signal an error. */
struct vector_header vector_header_increase_capacity(struct vector_header header) {
assert(header.size <= header.capacity);
if (header.size == SIZE_MAX) {
/* No more memory, set it to error value */
header.size = header.capacity = 0;
} else if (header.size == header.capacity) {
header.capacity = (header.capacity >= SIZE_MAX/2) ? SIZE_MAX : (2 * header.capacity);
}
return header;
}
void* vector_increase_capacity(void* vec, size_t item_size) {
struct vector_header header;
const size_t header_offset = vector_header_get_offset(item_size);
void* vec_mem_start = vector_get_mem_start(vec, item_size);
void* new_vec_mem_start = vec_mem_start;
assert(vec);
assert(item_size);
assert(header_offset);
/* Try to allocate more memory with overflow checks. */
header = vector_header_increase_capacity(vector_get_header(vec, item_size));
if (header.capacity == 0 || header.capacity > (SIZE_MAX-header_offset)/(item_size)) {
errno = EDOM;
perror("Error: vector_increase_capacity failed");
return NULL;
}
new_vec_mem_start = realloc(vec_mem_start, header_offset + item_size * header.capacity);
/* Check for failure. */
if (!new_vec_mem_start) {
perror("Error: vector_increase_capacity failed");
return NULL;
} else {
void* new_vec = (unsigned char*)new_vec_mem_start + header_offset;
vector_set_header(new_vec, item_size, header);
return new_vec;
}
}
int vector_is_out_of_bounds(void* vec, size_t item_size, size_t i) {
return (i > vector_get_header(vec, item_size).size);
}
void vector_free(void* vec, size_t item_size) {
void* vec_mem_start = vector_get_mem_start(vec, item_size);
free(vec_mem_start);
}
#endif /* VECTOR_H */
```
inline
keyword is not part of C89. Are you sure you want conformance to C89 and not a newer C standard? \$\endgroup\$