This function mimics Python's map()
function (No, it doesn't support generic return types for the function or multiple iterables) by applying the provided function func
to each element of the array iter
. It returns a new array containing the results which must be passed to free()
.
#include <stdlib.h>
#define map(count, iter, func) _Generic (*iter, \
char: map_c, \
unsigned char: map_uc, \
short int: map_si, \
unsigned short int: map_usi, \
int: map_i, \
unsigned int: map_ui, \
long int: map_li, \
unsigned long int: map_uli, \
long long int: map_lli, \
unsigned long long int: map_ulli, \
float: map_f, \
double: map_d, \
long double: map_ld, \
_Bool: map_b, \
char *: map_s \
)(count, iter, func)
#define gen_map(_suffix, _type) \
_type *map_##_suffix(size_t count, _type iter[static count], \
_type (*func)(_type x)) \
{ \
_type *out = malloc(sizeof *out * count); \
\
if (!out) { \
return NULL; \
} \
for (size_t i = 0; i < count; ++i) { \
out[i] = func(iter[i]); \
} \
return out; \
}
gen_map(c, char)
gen_map(uc, unsigned char)
gen_map(si, short int)
gen_map(usi, unsigned short int)
gen_map(i, int)
gen_map(ui, unsigned int)
gen_map(li, long int)
gen_map(uli, unsigned long int)
gen_map(lli, long long int)
gen_map(ulli, unsigned long long int)
gen_map(f, float)
gen_map(d, double)
gen_map(ld, long double)
gen_map(b, _Bool)
char **map_s(size_t count, char *iter[static count],
char *(*func)(char *s))
{
char **out = malloc(sizeof *out * count);
if (!out) {
return NULL;
}
for (size_t i = 0; i < count; ++i) {
out[i] = func(iter[i]);
}
return out;
}
#ifdef TEST_MAIN
#include <stdio.h>
#include <string.h>
static int square(int x)
{
/* This can overflow, but we don't care for now. */
return x * x;
}
static int cube(int x)
{
return x * x * x;
}
static char *rev(char *s)
{
if (!*s) {
/* So that it can be passed to free(). */
return strdup("");
}
const size_t len = strlen(s);
char *const t = malloc(len + 1);
if (!t) {
return NULL;
}
for (size_t i = 0, j = len - 1; s[i]; ++i, --j) {
t[i] = s[j];
}
t[len] = '0円';
return t;
}
int main(void)
{
int items[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
size_t const item_count = sizeof items / sizeof *items;
int *const squares = map(item_count, items, square);
int *const cubes = map(item_count, items, cube);
for (size_t i = 0; i < item_count; ++i) {
printf("Number: %d, Square: %d, Cube: %d.\n", items[i], squares[i],
cubes[i]);
}
putchar('\n');
free(squares);
free(cubes);
char *strs[] = { "", "a", "abc", "abcd", "abcde" };
const size_t strs_count = sizeof strs / sizeof *strs;
char **rev_strs = map(strs_count, strs, rev);
for (size_t i = 0; i < strs_count; ++i) {
printf("%s ----> %s\n", strs[i], rev_strs[i]);
free(rev_strs[i]);
}
free(rev_strs);
return EXIT_SUCCESS;
}
#endif /* TEST_MAIN */
This is what the preprocessor produced (after running gcc -E map.c -o map.i
, removing irrelevant code, and formatting):
static char *map_c(size_t len, const char iter[static len],
char (*func)(char x))
{
char *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static unsigned char *map_uc(size_t len, const unsigned char iter[static len],
unsigned char (*func)(unsigned char x))
{
unsigned char *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static short int *map_si(size_t len, const short int iter[static len],
short int (*func)(short int x))
{
short int *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static unsigned short int *map_usi(size_t len,
const unsigned short int iter[static len],
unsigned short int (*func)(unsigned short int x))
{
unsigned short int *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static int *map_i(size_t len, const int iter[static len], int (*func)(int x))
{
int *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static unsigned int *map_ui(size_t len, const unsigned int iter[static len],
unsigned int (*func)(unsigned int x))
{
unsigned int *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static long int *map_li(size_t len, const long int iter[static len],
long int (*func)(long int x))
{
long int *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static unsigned long int *map_uli(size_t len,
const unsigned long int iter[static len],
unsigned long int (*func)(unsigned long int x))
{
unsigned long int *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static long long int *map_lli(size_t len, const long long int iter[static len],
long long int (*func)(long long int x))
{
long long int *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static unsigned long long int *map_ulli(size_t len,
const unsigned long long int iter[static len],
unsigned long long int (*func)(unsigned long long int x))
{
unsigned long long int *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static float *map_f(size_t len, const float iter[static len],
float (*func)(float x))
{
float *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static double *map_d(size_t len, const double iter[static len],
double (*func)(double x))
{
double *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static long double *map_ld(size_t len, const long double iter[static len],
long double (*func)(long double x))
{
long double *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static _Bool *map_b(size_t len, const _Bool iter[static len],
_Bool (*func)(_Bool x))
{
_Bool *out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
} return out;
}
static char **map_s(size_t len, char *iter[const static len],
char *(*func)(char *s))
{
char **out = malloc(sizeof *out * len);
if (!out) {
return ((void *) 0);
}
for (size_t i = 0; i < len; ++i) {
out[i] = func(iter[i]);
}
return out;
}
And here's the output of the test program:
$ make CPPFLAGS=-DTEST_MAIN CFLAGS="-Wall -Werror -Wpedantic" map
cc -Wall -Werror -Wpedantic -DTEST_MAIN map.c -o map
$ ./map
Number: 1, Square: 1, Cube: 1.
Number: 2, Square: 4, Cube: 8.
Number: 3, Square: 9, Cube: 27.
Number: 4, Square: 16, Cube: 64.
Number: 5, Square: 25, Cube: 125.
Number: 6, Square: 36, Cube: 216.
Number: 7, Square: 49, Cube: 343.
Number: 8, Square: 64, Cube: 512.
Number: 9, Square: 81, Cube: 729.
Number: 10, Square: 100, Cube: 1000.
---->
a ----> a
abc ----> cba
abcd ----> dcba
abcde ----> edcba
Review Goals:
- Is this foolproof?
- Can it be structured better?
- Is the behavior of the code defined?
- Have I missed any types? What should the
default
case be?
PS: Why do this? It served as a good exercise.
2 Answers 2
This is Inflexible
It always allocates an output buffer on the heap, which the caller must free()
. It cannot write to a buffer allocated by the caller. It cannot use a map function whose output type is different than the input type. It does not support user-defined types.
Be Aware of the Rules for Generic Types
The C17 Standard requires that, in a _Generic
expression,
No two generic associations in the same generic selection shall specify compatible types. The type of the controlling expression is the type of the expression as if it had undergone an lvalue conversion [....]
So, if you expand the list of types, you need to ensure that no two types are compatible.
A void*
Implementation is Surprisingly Good
Consider the following implementation, which uses a similar interface to qsort()
or bsearch()
:
#include <stddef.h>
typedef void (*map_func)(const void*, void*);
/* Fills the destination array dest with the image of the source array src
* under the map function f.
*
* The source array must contain at least n elements of size src_elem_size.
* The destination array must contain at least n elements of size dest_elem_size.
* Both arrays must be correctly-aligned, and must not overlap.
* Returns a pointer to the destination array.
*/
inline void* map_generic( const map_func f,
const size_t n,
const void* const src,
const size_t src_elem_size,
void* const dest,
const size_t dest_elem_size ) {
for (size_t i = 0; i < n; ++i) {
const char* const srcp = (char*)src + src_elem_size*i;
char* const destp = (char*)dest + dest_elem_size*i;
f( srcp, destp );
}
return dest;
}
(Here, I made the source argument come before the destination, but perhaps I should have followed the K&R convention of destination before source.)
Let’s test it with the following function that takes the square root of an unsigned int
argument: (Technically, I should have declared the arguments as void*
for maximum portability.)
#include <math.h>
inline void my_sqrt(const unsigned* const srcp, double* const destp ) {
*destp = sqrt((double)*srcp);
}
And the following test driver:
#include <stdio.h>
#include <stdlib.h>
#define ELEMS 101U
int main(void) {
unsigned xs[ELEMS];
double ys[ELEMS];
for (unsigned i = 0; i < ELEMS; ++i) {
xs[i] = i;
}
map_generic( (map_func)my_sqrt,
ELEMS,
xs,
sizeof(xs[0]),
ys,
sizeof(ys[0]));
for (unsigned i = 1; i < ELEMS; ++i) {
printf("%f\n", ys[i]);
}
return EXIT_SUCCESS;
}
GCC 13.2 with -std=c17 -march=x86-64-v3 -O3
compiles the main loop of map_generic
to:
.L8:
mov eax, DWORD PTR [rbp-1296+rbx*4]
vcvtsi2sd xmm0, xmm2, rax
vucomisd xmm1, xmm0
ja .L15
vsqrtsd xmm0, xmm0, xmm0
vmovsd QWORD PTR [rbp-864+rbx*8], xmm0
add rbx, 1
cmp rbx, 101
jne .L8
With -ffast-math
, it unrolls this loop into a series of statements like:
vsqrtpd ymm0, YMMWORD PTR .LC0[rip]
vmovapd YMMWORD PTR [rbp-880], ymm0
MSVC 19.38 with /std:c17 /arch:AVX2 /O2 /EHc /fp:fast
compiles it to:
$LL12@main:
vcvtdq2pd ymm2, XMMWORD PTR xs$[rsp+rbx*4]
vcmppd ymm1, ymm2, ymm5, 1
vandpd ymm0, ymm1, ymm4
vaddpd ymm2, ymm0, ymm2
vsqrtpd ymm3, ymm2
vmovupd YMMWORD PTR ys$[rsp+rbx*8], ymm3
add rbx, 4
cmp rbx, 100 ; 00000064H
jb SHORT $LL12@main
Both Clang 17.0.1 and ICX 202400 are able to optimize away the initialization of both xs
and ys
entirely. They set ys
to pre-calculated constants, like:
.LCPI0_0:
.quad 0x0000000000000000 # double 0
.quad 0x3ff0000000000000 # double 1
.quad 0x3ff6a09e667f3bcd # double 1.4142135623730951
.quad 0x3ffbb67ae8584caa # double 1.7320508075688772
-
\$\begingroup\$ "Re: So, if you expand the list of types, you need to ensure that no two types are compatible." ==> Does compatible mean that
long
shouldn't be atypedef
forint
? I did not understand this. \$\endgroup\$Madagascar– Madagascar2024年02月07日 06:59:10 +00:00Commented Feb 7, 2024 at 6:59 -
\$\begingroup\$ @Harith I believe
int
andlong
are supposed to be incompatible types, in this context. However,_Generic
is really only intended to make the implementation of<math.h>
more portable, so I don’t entirely trust it with integral types. I’d be especially wary ofchar
andshort int
getting widened toint
. \$\endgroup\$Davislor– Davislor2024年02月07日 14:55:30 +00:00Commented Feb 7, 2024 at 14:55 -
\$\begingroup\$ "Be Aware of the Rules for Generic Types" ==> After much playing around with
_Generic
and asking on StackOverflow, I found that this rule is not applicable here, as none of the types are compatible. However, ifsize_t
was present, andunsigned long long int
was also present, andsize_t
was atypedef
forunsigned long long
, then you'll get diagnostic messages and the program would likely fail to compile. It would also be okay if the types were only fixed-width integer types (int8_t
toint_64_t
). But in case ofint_leastn_t
/int_fastn_t
, several types in the_Generic
.. \$\endgroup\$Madagascar– Madagascar2024年06月07日 19:17:04 +00:00Commented Jun 7, 2024 at 19:17 -
\$\begingroup\$ ...association list might collide. Moreover,
_Generic
does not type-promote the first item in the association list (but puts it through "lvalue conversion" meaning it discards type qualifiers). Sochar
andshort int
would not implicitly get promoted toint
. Although if an expression was to it, that expression itself may contain implicit type promotions. For example_Generic((signed char){}, ...
will result in asigned char
but_Generic(+(signed char){}, ...
will result in anint
. \$\endgroup\$Madagascar– Madagascar2024年06月07日 19:17:17 +00:00Commented Jun 7, 2024 at 19:17
I'm concerned that this is too inflexible, since we can't use a function that operates on user-defined type.
I'd be inclined to stick with the style of interface that traditional C (without generics) uses, as demonstrated in qsort()
and bsearch()
, where we pass a function taking its input as const void*
and writing its output through a void*
(and, perhaps accesses any external state via a void*
).
If we did that, we could help programmers by providing a macro to easily generate an appropriate wrapper function from a strongly-typed function that returns by value:
// untested
#define wrap_for_map(func, name, outtype, intype) \
void name(void *dst, const void *src, void*) \
{ \
outtype *out = dst; \
const intype *in = src; \
*out = func(*in); \
}
#define wrap_for_map_x(func, name, outtype, intype, statetype) \
void name(void *dst, const void *src, void *data) \
{ \
outtype *out = dst; \
const intype *in = src; \
statetype *state = data; \
*out = func(*in, state); \
}
signed char
in thegeneric
list? \$\endgroup\$char
must be exactly equivalent tosigned char
, similar to howint
is exactly equivalent tosigned int
, which turned out to be wrong of course. Should I have 3 different cases forchar
,signed char
, andunsigned char
? \$\endgroup\$generic
, I recommend to include the 3char
types. \$\endgroup\$