I wanted to understand how base64 encoding (and decoding) works so I implemented this tool in the spirit of "classic UNIX tools" (read from stdin, write to stdout).
I'd like to get general feedback on style and implementation (hoping I got it right). Also, since I'm doing bit manipulation, should I worry about endianness?
b64.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Usage text; main() hands it to die() when the arguments are not understood. */
#define USAGE "usage: b64 [-d]\n" \
" base64 encode/decode standard input to standard output\n"
/* Forward declarations of the file-local helpers defined after main(). */
static void die(const char *reason); /* print reason to stderr, exit with EXIT_FAILURE */
static void encode(void);            /* stdin bytes -> base64 text on stdout */
static void decode(void);            /* base64 text on stdin -> bytes on stdout */
static int getcharskipn(void);       /* getchar() variant that skips over LF / CR LF line breaks */
static int isvalid(int c);           /* nonzero if c is a base64 alphabet character or '=' */
/*
 * Encoding table: maps a 6-bit value (0..63) to its base64 character.
 * Read-only lookup data, hence const.
 */
static const char enctable[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b',
    'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
    'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/',
};
/*
 * Decoding table: maps a character code to its 6-bit base64 value, or -1 for
 * codes that are not part of the alphabet.  The table is laid out by ASCII
 * code and covers codes 0 through 'z' (123 entries), so it must only be
 * indexed with characters that have already been validated.
 * Read-only lookup data, hence const.
 */
static const int dectable[] =
{
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57,
    58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6,
    7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
    37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};
/*
 * Entry point.  With no arguments, encode standard input; with the single
 * argument "-d", decode it; anything else prints the usage text and fails.
 *
 * Returns 0 on success.  Exits with EXIT_FAILURE (through die()) on a usage
 * error or when reading standard input / writing standard output failed.
 */
int main(int argc, char **argv)
{
    if (argc == 1)
        encode();
    else if (argc == 2 && strcmp(argv[1], "-d") == 0)
        decode();
    else
        die(USAGE);

    /* getchar() reports both end-of-file and read errors as EOF; tell them apart */
    if (ferror(stdin))
        die("b64: read error\n");

    /* flush explicitly so buffered write failures (e.g. disk full) are noticed */
    if (fflush(stdout) == EOF || ferror(stdout))
        die("b64: write error\n");

    return 0;
}
/*
 * Write reason verbatim to standard error and terminate the program with
 * EXIT_FAILURE.  The message is expected to carry its own trailing newline.
 */
static void die(const char *reason)
{
    /* fputs, not fprintf: reason must never be interpreted as a format string */
    fputs(reason, stderr);
    exit(EXIT_FAILURE);
}
/*
 * Base64-encode standard input to standard output.
 *
 * Input is consumed three bytes at a time; each triple is packed into a
 * 24-bit value and emitted as four characters looked up in enctable.  When
 * the input ends inside a triple, the missing bytes count as zero and the
 * corresponding output positions are written as '=' padding.
 */
static void encode(void)
{
    int first, second, third;
    unsigned long triple; /* 24 bits: first byte on top, third byte at the bottom */
    char quad[4];
    int i;

    for (;;) {
        first = getchar();
        if (first == EOF)
            break;
        second = getchar();
        third = getchar();

        triple = (unsigned long)first << 16;
        if (second != EOF)
            triple |= (unsigned long)second << 8;
        if (third != EOF)
            triple |= (unsigned long)third;

        quad[0] = enctable[(triple >> 18) & 0x3F];
        quad[1] = enctable[(triple >> 12) & 0x3F];
        quad[2] = enctable[(triple >> 6) & 0x3F];
        quad[3] = enctable[triple & 0x3F];

        /* positions that have no input byte behind them become padding */
        if (second == EOF)
            quad[2] = '=';
        if (third == EOF)
            quad[3] = '=';

        for (i = 0; i < 4; i++)
            putchar(quad[i]);
    }
}
/*
 * Decode base64 text from standard input and write the bytes to standard
 * output.
 *
 * Characters are fetched with getcharskipn() and processed in groups of
 * four.  A full group yields three bytes; a group ending in "=" yields two
 * and a group ending in "==" yields one.
 *
 * Terminates the program through die() when a group is malformed: a
 * character rejected by isvalid() (this includes EOF in the middle of a
 * group), '=' in the first or second position, or '=' in the third position
 * that is not followed by '=' in the fourth.
 */
static void decode(void)
{
    int c1, c2, c3, c4;
    unsigned long g; /* group of 3 bytes built using dectable indexed by 4 input characters */

    while ((c1 = getcharskipn()) != EOF) {
        c2 = getcharskipn();
        c3 = getcharskipn();
        c4 = getcharskipn();

        if (!isvalid(c1) || c1 == '='
            || !isvalid(c2) || c2 == '='
            || !isvalid(c3)
            || !isvalid(c4)
            || (c3 == '=' && c4 != '=')) /* "xx=y": padding cannot be followed by data */
            die("b64: invalid input\n");

        /* every index below passed isvalid(), so it lies inside dectable */
        g = dectable[c1];
        g = (g << 6) | dectable[c2];
        g = (g << 6) | (c3 == '=' ? 0 : dectable[c3]);
        g = (g << 6) | (c4 == '=' ? 0 : dectable[c4]);

        putchar((g >> 16) & 0xFF);
        if (c3 != '=')
            putchar((g >> 8) & 0xFF);
        if (c4 != '=')
            putchar(g & 0xFF);
    }
}
/*
 * Read the next character from standard input, skipping line breaks.
 *
 * Any run of "\n" and "\r\n" sequences is skipped, so blank lines and
 * repeated trailing newlines do not reach the caller.  A '\r' that is not
 * followed by '\n' is not a line break: it is returned as '\r' and the
 * character after it is pushed back for the next call.
 *
 * Returns the character as getchar() would, or EOF at end of input.
 */
static int getcharskipn(void)
{
    int c, next;

    for (;;) {
        c = getchar();
        if (c == '\n')
            continue;
        if (c != '\r')
            return c;

        next = getchar();
        if (next == '\n')
            continue;
        if (next != EOF)
            ungetc(next, stdin);
        return '\r';
    }
}
/*
 * Tell whether c may appear in base64 input: an upper- or lower-case letter,
 * a digit, '+', '/', or the padding character '='.
 * Returns 1 if so, 0 otherwise.
 */
static int isvalid(int c)
{
    if ('A' <= c && c <= 'Z')
        return 1;
    if ('a' <= c && c <= 'z')
        return 1;
    if ('0' <= c && c <= '9')
        return 1;

    switch (c) {
    case '+':
    case '/':
    case '=':
        return 1;
    default:
        return 0;
    }
}
b64.test
#!/bin/sh
# test_str INPUT EXPECTED
# Encode INPUT and compare the result with EXPECTED, then decode the encoded
# text and compare it with INPUT.  Exits the script with status 1 on the
# first mismatch.
test_str()
{
    printf "%s" "$1" > original
    printf "%s" "$2" > expected
    ./b64 < original > enc
    diff enc expected || exit 1
    ./b64 -d < enc > dec
    diff dec original || exit 1
}
# test_rnd NBYTES
# Round-trip NBYTES of random data through the encoder and the decoder and
# check that the result equals the original.  Exits the script with status 1
# on a mismatch.
test_rnd()
{
    head -c "$1" /dev/urandom > rnd
    ./b64 < rnd > enc
    ./b64 -d < enc > dec
    diff dec rnd || exit 1
}
# Remove the scratch files created by test_str and test_rnd.
cleanup()
{
    rm original expected \
       rnd dec enc
}
# Known-answer checks: plain text and its expected base64 encoding.
test_str "" ""
test_str "f" "Zg=="
test_str "fo" "Zm8="
test_str "foo" "Zm9v"
test_str "foob" "Zm9vYg=="
test_str "fooba" "Zm9vYmE="
test_str "foobar" "Zm9vYmFy"
test_str "foobarb" "Zm9vYmFyYg=="
test_str "foobarba" "Zm9vYmFyYmE="
test_str "foobarbaz" "Zm9vYmFyYmF6"

# Round-trip random data of every length from 1000 to 1024 bytes.
for i in $(seq 1000 1024)
do
    test_rnd "$i"
done

cleanup
echo "all tests passed"
Makefile
.POSIX:
# Toolchain and installation settings; override them on the make command line.
# Plain '=' is used because it is the assignment form every POSIX make accepts.
CC = cc
CFLAGS = -std=c89 -pedantic -Wall -Wextra -Werror
PREFIX = /usr/local

# Default target: the debug build.
all: b64.debug

# The source file is named explicitly instead of relying on $^, which older
# POSIX make implementations do not provide.
b64.debug: b64.c
	$(CC) $(CFLAGS) -g -DDEBUG b64.c -o $@

b64: b64.c
	$(CC) $(CFLAGS) -DNDEBUG b64.c -o $@

test: b64
	sh b64.test

# install(1) replaces the target file instead of overwriting it in place (as
# cp would), so a running or write-protected copy of b64 is not a problem.
install: b64
	mkdir -p $(DESTDIR)$(PREFIX)/bin
	install -m 555 b64 $(DESTDIR)$(PREFIX)/bin/

uninstall:
	rm $(DESTDIR)$(PREFIX)/bin/b64

clean:
	rm -f b64 b64.debug
1 Answer 1
All the code looks like you are very experienced since you didn't make any obvious mistakes.
Some small things to consider:
I'd compile the release binary with assertions enabled since I prefer an obvious crash over undefined behavior.
Since you don't include `<assert.h>` at all, you don't need the `-DNDEBUG` flag, since it won't make any difference.
The headers from the C standard library should be sorted alphabetically.
The function name `isvalid` is reserved for future versions of the C standard library, though I don't think that name will ever be taken. The name `isvalid` is way too unspecific to land in the standard library. In the narrow scope of a base64 encoder/decoder, the name is perfect.
Your decision to have 18 table entries per line looks a bit arbitrary to me. I'd select 16 since that's how the code points in ASCII are arranged.
The decoding table assumes that the execution character set is ASCII. Try running this program on an IBM machine. :)
Since you already use the `const` keyword, it makes sense to use it for `enctable` and `dectable` as well.
At the very end of the program, you could check `stdin` and `stdout` for I/O errors and in such a case return `EXIT_FAILURE`.
Having a test suite with even fuzzing included makes the code trustworthy. :)
The Makefile even works on ancient Solaris where `/bin/sh` does not even know about functions. In such a situation, one can just set `PATH` before running make and thereby provide a sane shell.
Thank you for including `DESTDIR` in the Makefile. :)
For installing the program, you should not use `cp`:
It will overwrite the file in-place, which leads to problems if the program is still running while being overwritten.
It doesn't overwrite write-protected files.
Use `install -m 555 b64 $(DESTDIR)$(PREFIX)/bin/` instead.
In my mind the program is ready to be used and packaged. You might write a manual page to make the distribution package complete.
-
Thank you for your feedback! I didn't know about `install`; I'm definitely going to use it from now on =) To fix the "IBM machine" problem I think I could build `dectable` at runtime starting from `enctable`, and maybe make `isvalid` dependent on `dectable`, since I suspect that `isvalid` wouldn't work on a non-ASCII machine either. Does that make sense to you? – MarcoLucidi, Nov 10, 2019 at 11:44
Sounds perfect. – Roland Illig, Nov 10, 2019 at 14:40
uudecode
. \$\endgroup\$