Ever wanted a strictly conformant echo(1)
implementation on your system? Wait no more: I've built it!
What is it?
My first large-ish body of C code in a while.
The code provides implementations of all of the following echo(1)
descriptions from the Open Group POSIX specification.
In summary:
- The echo utility writes its arguments to standard output, followed by a newline. If there are no arguments, only the newline is written.
- POSIX admits no options or escape characters
- BSD admits
-n
to suppress newlines - XSI admits the following table of escape characters
\a
Write an alert.\b
Write a backspace.\c
Suppress the newline that otherwise follows the final argument in the output. All characters following the '\c
' in the arguments shall be ignored.\f
Write a form-feed.\n
Write a newline.\r
Write a carriage-return.\t
Write a tab.\v
Write a vertical-tab.\\
Write a backslash character.0円num
Write an 8-bit value that is the zero, one, two, or three-digit octal numbernum
.
A good review might...
- Address the lookup-table nature of escape-character processing, and suggest a more compact (but still readable) system
- Address the repetition in the escape-character processing (
++str
,shift_left_one_char
, &c.)
Code organization
The code is broken down into roughly three (3) sections:
- Argument-echoing, with and without newline (
echo.[ch]
) - Escape-character processing (
escape.[ch]
) - Front-end implementations (
posix.c
,bsd.c
,xsi.c
,sysv.c
is a link toxsi.c
)
Build with make(1)
if you clone from the link.
Known failings
- No account for environment variables, e.g.,
LANG
andLC_*
- Limited error checking
- No make(1) support for installation, out-of-source building, documentation
- No tests
- No documentation
Code
echo.h
#ifndef ECHO_H
#define ECHO_H
int echo(int argc, char **argv);
int echo_n(int argc, char **argv);
#endif
echo.c
#include "echo.h"
#include <stdio.h>
int print_args(int argc, char **argv) {
if (argc == 0) return 0;
int err = 0;
for (int i = 0; i < argc-1; ++i) {
err = printf("%s ", argv[i]);
if (err < 0) return err;
}
err = printf("%s", argv[argc-1]);
if (err < 0) return err;
return 0;
}
int echo(int argc, char **argv) {
int err = print_args(argc, argv);
if (err < 0) return err;
err = printf("\n");
if (err < 0) return err;
return 0;
}
int echo_n(int argc, char **argv) {
return print_args(argc, argv);
}
escape.h
#ifndef ESCAPE_H
#define ESCAPE_H
#include <stdbool.h>
int interpret_escapes(int argc, char **argv, bool *suppress_newline);
#endif
escape.c
#include "escape.h"
#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
void shift_left_one_char(char *str) {
for (int i = 0; str[i] != '0円'; ++i) {
str[i] = str[i+1];
}
}
#define ALERT '\a'
#define BACKSPACE '\b'
#define FORMFEED '\f'
#define NEWLINE '\n'
#define CARRIAGE_RETURN '\r'
#define TAB '\t'
#define VTAB '\v'
#define BACKSLASH '\\'
#define MAX_OCTAL 3
#define START 0
#define ESC_CHAR_POS 1
#define ESC_MIN_LEN 2
int escape(char *str, bool *suppress_newline) {
int escapes_handled = 0;
while ((str = strchr(str, BACKSLASH)) != NULL) {
if (strnlen(str, ESC_MIN_LEN) < ESC_MIN_LEN) break;
int num_digits = 0;
int octal = 0;
switch (str[ESC_CHAR_POS]) {
case 'a':
str[START] = ALERT;
++str;
shift_left_one_char(str);
++escapes_handled;
break;
case 'b':
str[START] = BACKSPACE;
++str;
shift_left_one_char(str);
++escapes_handled;
break;
case 'c':
*suppress_newline = true;
// clear out this arg
str[START] = '0円';
++escapes_handled;
break;
case 'f':
str[START] = FORMFEED;
++str;
shift_left_one_char(str);
++escapes_handled;
break;
case 'n':
str[START] = NEWLINE;
++str;
shift_left_one_char(str);
++escapes_handled;
break;
case 'r':
str[START] = CARRIAGE_RETURN;
++str;
shift_left_one_char(str);
++escapes_handled;
break;
case 't':
str[START] = TAB;
++str;
shift_left_one_char(str);
++escapes_handled;
break;
case 'v':
str[START] = VTAB;
++str;
shift_left_one_char(str);
++escapes_handled;
break;
case '\\':
// unnecessary: already a backslash at str[START]
/* str[START] = BACKSLASH; */
++str;
shift_left_one_char(str);
++escapes_handled;
break;
case '0':
while (isdigit(str[ESC_CHAR_POS + num_digits])) {
octal *= 8;
switch(str[ESC_CHAR_POS + num_digits]) {
case '0': octal += 0; break;
case '1': octal += 1; break;
case '2': octal += 2; break;
case '3': octal += 3; break;
case '4': octal += 4; break;
case '5': octal += 5; break;
case '6': octal += 6; break;
case '7': octal += 7; break;
case '8': octal += 8; break;
case '9': octal += 9; break;
}
if (num_digits == MAX_OCTAL) break;
++num_digits;
}
str[START] = (char)octal;
++str;
for (int i = 0; i < ESC_CHAR_POS + num_digits; ++i)
{ shift_left_one_char(str); }
++escapes_handled;
break;
default:
break;
}
}
return escapes_handled;
}
int interpret_escapes(int argc, char **argv, bool *suppress_newline) {
*suppress_newline = false;
for (int i = 0; i < argc; ++i) {
int err = escape(argv[i], suppress_newline);
if (err < 0) return err;
if (*suppress_newline) return i+1;
}
return argc;
}
posix.c
#include "echo.h"
#include <stdlib.h>
int main(int argc, char** argv) {
int err = echo(argc-1, &argv[1]);
if (err < 0) return EXIT_FAILURE;
return 0;
}
bsd.c
#include "echo.h"
#include <stdlib.h>
#include <string.h>
int main(int argc, char** argv) {
int err;
if (argc > 1 && strcmp(argv[1], "-n") == 0) {
// swallow the -n
err = echo_n(argc-2, &argv[2]);
} else {
// still print newline
err = echo(argc-1, &argv[1]);
}
if (err < 0) return EXIT_FAILURE;
return 0;
}
xsi.c (sysv.c links to this)
#include "echo.h"
#include "escape.h"
#include <stdlib.h>
#include <stdbool.h>
int main(int argc, char** argv) {
char **args = &argv[1];
bool suppress_newline;
int actual_argc = interpret_escapes(argc-1, args, &suppress_newline);
if (actual_argc < 0) return EXIT_FAILURE;
int err;
if (suppress_newline) {
err = echo_n(actual_argc, args);
} else {
err = echo(actual_argc, args);
}
if (err < 0) return EXIT_FAILURE;
return 0;
}
1 Answer 1
Danger!
while (isdigit(str[ESC_CHAR_POS + num_digits])) {
Promoting char
to int
(as argument to isidigt()
) can sign-extend it. We have to launder the argument through unsigned char
on the way:
while (isdigit((unsigned char)(str[ESC_CHAR_POS + num_digits]))) {
We can make this a bit more readable with a function:
static bool char_isdigit(char c) { return isdigit((unsigned char)c); }
Several functions ought to have static
linkage: print_args()
, shift_left_one_char()
and escape()
.
escape()
repeatedly calls shift_left_one_char()
to modify the string in place. This is very inefficient with long arguments, as we're repeatedly accessing the rightmost part of the string (and not even using memmove()
for this, though a good compiler might spot the pattern).
A much more efficient strategy is to treat the arguments as read-only strings - print the literal text directly, and output the escapes as they are interpreted. To get different escape-character behaviours, we would pass a function pointer to the interpreter.
When processing \c
escapes, there's no need to continue reading the rest of the input string - we can return immediately after terminating the output.
When reading octal escapes, we can make the num_digits
test part of the loop condition instead of using break
, and we can avoid the inner switch
by using C's guarantee that the digits 0
..9
must have consecutive character values:
while (isdigit(str[ESC_CHAR_POS + num_digits]) && num_digits++ <= MAX_OCTAL) {
octal *= 8;
octal += str[ESC_CHAR_POS + num_digits] - '0';
}
When we read an octal number zero, we shouldn't truncate the string at that point. We should output a literal NUL
character and continue.
We could change the return value from echo()
and echo_n()
to be simply EXIT_SUCCESS
or EXIT_FAILURE
. That would simplify the frontend programs - they can simply
return echo_n(actual_argc, args);
instead of
int err; err = echo_n(actual_argc, args); if (err < 0) return EXIT_FAILURE; return 0;
-
\$\begingroup\$ Great comments, thanks; suggestions on how to output the NUL and not truncate the string? Placing a null into the string is clearly what truncates it, but im not sure if its possible to place an « escaped null » into the string that, eg, printf wont choke on. Can you clarify several functions ought to have static linkage? \$\endgroup\$D. Ben Knoble– D. Ben Knoble2019年08月27日 13:27:49 +00:00Commented Aug 27, 2019 at 13:27
-
\$\begingroup\$ Two ways to output the NUL. First and most obvious is to output as you go, as suggested elsewhere in this answer. The other (if you want to retain the transform-in-place method) is to return the new length to the caller, and use
fwrite()
instead offprintf()
orfputs()
. \$\endgroup\$Toby Speight– Toby Speight2019年08月27日 13:53:34 +00:00Commented Aug 27, 2019 at 13:53 -
1\$\begingroup\$ Static linkage makes the functions visible only to code in the same translation unit, and not to code in other TUs at link time. So declaring those functions
static
allows us to freely use those names in other TUs without fear of conflict. Not a big deal in a small program like this, but it's good hygiene that can avoid surprises in big codebases. \$\endgroup\$Toby Speight– Toby Speight2019年08月27日 13:55:29 +00:00Commented Aug 27, 2019 at 13:55 -
\$\begingroup\$ The problem with
fwrite
as I see it is that each item must be the same length (and for echo that's just not guaranteeable). That said, I like the transformation approach, as it's clearer. I will have to think on this \$\endgroup\$D. Ben Knoble– D. Ben Knoble2019年08月27日 14:04:45 +00:00Commented Aug 27, 2019 at 14:04 -
1\$\begingroup\$
fwrite()
does require objects of all the same length, but those objects can bechar
objects (i.e. pass 1 for thesize
argument, and length of string for thecount
). \$\endgroup\$Toby Speight– Toby Speight2019年08月27日 15:29:46 +00:00Commented Aug 27, 2019 at 15:29
Explore related questions
See similar questions with these tags.