I am working through Kernighan and Ritchie to brush up on my C skills, so I thought I would invite critiques of my code.
Here is my Hex-to-int function:
int htoi(char *str, unsigned long long *result)
{
int c;
int i;
int msnibble;
int digits;
const int nibbles[] = {'\x0','\x1','\x2','\x3', \
'\x4','\x5','\x6','\x7', \
'\x8','\x9','\xA','\xB', \
'\xC','\xD','\xE','\xF'};
if (NULL == result) {
printf("Invalid pointer\n");
return -1;
}
if ('x' == str[1] || 'X' == str[1]) {
msnibble = 2;
digits = strlen(str) - 2;
} else {
msnibble = 0;
digits = strlen(str);
}
if (digits > 16) {
printf("Too many hex digits\n");
return -1;
}
for (i=0, *result = 0; '0円' != (c = str[i+msnibble]); i++) {
if ( c >= 'a' && c <= 'f') {
c = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
c = c - 'A' + 10;
} else if (c >= '0' && c <= '9') {
c -= '0';
} else {
printf("Non Hex Character Detected; Exiting\n");
return -1;
}
*result = *result << 4 | nibbles[c];
}
return 0;
}
I also tried a solution using the math library. Using pow, I would get correct output until I passed 0xffffffffffffffff and then it would be one greater than the correct answer. This is why I settled on using the array of nibbles.
Edit 1:
int htoi(const char *str, unsigned long long *result)
{
int c;
int i;
int msnibble;
int digits;
if (NULL == result) {
printf("Invalid pointer\n");
return -1;
}
if ('0' == str[0]) {
if ('x' == str[1] || 'X' == str[1]) {
msnibble = 2;
digits = strlen(str) - 2;
} else {
msnibble = 0;
digits = strlen(str);
}
}
if (digits > 16) {
printf("Too many hex digits\n");
return -1;
} else if (0 == digits)
return -1;
for (i=0, *result = 0; '0円' != (c = str[i+msnibble]); i++) {
if ( c >= 'a' && c <= 'f') {
c = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
c = c - 'A' + 10;
} else if (c >= '0' && c <= '9') {
c -= '0';
} else {
printf("Non hex character detected; Exiting\n");
return -1;
}
*result = *result << 4 | c;
}
return 0;
}
Edit 2:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
enum status { NOINPUT = -1,
INVPTR = -2,
TOOBIG = -3,
NOHEX = -4,
NONHEX = -5};
int htoi(const char *str, unsigned long long *result);
int main(int argc, char *argv[])
{
unsigned long long result;
if (argc <= 1) {
printf("No input!\n");
return EXIT_FAILURE;
}
if (0 > htoi(argv[1],&result))
return EXIT_FAILURE;
else
printf("Value = %llu, strtol says %lu\n", result, strtol(argv[1],NULL,16));
return EXIT_SUCCESS;
}
int htoi(const char *str, unsigned long long *result)
{
int c;
int i;
int msnibble = 0;
int digits = 0;
if (NULL == result) {
printf("Invalid pointer\n");
return INVPTR;
}
if ('0' == str[0] && 'x' == str[1] || 'X' == str[1]) {
msnibble = 2;
digits = strlen(str) - 2;
} else {
msnibble = 0;
digits = strlen(str);
}
if (digits > 16) {
printf("Too many hex digits\n");
return TOOBIG;
} else if (0 == digits) {
printf("Nothing to do\n");
return NOHEX;
}
for (i=0, *result = 0; '0円' != (c = str[i+msnibble]); i++) {
if ( c >= 'a' && c <= 'f')
c = c - 'a' + 10;
else if (c >= 'A' && c <= 'F')
c = c - 'A' + 10;
else if (c >= '0' && c <= '9')
c -= '0';
else {
printf("Non hex character detected; Exiting\n");
return NONHEX;
}
*result = *result << 4 | c;
}
return EXIT_SUCCESS;
}
Edit 3:
I decided to simplify the error reporting and get rid of the printf
s. I also fixed the precedence error. I am omitting the braces on the if
s because I am following this. I am trying to follow the kernel style. Next time, I will include that with my post. I apologize for any confusion. I also got rid of i
and msnibble
as the pointer arithmetic is much simpler.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int htoi(const char *str, unsigned long *result);
int main(int argc, char *argv[])
{
unsigned long result;
if (argc <= 1) {
printf("No input!\n");
return EXIT_FAILURE;
}
if (0 > htoi(argv[1],&result))
return EXIT_FAILURE;
else
printf("Value = %llu, strtol says %lu\n", result, strtol(argv[1],NULL,16));
return EXIT_SUCCESS;
}
int htoi(const char *str, unsigned long *result)
{
int c;
int digits;
if ('0' == str[0] && ('x' == str[1] || 'X' == str[1]))
str += 2;
digits = strlen(str);
if (digits > sizeof(long)*2 || 0 == digits)
return -1;
for (*result = 0; '0円' != (c = *str); str++) {
if ( c >= 'a' && c <= 'f')
c = c - 'a' + 10;
else if (c >= 'A' && c <= 'F')
c = c - 'A' + 10;
else if (c >= '0' && c <= '9')
c -= '0';
else
return -1;
*result = *result << 4 | c;
}
return 0;
}
Edit 4:
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
int htoi(const char *s, unsigned long *res);
int main(int argc, char *argv[])
{
unsigned long res;
if (argc <= 1) {
printf("No input!\n");
return EXIT_FAILURE;
}
if (0 > htoi(argv[1],&res))
return EXIT_FAILURE;
else
printf("Value = %llu, strtol says %lu\n", res, strtol(argv[1],NULL,16));
return EXIT_SUCCESS;
}
int htoi(const char *s, unsigned long *res)
{
int n;
int digits;
if ('0' == s[0] && ('x' == s[1] || 'X' == s[1]))
s += 2;
for (*res = 0; '0円' != *s; s++) {
n = 0;
if ( *s >= 'a' && *s <= 'f') {
n = *s - 'a' + 10;
} else if (*s >= 'A' && *s <= 'F') {
n = *s - 'A' + 10;
} else if (*s >= '0' && *s <= '9') {
n = *s - '0';
} else {
errno = EINVAL;
return -1;
}
if (*res > (ULONG_MAX/16)) {
errno = ERANGE;
return -1;
}
*res *= 16;
*res += (unsigned long) n;
}
return 0;
}
Edit 5:
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
int htoi(const char *s, unsigned long *res);
int main(int argc, char *argv[])
{
unsigned long res;
if (argc <= 1) {
printf("No input!\n");
return EXIT_FAILURE;
}
if (0 > htoi(argv[1],&res))
return EXIT_FAILURE;
else
printf("Value = %llu, strtol says %lu\n", res, strtol(argv[1],NULL,16));
return EXIT_SUCCESS;
}
int htoi(const char *s, unsigned long *res)
{
if ('0' == s[0] && ('x' == s[1] || 'X' == s[1]))
s += 2;
int c;
unsigned long rc;
for (rc = 0; '0円' != (c = *s); s++) {
if ( c >= 'a' && c <= 'f') {
c = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
c = c - 'A' + 10;
} else if (c >= '0' && c <= '9') {
c = c - '0';
} else {
errno = EINVAL;
return -1;
}
if (rc > (ULONG_MAX/16)) {
errno = ERANGE;
return -1;
}
rc *= 16;
rc += (unsigned long) c;
}
*res = rc;
return 0;
}
7 Answers 7
Your indentation is messy (non-standard and not uniform).
If the 2nd character is 'x' or'X' you don't also assert that the 1st character is '0'.
You don't signal an error if the input string has zero length.
I doubt you need \
at the end of the lines of code which initialize nibbles.
The nibbles array is a waste of time and space, because nibbles[c]
has the same value as c
(so you might as well write *result = *result << 4 | c;
).
The input parameter should be const (const char*
).
printf writes to stdout. If you want to log the cause of an error you should probably write to stderr instead.
Aside from that, congratulations: it works!
-
\$\begingroup\$ @Steve I added a line about printf and stderr. \$\endgroup\$ChrisW– ChrisW2014年02月27日 23:22:07 +00:00Commented Feb 27, 2014 at 23:22
I think you should be returning different error values for different errors. You're returning -1
for an invalid pointer, too many hex digits, no hex digits, and a non-hex character detected.
Instead, choose separate error values for each of these (and any additional error types) and return the appropriate one. These can all be negative values, and you could still use 1
.
Once you've chosen some error values, you could have an enum
so that you can return a "more specific" type rather than just a number.
Here's just a rough example, but you may end up with something different:
typedef enum { INVALID_PTR = -1, EXCESSIVE_HEX = -2, /* ... */ } Error;
To demonstrate with one of the error conditions:
if (NULL == result) {
printf("Invalid pointer\n");
return INVALID_PTR;
}
However, if you have just one or two error types, you may not need this. You could just return the appropriate hard-coded error value.
In either case, to make sure the reader is clear on your error-handling, you should then provide some documentation (such as comments) mentioning which error value corresponds with which error type. Those printed error statements are good for user feedback, but won't be enough if you need to deal with testing and maintainability on the code itself.
Reviewing your Edit 2...
Precedence bug
This does not do what you want, because &&
has higher precedence than ||
:
if ('0' == str[0] && 'x' == str[1] || 'X' == str[1])
Very few people know all 17 levels of precedence of C operators by heart. I don't, either. Just make it a habit to use parentheses generously.
Error handling
A function that performs computation shouldn't also print output, since that would be an unexpected side effect that hurts code reusability. If you must print something, print to stderr
to avoid contaminating stdout
.
You defined an enum status
, but your htoi()
function just returns an int
. Either return an enum status
, or just #define
your status codes without bothering with the enum. If you do use an enum, make sure it is complete — you omitted SUCCESS
(which I would prefer over EXIT_SUCCESS
, since you aren't exit()
ing the program). Also, you never use NOINPUT
.
In htoi()
, you check for a null result
. To be consistent, why don't you also check for a null str
? (In C, it's customary to assume that the caller isn't stupid enough to pass you a null pointer, and dispense with such checks.)
If I were writing htoi()
, I would just keep things simple and have just return a success/failure indication without being more specific. In practice, checking for individual error codes is probably more trouble than it's worth. (That means I disagree with @Jamal's advice.)
Braces
Omitting braces with your if-elses is a filthy habit. Saving those few characters of source code just isn't worth the danger!
If you really feel compelled to omit the braces, then also put the statement on the same line:
if (c >= 'a' && c <= 'f') c = c - 'a' + 10;
else if (c >= 'A' && c <= 'F') c = c - 'A' + 10;
else if (c >= '0' && c <= '9') c -= '0';
else /* Non hex character */ return NONHEX;
Iteration
You could simplify your code a lot by just incrementing the str
pointer.
enum status htoi(const char *str, unsigned long long *result)
{
int c;
int digits;
if (NULL == result) {
/* Invalid pointer */
return INVPTR;
}
*result = 0;
if (NULL == str) {
result INVPTR;
}
if ('0' == str[0] && ('x' == str[1] || 'X' == str[1])) {
str += 2;
}
digits = strlen(str);
if (digits > 16) {
/* Too many hex digits */
return TOOBIG;
} else if (0 == digits) {
/* Nothing to do */
return NOHEX;
}
for (; '0円' != (c = *str); str++) {
if (c >= 'a' && c <= 'f') {
c = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
c = c - 'A' + 10;
} else if (c >= '0' && c <= '9') {
c -= '0';
} else {
/* Non hex character detected */
return NONHEX;
}
*result = *result << 4 | c;
}
return SUCCESS;
}
Your code looks nice and is easily readable. However I have a few problems with it.
I disagree with your error return codes. This might be possible in one
function, but if all functions were to do this it would become unmanageable.
It is also not the C way, which is more along the lines of strtoul
(which you are approximating with htoi
), which returns an error (NULL in
its case) and sets errno
to ERANGE or EINVAL.
I would also not check for NULL pointers, as again it is not normal. And lastly, I dislike your method of range checking. Your function would balk for example at 0x00000000000000001 (16 leading zeroes) but it is quite valid. A better way is to check against ULONG_MAX before shifting left.
Here is my version, for what it is worth:
int htoi(const char *s, unsigned long *result)
{
if ((*s == '0') && (s[1] == 'x' || s[1] == 'X')) {
s += 2;
}
for (*result = 0; *s; ++s) {
int n = 0;
if (*s >= 'a' && *s <= 'f') {
n = *s - 'a' + 10;
} else if (*s >= 'A' && *s <= 'F') {
n = *s - 'A' + 10;
} else if (*s >= '0' && *s <= '9') {
n = *s - '0';
} else {
errno = EINVAL;
return -1;
}
if (*result > (ULONG_MAX/16)) {
errno = ERANGE;
return -1;
}
*result *= 16;
*result += (unsigned long) n;
}
return 0;
}
-
\$\begingroup\$ Great suggestions; I implemented them. Although, I wonder if it would be better to assign a variable c with the char value from the string, in case I forget to dereference the pointer. This is an exercise from K&R. Posting it here made it an even better exercise. Thanks! \$\endgroup\$user37718– user377182014年02月28日 20:32:23 +00:00Commented Feb 28, 2014 at 20:32
Your 2nd version is incorrect (doesn't work): because it doesn't initialize msnibble
and digits
if ('0' != str[0])
.
For this kind of reason it's dangerous to declare local variables before you assign a value to them.
Reviewing your 4th edit:
- It's much better (more compact, less to go wrong)
digits
is unused- You could delay declaring 'n' until later, when you first assign a value to it
- The code would be slightly faster if you didn't reference pointers (i.e.
*res
and*s
) so often and used a local variable (i.e.rc
andn
) instead - If you return an error (e.g. from meeting an invalid digit) it might be better to do so without having altered (left a partially calculated value in) the caller's
*res
- This version doesn't return an error if you pass in a zero-length string
For example:
int htoi(const char *s, unsigned long *res)
{
if ('0' == s[0] && ('x' == s[1] || 'X' == s[1])) {
s += 2;
}
if ('0円' == *s) {
errno = NOHEX;
return -1;
}
unsigned long rc;
int n;
for (rc = 0; '0円' != (n = *s); s++) {
if ( n >= 'a' && n <= 'f') {
n = n - 'a' + 10;
} else if (n >= 'A' && n <= 'F') {
n = n - 'A' + 10;
} else if (n >= '0' && n <= '9') {
n = n - '0';
} else {
errno = EINVAL;
return -1;
}
if (rc > (ULONG_MAX/16)) {
errno = ERANGE;
return -1;
}
rc *= 16;
rc += (unsigned long) n;
}
*res = rc;
return 0;
}
Also, using long long is unnecessary since the bit wise operators only operate on integral types; I replaced them with longs. In addition, I changed
if (digits > 16)
with
if (digits > sizeof(long)*2)
strtol
can accomplish this. \$\endgroup\$-
in front of the macros. Also be sure to define theenum
somewhere here, otherwise the above code won't compile. You should also useEXIT_SUCCESS
instead, which is part of<stdlib.h>
. \$\endgroup\$