6
\$\begingroup\$

I've created this little library to help ease myself (and for others, hopefully) with the pain of having to convert integers to string and vice-versa. It's written in C, and for maximum portability I decided to try and stick to C89 (no booleans yet by that time I believe). I'd like to ask you to give some of your comments and your recommendations, especially on the performance side.

Please note that when I was writing this code, my focus was on readability, self-documenting code and safety. Please also note that my naming convention varies very much from the C naming conventions that I'm sure a lot of you are used to seeing; e.g. abbreviated words instead of the complete words like "recv" for "receive", "u" for "unsigned", etc so if I confuse you with my naming convention then I hope you understand that readability was one of my primary concerns.

For some of the code which I thought some people might find unclear, I've added some comments in hopes of clearing up any confusion.

#include <stdio.h>
#include <stdlib.h>
/*CONSTANTS*/
 /* ERROR MESSAGES */
 const char *invalidInputError = "convert.c: Unable to convert the input string into a valid number. Offending character has an ASCII value of %d. Valid ASCII values are from 48 to 57 (inclusive) and the null terminator '0円' at the end of the input string.";
 const char *outOfBoundariesError = "convert.c: Value was outside of boundaries. Minimum value is 0 and maximum value is 4,294,967,295. Value was %ul";
 /* BOUNDARIES */
 #define CONVERT_UNSIGNED_INT_MAX 4294967295
 #define CONVERT_SIGNED_INT_MIN -2147483648
 #define CONVERT_SIGNED_INT_MAX 2147483647
 #define CONVERT_UNSIGNED_MIN 0
 #define CONVERT_ASCII_ZERO 48
 #define CONVERT_ASCII_NINE 57
unsigned int FromStringToUnsignedInteger(char inputString[])
{
 unsigned long long boundariesChecker;
 unsigned int returnResult; returnResult = 0;
 int counter; counter = 0;
 // if the ASCII value is not between 48 and 57 inclusive, that's an error, except for '0円', which is the null terminator for C.
 while (inputString[counter] != '0円')
 {
 // check that the input is valid.
 if (inputString[counter] >= CONVERT_ASCII_ZERO && inputString[counter] <= CONVERT_ASCII_NINE)
 {
 // 48 is ASCII for 0, 49 for 1, 50 for 2, ..., 57 for 9.
 returnResult = (returnResult * 10) + (inputString[counter] - 48);
 boundariesChecker = returnResult;
 // check for overflow by getting the max of an unsigned integer (4,294,967,295)
 if (boundariesChecker > CONVERT_UNSIGNED_INT_MAX || boundariesChecker < CONVERT_UNSIGNED_MIN)
 {
 fprintf(stderr, outOfBoundariesError, returnResult);
 return 0;
 }
 counter++;
 }
 else
 {
 fprintf(stderr, invalidInputError, inputString[counter]);
 return 0;
 }
 }
 return returnResult;
}
int FromStringToInteger(char inputString[])
{
 long long boundariesChecker;
 int returnResult; returnResult = 0;
 int counter;
 int isNegative = 0;
 // if it's a negative number skip the minus sign and set the flag.
 if (inputString[0] == '-')
 {
 counter = 1;
 isNegative = 1;
 }
 else
 {
 counter = 0;
 isNegative = 0;
 }
 // if the ASCII value is not between 48 and 57 inclusive, that's an error, except for '0円', which is the null terminator for C.
 while (inputString[counter] != '0円')
 {
 // check that the input is valid.
 if (inputString[counter] >= CONVERT_ASCII_ZERO && inputString[counter] <= CONVERT_ASCII_NINE)
 {
 // 48 is ASCII for 0, 49 for 1, 50 for 2, ..., 57 for 9.
 returnResult = (returnResult * 10) + (inputString[counter] - 48);
 boundariesChecker = returnResult;
 // check for overflow by getting the max of an unsigned integer (4,294,967,295)
 if (boundariesChecker > CONVERT_SIGNED_INT_MAX || boundariesChecker < CONVERT_SIGNED_INT_MIN)
 {
 fprintf(stderr, outOfBoundariesError, returnResult);
 return 0;
 }
 counter++;
 }
 else
 {
 fprintf(stderr, invalidInputError, inputString[counter]);
 return 0;
 }
 }
 if (isNegative)
 {
 returnResult = returnResult * -1;
 }
 return returnResult;
}
char *FromUnsignedIntegerToString(unsigned int inputUnsignedInt) {
 // check for out of range
 if (inputUnsignedInt > CONVERT_UNSIGNED_INT_MAX || inputUnsignedInt < CONVERT_UNSIGNED_MIN)
 {
 fprintf(stderr, outOfBoundariesError, inputUnsignedInt);
 return NULL;
 }
 // 10 is the multiplier because there's only 10 chars in the max value of an unsigned int.
 char *convertedInputUnsignedInt = malloc(10 * sizeof(char));
 sprintf(convertedInputUnsignedInt, "%u", inputUnsignedInt);
 return convertedInputUnsignedInt;
}
char *FromIntegerToString(int inputInt) {
 // check for out of range
 if (inputInt > CONVERT_SIGNED_INT_MAX || inputInt < CONVERT_SIGNED_INT_MIN)
 {
 fprintf(stderr, outOfBoundariesError, inputInt);
 return NULL;
 }
 // 11 is the multiplier because there's only 11 chars in the max value of an unsigned int, including the negative sign.
 char *convertedInputUnsignedInt = malloc(11 * sizeof(char));
 sprintf(convertedInputUnsignedInt, "%d", inputInt);
 return convertedInputUnsignedInt;
}
JaDogg
4,5513 gold badges29 silver badges65 bronze badges
asked Sep 22, 2014 at 11:39
\$\endgroup\$
4
  • \$\begingroup\$ Why do you have constants indented to right ? \$\endgroup\$ Commented Sep 24, 2014 at 6:49
  • \$\begingroup\$ @JaDogg I got spoiled by Visual Studio indenting things for me to the right. It's a pet peeve. :) \$\endgroup\$ Commented Sep 24, 2014 at 7:12
  • \$\begingroup\$ btw Checkout code review chat room chat.stackexchange.com/rooms/8595/the-2nd-monitor \$\endgroup\$ Commented Sep 24, 2014 at 13:32
  • \$\begingroup\$ why not use sprintf() \$\endgroup\$ Commented Oct 2, 2014 at 23:57

4 Answers 4

3
\$\begingroup\$

Firstly, the interface:

You have no header for clients to include which defines the API clients are expected to use.

As you print a message and then return a value cannot be determined to be invalid, the client code has no chance of sensibly dealing with an overflow itself, and you're polluting stderr. They might want to print something in French.

It'd be better to return a value and a status, so the client code can deal with the error as it sees fit (and you should have a different state for overflow and invalid string).

The inputStrings should be char const * - you don't need to alter them, and this gives both you and the client confidence that you don't.

You don't flag to the client that they're meant to free the string on exit. You must do that, or alternatively ask for a buffer to be supplied and do your sprintf into that.

Now to the implementation:

Use <limits.h> to work out the min/max possible values for integer types.

Initialisation of isNegative is potentially done twice, and isn't particularly readable. Perhaps write it like this:

int counter = 0;
int isNegative = 0;
if (inputString[0] == '-')
{
 isNegative = !isNegative;
 ++counter;
}

Declare boundariesChecker in the block in which it is used.

int fred; fred = 0 reads strangely. What's wrong with int fred = 0;?

CONVERT_ASCII_ZERO should be '0' (and similarly for the NINE). Then you can drop the ASCII tag, as this'd work fine on EBCDIC machines.

returnResult = -returnResult would work fine and be probably more readable.

You don't check for malloc returning NULL.

Lastly, long long isn't available in C89 except as an extension, so you can't use it for overflow checking.

Morwenn
20.2k3 gold badges69 silver badges132 bronze badges
answered Sep 22, 2014 at 12:17
\$\endgroup\$
3
  • \$\begingroup\$ thanks. Actually I do have a header file, but I didn't include it here. Will take note of what you said. For the int fred; fred = 0; part, you can blame that on CLion, it likes to suggest code like that. Will take note of that as well. \$\endgroup\$ Commented Sep 22, 2014 at 12:22
  • \$\begingroup\$ If you have a header file you should include it in the .c file as well, so I assumed you didn't \$\endgroup\$ Commented Sep 22, 2014 at 12:39
  • \$\begingroup\$ "CONVERT_ASCII_ZERO should be '0' ... drop the ASCII tag, ... fine on EBCDIC machines." changes the program's goal. In general I agree that this is typically what should happen. The coding character set and the user IO character set are often the same. But if the goal was to translate ASCII input, even if the source code was non-ASCII (e.g. EBCDIC), then OP's approach is correct. \$\endgroup\$ Commented Sep 27, 2014 at 5:34
2
\$\begingroup\$

There are many problems with your code. I'll just look at the first function, FromStringToUnsignedInteger

  • the function name is verbose. str_to_uint would suffice.

  • the prototype should not print errors but instead should return the conversion status and the result, so this would be better:

    int str_to_uint(const char *s, unsigned int *n);
    
  • the limits should be from limits.h (UINT_MAX, INT_MAX etc)

  • accumulating the result in an int means you cannot handle the full range of UINT_MAX

  • and because you accumulate in an int, by the time you assign the intermediate result to boundariesChecker, any overflow has already happened and gone. You need to accumulate in something bigger than unsigned int and use that directly for the bound check.

  • checking for the result being less than 0 is unnecessary for an unsigned. Accumulate using an unsigned type.

  • use isdigit() frm ctype.h to check for a number

  • use '0' instead of '48'

  • your comments are noisy (i.e. excessive and unhelpful)

  • your variable names are unnecessarily long.

Here's an example of how it might be written:

errno_t str_to_uint(const char *s, unsigned int *n)
{
 unsigned long long res = 0;
 for (; *s; ++s) {
 if (!isdigit(*s)) {
 return EINVAL;
 }
 res = (res * 10) + (unsigned long long) (*s - '0');
 if (res > UINT_MAX) {
 return ERANGE;
 }
 }
 *n = (unsigned int) res;
 return 0;
}
answered Sep 22, 2014 at 19:33
\$\endgroup\$
5
  • \$\begingroup\$ As I've said, my naming convention varies very much from the C naming conventions; it's verbose and loud, but that is because I designed it that way. What about on code efficiency? Have any suggestions to make it faster? \$\endgroup\$ Commented Sep 22, 2014 at 22:29
  • \$\begingroup\$ Verbose/loud is a design option - but one that I think makes the code less readable. You may disagree, that's ok. On efficiency, it doesn't hurt to be efficient if it comes at no cost, but efficiency is really a non-issue with this code unless you are using the functions very often and it causes a measurable bottleneck. Better to be clear and understandable. \$\endgroup\$ Commented Sep 23, 2014 at 1:45
  • \$\begingroup\$ Ok, good to know. I'm currently updating the entire code base from all the feedback I had. I'll post it here when I'm done for the benefit of all. \$\endgroup\$ Commented Sep 23, 2014 at 1:55
  • 1
    \$\begingroup\$ @pacoalphonso Don't update the question with new code, Instead post a followup question. \$\endgroup\$ Commented Sep 24, 2014 at 4:32
  • \$\begingroup\$ 1) (unsigned long long) (*s - '0') could be simplified to *s - '0';. 2) This approach depends on UINT_MAX < ULLONG_MAX: reasonable, but not specified in C. \$\endgroup\$ Commented Nov 28, 2014 at 22:35
1
\$\begingroup\$

Taking on FromStringToInteger() and FromIntegerToString().

First, using a "bigger" integer type is not always available and is not needed. long long may have the same range as int, so why use long long. Further, what if code was attempting to convert the maximum width integer type? Best to use a solution that works within the given type.

long long boundariesChecker; // not needed
int returnResult;

OP's code attempts to test for overflow after it happened. Per C spec, that is UB and it is too late.

Second, negation is a problem when trying to convert a string to INT_MIN. Negating INT_MIN is UB when INT_MIN < -INT_MAX.

To solve this, test for overflow before multiplication and use the negative side of int. Need a basic re-write of code.

int FromStringToInteger(const char inputString[]) { // Add `const`
 int returnResult = 0;
 size_t counter = 0; // should use `size_t` here
 int overflow = 0; // could use `bool` here
 char sign = inputString[0]; 
 if (sign == '-' || sign == '+') { // Look for + sign too
 counter++;
 }
 while (inputString[counter] != '0円') {
 int digit = inputString[counter++];
 if (digit < CONVERT_ASCII_ZERO || digit > CONVERT_ASCII_NINE) {
 fprintf(stderr, invalidInputError, digit);
 return 0;
 }
 digit -= CONVERT_ASCII_ZERO;
 // Test if overflow is about to occur...
 if ((returnResult <= INT_MIN / 10) && 
 ((returnResult < INT_MIN / 10) || (-digit < INT_MIN % 10))) {
 returnResult = INT_MIN;
 overflow = 1;
 break;
 }
 returnResult = returnResult * 10 - digit; // note use of -
 }
 if (sign != '-') {
 if (returnResult < -INT_MAX) {
 returnResult = INT_MAX;
 overflow = 1;
 } else {
 returnResult = -returnResult;
 }
 }
 if (overflow) {
 // At this point, returnResult is INT_MAX or INT_MIN 
 fprintf(stderr, outOfBoundariesError, returnResult);
 return returnResult; // or drop through
 }
 return returnResult;
}

FromIntegerToString()

With all the macros for portability, using 11 is wrong.

Better to scale the buffer based of INT_MIN or sizeof(int) and test for OOM. See ansi-c-maximum-number-of-characters-printing-a-decimal-int

sizeof(char) is always 1. Little point in using it here. If code might change from char to wchar_t, then better to use malloc(n * sizeof *convertedInputUnsignedInt);

// char *convertedInputUnsignedInt = malloc(11 * sizeof(char));
#define INT_STR_SIZE(type) (10*sizeof(type)*CHAR_BIT/33 + 3)
char *convertedInputUnsignedInt = malloc(INT_STR_SIZE(int));
if (convertedInputUnsignedInt == NULL) {
 Handle_OOM();
}
answered Sep 27, 2014 at 22:24
\$\endgroup\$
1
  • \$\begingroup\$ @pacoalphonso UB: Undefined behavior. Code has done something it should not do. Code should not be expected to behave in any fashion from now on. You are lucky if you get a seg fault. \$\endgroup\$ Commented Sep 28, 2014 at 3:13
0
\$\begingroup\$

I've finally updated the functions and have come up with these:

Header:

#ifndef CONVERT_H
#define CONVERT_H
/* Structs */
typedef struct ToIntegerResult {char ErrorStatus; int IntegerResult;} ToIntegerResult;
typedef struct ToUnsignedIntegerResult {char ErrorStatus; unsigned int UnsignedIntegerResult;} ToUnsignedIntegerResult;
/* Prototypes */
ToUnsignedIntegerResult FromStringToUnsignedInteger(const char inputString[]);
void FromUnsignedIntegerToString(unsigned int inputUnsignedInt, char buffer[]);
ToIntegerResult FromStringToInteger(const char inputString[]);
void FromIntegerToString(char buffer[], int inputInt);
#endif /*CONVERT_H*/

Implementation:

#include <stdio.h>
#include <limits.h>
#include "convert.h"
/*CONSTANTS*/
#define CONVERT_ZERO '0'
#define CONVERT_NINE '9'
ToUnsignedIntegerResult FromStringToUnsignedInteger(const char inputString[])
{
 ToUnsignedIntegerResult returnResult;
 char wasReturnResultSet = 0;
 returnResult.ErrorStatus = 0;
 returnResult.UnsignedIntegerResult = 0;
 size_t counter = 0;
 while (inputString[counter] != '0円') {
 unsigned int digit = inputString[counter++];
 if (digit < CONVERT_ZERO || digit > CONVERT_NINE)
 {
 returnResult.ErrorStatus = -1; // invalid input
 return returnResult;
 }
 digit -= CONVERT_ZERO;
 // check for overflow
 if (wasReturnResultSet != 0 && (returnResult.UnsignedIntegerResult * 10 + digit) < returnResult.UnsignedIntegerResult)
 {
 // detected overflow
 returnResult.ErrorStatus = 1;
 return returnResult;
 }
 // set it so that the above check would work
 wasReturnResultSet = 1;
 returnResult.UnsignedIntegerResult = returnResult.UnsignedIntegerResult * 10 + digit;
 }
 return returnResult;
}
ToIntegerResult FromStringToInteger(const char inputString[])
{
 ToIntegerResult returnResult;
 returnResult.ErrorStatus = 0;
 returnResult.IntegerResult = 0;
 size_t counter = 0; // should use `size_t` here
 char sign = inputString[0];
 if (sign == '-' || sign == '+') { // Look for + sign too
 counter++;
 }
 while (inputString[counter] != '0円') {
 int digit = inputString[counter++];
 if (digit < CONVERT_ZERO || digit > CONVERT_NINE)
 {
 returnResult.ErrorStatus = -1; // invalid input
 return returnResult;
 }
 digit -= CONVERT_ZERO;
 // Test if overflow is about to occur...
 if ((returnResult.IntegerResult <= INT_MIN / 10) &&
 ((returnResult.IntegerResult < INT_MIN / 10) || (-digit < INT_MIN % 10)))
 {
 returnResult.IntegerResult = INT_MIN;
 returnResult.ErrorStatus = 2; // detected overflow
 return returnResult;
 }
 returnResult.IntegerResult = returnResult.IntegerResult * 10 - digit; // note use of -
 }
 if (sign != '-') {
 if (returnResult.IntegerResult < -INT_MAX)
 {
 returnResult.IntegerResult = INT_MAX;
 returnResult.ErrorStatus = 2; //overflow detected
 return returnResult;
 } else {
 returnResult.IntegerResult = -returnResult.IntegerResult;
 }
 }
 return returnResult;
}
void FromUnsignedIntegerToString(unsigned int inputUnsignedInt, char buffer[])
{
 snprintf(buffer, sizeof(buffer), "%u", inputUnsignedInt);
}
void FromIntegerToString(char buffer[], int integer)
{
 snprintf(buffer, sizeof(buffer), "%d", integer);
}

I've decided to remove all checking with the From*ToString methods and just encapsulate snprintf into a method.

\$\endgroup\$
1
  • \$\begingroup\$ @admin why did you edit out my "thank you to all who contributed?" \$\endgroup\$ Commented Oct 2, 2014 at 21:14

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.