fsize: A Command-Line Tool for Checking File Sizes

Question 1

I have created a cross-platform CLI tool to quickly retrieve file size and present it to the user in different units of data. The whole program is written in C and for Windows Win32 API is used to check if a location given is a directory or not. I have tried to be as minimal as I could be while coding this project.

/**
 * @file fsize.c
 * @license This file is licensed under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007. You may obtain a copy of this license at https://www.gnu.org/licenses/gpl-3.0.en.html.
 * @author Tushar Chaurasia (Dark-CodeX)
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#if defined _WIN32 || defined _WIN64 || defined __CYGWIN__
#include <Windows.h>
#else
#include <sys/stat.h>
#include <sys/types.h>
#endif
#define FSIZE_VER "1.0.0"
enum FILE_SIZE_TYPE
{
 BIT = 8, // multiply
 BYTES = 1, // as-it-is BASE
 KIB = 1024, // divide
 MIB = 1048576, // divide
 GIB = 1073741824, // divide
 KB = 1000, // divide
 MB = 1000000, // divide
 GB = 1000000000 // divide
};
bool is_directory(const char *loc)
{
 if (!loc)
 return false;
#if defined _WIN32 || defined _WIN64 || defined __CYGWIN__
 DWORD fileAttributes = GetFileAttributesA(loc);
 if (fileAttributes == INVALID_FILE_ATTRIBUTES)
 return false;
 else if (fileAttributes & FILE_ATTRIBUTE_DIRECTORY)
 return true;
 else
 return false;
#else
 struct stat buffer;
 if (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode))
 return true;
#endif
 return false;
}
int main(int argc, char **argv)
{
 if (argc == 1)
 {
 fprintf(stderr, "err: no file was given\n");
 return EXIT_FAILURE;
 }
 int i = 1;
 if (strcmp(argv[i], "--help") == 0)
 {
 puts("Usage: [option] [files]");
 puts("Options:");
 puts(" --help Display this help message.");
 puts(" --version Display the version of the app.");
 puts(" --size=<unit_of_byte> Converts bytes into other units.");
 puts(" =bit, bits, BIT, BITS Converts bytes into bits.");
 puts(" =byte, bytes, BYTE, BYTES Default size.");
 puts(" =kib, KIB Converts bytes into kibibytes.");
 puts(" =mib, MIB Converts bytes into mebibytes.");
 puts(" =gib, GIB Converts bytes into gibibytes.");
 puts(" =kb, KB Converts bytes into kilobytes.");
 puts(" =mb, MB Converts bytes into megabytes.");
 puts(" =gb, GB Converts bytes into gigabytes.");
 return EXIT_SUCCESS;
 }
 if (strcmp(argv[i], "--version") == 0)
 {
 printf("%s: %s\n", argv[0], FSIZE_VER);
 return EXIT_SUCCESS;
 }
 enum FILE_SIZE_TYPE type = BYTES; // default type is BYTES
 if (strncmp(argv[i], "--size=", 7) == 0)
 {
 argv[i] += 7; // skip `--size=` which is total of 7 characters/bytes excluding null-terminating character
 if (strcmp(argv[i], "bit") == 0 || strcmp(argv[i], "bits") == 0 ||
 strcmp(argv[i], "BIT") == 0 || strcmp(argv[i], "BITS") == 0)
 {
 type = BIT;
 }
 else if (strcmp(argv[i], "byte") == 0 || strcmp(argv[i], "bytes") == 0 ||
 strcmp(argv[i], "BYTE") == 0 || strcmp(argv[i], "BYTES") == 0)
 {
 type = BYTES;
 }
 else if (strcmp(argv[i], "kib") == 0 || strcmp(argv[i], "KIB") == 0)
 {
 type = KIB;
 }
 else if (strcmp(argv[i], "mib") == 0 || strcmp(argv[i], "MIB") == 0)
 {
 type = MIB;
 }
 else if (strcmp(argv[i], "gib") == 0 || strcmp(argv[i], "GIB") == 0)
 {
 type = GIB;
 }
 else if (strcmp(argv[i], "kb") == 0 || strcmp(argv[i], "KB") == 0)
 {
 type = KB;
 }
 else if (strcmp(argv[i], "mb") == 0 || strcmp(argv[i], "MB") == 0)
 {
 type = MB;
 }
 else if (strcmp(argv[i], "gb") == 0 || strcmp(argv[i], "GB") == 0)
 {
 type = GB;
 }
 else
 {
 fprintf(stderr, "err: '%s' is not a valid unit of byte, try using '--help' flag\n", argv[i]);
 return EXIT_FAILURE;
 }
 i++;
 if (i == argc)
 {
 fprintf(stderr, "err: no file was given\n");
 return EXIT_FAILURE;
 }
 }
 for (; i < argc; i++)
 {
 if (is_directory(argv[i]) == true)
 {
 fprintf(stderr, "err: '%s': Is a directory\n", argv[i]);
 continue;
 }
 FILE *fptr = fopen(argv[i], "rb");
 if (!fptr)
 {
 fprintf(stderr, "err: fopen(): '%s': %s\n", argv[i], strerror(errno));
 continue;
 }
 fseek(fptr, 0, SEEK_END);
 size_t len = ftell(fptr); // size of file in bytes
 fseek(fptr, 0, SEEK_SET);
 switch (type)
 {
 case BIT:
 printf("%zu bits : %s\n", len * BIT, argv[i]);
 break;
 case BYTES:
 printf("%zu bytes : %s\n", len, argv[i]);
 break;
 case KIB:
 printf("%0.3f kibibytes : %s \n", (float)len / KIB, argv[i]);
 break;
 case MIB:
 printf("%0.3f mebibytes : %s \n", (float)len / MIB, argv[i]);
 break;
 case GIB:
 printf("%0.3f gibibytes : %s \n", (float)len / GIB, argv[i]);
 break;
 case KB:
 printf("%0.3f kilobytes : %s \n", (float)len / KB, argv[i]);
 break;
 case MB:
 printf("%0.3f megabytes : %s \n", (float)len / MB, argv[i]);
 break;
 case GB:
 printf("%0.3f gigabytes : %s \n", (float)len / GB, argv[i]);
 break;
 }
 fclose(fptr);
 }
 return EXIT_SUCCESS;
}

I'm open to any improvements and better practices, if I have done something here which might be non-standard practice.

Question 2

Too trivial to post as an answer: A bit inconsistent with BIT vs BYTES... After REAL effort allowing user both "byte" and "bytes", a tiny file will be reported with 1 bytes : filename. Most software users quickly grow-out-of fussing about plural/singular. Best not to offer too much accommodation to language rules. (Those are arcane!!)

Question 3

Darth, "--size=<unit_of_byte>" leads to an interesting option idea, allow user to enter a unsigned long long number here. Say I wanted to know how many 8k byte blocks?, I could use "--size=8192". Might be good step toward future proofing. Your thoughts?

Question 4

@chux, Yeah, that could be a very practical option as well to the user

Question 5

@chux Re: 'blocksize' Still remember my 'mentor' rubbing his wounds after an 'oopsey' with the Unix util "dd" did what it was told to do. Thankfully, the damage was repairable.

Question 6

Multiple `puts()` calls

For the --help message, I get why you made a puts() call for each line, because having all that in one line separated with \n would not be readable. However, there is an even better way:

puts(
 "Usage: [option] [files]\n"
 "Options:\n"
 " --help Display this help message.\n"
 " --version Display the version of the app.\n"
 " --size=<unit_of_byte> Converts bytes into other units.\n"
 " =bit, bits, BIT, BITS Converts bytes into bits.\n"
 " =byte, bytes, BYTE, BYTES Default size.\n"
 " =kib, KIB Converts bytes into kibibytes.\n"
 " =mib, MIB Converts bytes into mebibytes.\n"
 " =gib, GIB Converts bytes into gibibytes.\n"
 " =kb, KB Converts bytes into kilobytes.\n"
 " =mb, MB Converts bytes into megabytes.\n"
 " =gb, GB Converts bytes into gigabytes."
);

This way, you do not call puts() multiple times unnecessarily, but the code is still readable. This works because adjacent string constants are automatically concatenated at compile time.

Repetitive capital checks for size units

Instead of manually checking for the all caps versions, another option is to make the arguments case-insensitive by converting them to lower case before checking:

char *arg = argv[i]; // Additionally, I would store this in a variable instead of indexing argv with i multiple times
for (int c = 0; arg[c]; ++c) {
 arg[c] = tolower((unsigned char)arg[c]); // #include <ctype.h>
}
if (strcmp(arg, "byte") || strcmp(arg, "bytes")) {
 // ...
} // else ...

Alternatively, instead of using an else if chain, you could store a list, either using an array of struct or a parallel array, and then either linear search using a loop or binary search if you ensure that the strings are in lexicographically increasing order.

struct SizeType {
 char *name;
 enum FILE_SIZE_TYPE type;
} size_types[] = {
 {"byte", BYTES},
 {"bytes", BYTES},
 // ...
}

`if (...) return true; else return false;`

Replace:

if (fileAttributes == INVALID_FILE_ATTRIBUTES)
 return false;
else if (fileAttributes & FILE_ATTRIBUTE_DIRECTORY)
 return true;
else
 return false;

With:

if (fileAttributes == INVALID_FILE_ATTRIBUTES)
 return false;
else
 return (bool)(fileAttributes & FILE_ATTRIBUTE_DIRECTORY); // Explicitly cast to bool to ensure a value of 0 or 1

Or better yet:

return (fileAttributes != INVALID_FILE_ATTRIBUTES)
 && (fileAttributes & FILE_ATTRIBUTE_DIRECTORY);

Using standard logical operators is both more concise and generally more readable than a chain of if/else and return true/false.

Also the POSIX branch can be simplified:

#else
 struct stat buffer;
 if (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode))
 return true;
#endif
 return false;

Can be:

#else
 struct stat buffer;
 return (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode));
#endif

In this case, the final return false; will be unreachable and can be omitted.

Question 7

Re: OP's "Usage:" puts()... My experience is argv[0] is echoed to the user, confirming the name of the program they've just run... Maybe need printf() here... And, why not use a char * ptr so the long-ish help text isn't buried as a parameter to an output function... And (being a grumpy sort), I wouldn't try to accommodate plural/singular as a parameter. "How many bits?" "One." "Oh, just one bits?" "Yes, and don't be a jerk!"... :-)

Question 8

@Fe2O3 printf("Usage: %s [option] [files] ...", argv[0]); You mean like that?

Question 9

Looks good to me! :-) Cheers!

Question 10

Also, check some standard man pages... The 1st filename shouldn't be shown as optional, but provide for multiple filenames...

Question 11

ALSO rather than letting OP print error for no arguments, no arguments should, perhaps, default to spewing the usage help... Just a thought... Nice answer, btw! :-)

Question 12

Invalid options are silently ignored. You should print a help message if the option is not recognized.

Along the same line, you should print a help message if a size unit is not recognized.

Together, it means that help deserves to be factored out into a function.
А filename --help is perfectly valid. However, the code can't compute its size. A standard way to deal with such problem is to have -- option, meaning end of options. Everything after it is a filename no matter what.
Short options are very user friendly. -v is simpler to type than --version. getopt is your friend.
Avoid magic constants. Consider
```
 argv[i] += strlen("--size=");
```
or something along this line.
Closing a file is done way too late. Better do it right after ftell. BTW, there is no need to fseek(fptr, 0, SEEK_SET);.
KIB, MIB, GIB values are unnecessarily cryptic. Better use hex constants, eg
```
 KIB = 0x400,
 MIB = 0x100000,
 GIB = 0x40000000,
```

Question 13

Re argv[i] += strlen("--size="); I would prefer argv[i] += sizeof("--size=")-1; because that is guaranteed to be evaluated at compile time.

Question 14

@CPlus This is a possibility. But honestly I equally dislike my version and your version (expicit --size== is as bad as a magic number). Hence a suggestion for getopt.

Question 15

@CPlus: Even with optimization disabled, GCC and Clang do optimize strlen("literal") to a compile-time constant. godbolt.org/z/Pb5vj3bcW So it's fine for real use, unless you're compiling with -ffreestanding, -fno-builtin, or -fno-builtin-strlen. MSVC needs optimization to be enabled, but we normally only care about quality of the machine code in optimized builds.

Question 16

Not bad for a beginner who wants to write something useful. Keep at it!

Mystical knowledge

The pointer to the array of pointers to character strings (*argv[]) CAN be manipulated by application code.

The "runtime loader" will add a NULL pointer to the end of the array. i.e. argv[ argc ] = NULL
(Some esoteric versions of the compiler may not provide this NULL. You'll likely see a leprechaun before you'll see one of those.)

We can use this fact to simplify the code a bit:

/*
int main(int argc, char **argv)
*/
int main(int argc, char *argv[]) // 'feels' like ptr to an array of ptrs.
{
 // Remember progname, and start advancing through argv[]
 char *progname = *argv; ++argv; // avoid precedence worries
 if (*argv == NULL || strcmp(*argv, "--help") == 0) // Always try to be useful!
 {
 puts("Usage: [option] [files]");
 puts("Options:");
 ...
 return EXIT_SUCCESS;
 }
 if (strcmp(*argv, "--version") == 0)
 {
 printf("%s: %s\n", progname, FSIZE_VER);
 return EXIT_SUCCESS;
 }
 enum FILE_SIZE_TYPE type = BYTES; // default type is BYTES
 if (strncmp(*argv, "--size=", 7) == 0)
 {
 char *cp = strchr(*argv, '=') + 1; // ptr to just beyond that '='
 if (strcmp(cp, "bit") == 0 || strcmp(cp, "bits") == 0
 || strcmp(cp, "BIT") == 0 || strcmp(cp, "BITS") == 0) // Note where '||' moved to
 {
 type = BIT;
 }
 else if (strcmp(cp, "byte") == 0 || strcmp(cp, "bytes") == 0
 || strcmp(cp, "BYTE") == 0 || strcmp(cp, "BYTES") == 0)
 {
 type = BYTES;
 }
 ...
 else // see remark in description below
 {
 fprintf(stderr, "err: '%s' is not a valid unit of byte, try using '--help' flag\n", cp);
 return EXIT_FAILURE;
 }
 argv++; // Done with this argument
 if (*argv == NULL) // OP! Do you want this outside enclosing if() block?
 {
 fprintf(stderr, "%s: err: no file was given\n", progname);
 return EXIT_FAILURE;
 }
 }
 for( ; *argv; ++argv) // until encountering NULL
 {
 ...
 FILE *fptr = fopen(*argv, "rb");
 if (!fptr)
 {
 fprintf(stderr, "err: fopen(): '%s': %s\n", *argv, strerror(errno));
 continue;
 }
 ...
 switch (type)
 {
 case BIT:
 printf("%zu bits : %s\n", len * BIT, *argv);
 break;
 case BYTES:
 printf("%zu bytes : %s\n", len, *argv);
 break;
 case KIB:
 printf("%0.3f kibibytes : %s \n", (float)len / KIB, *argv);
 break;
 ...
 }
 ...
 }
 ...
}

More practise and confidence with pointers (and arrays of pointers) will form a solid foundation to write more professional code. The above is merely an attempt to point the way.

Don't be too quick to scold the user and terminate the program. If I'd unknowingly entered
--size=bots instead of ...bits, consider how helpful the program could be if it showed me the help menu, rather than suggesting I ask for it... Be kind to users and they'll be kinder to you...

Final comment: type is such an overused term. Best to find another word for your variable. Perhaps mode as its meaning is the "mode of delivered information; KayBee or Kibbies. What's your pleasure?"

Also: look into stricmp() or strcasecmp() as two possibilities to use a library "compare" function that is case INsensitive...

DRY

 ...: %s \n", (float)len / KIB, argv[i]);
 ...: %s \n", (float)len / MIB, argv[i]);
 ...: %s \n", (float)len / GIB, argv[i]);
 ...: %s \n", (float)len / KB, argv[i]);
 ...: %s \n", (float)len / MB, argv[i]);
 ...: %s \n", (float)len / GB, argv[i]);

That's a lot of floating

 float flen = (float)len;
 ...: %s \n", flen / KIB, argv[i]);
 ...: %s \n", flen / MIB, argv[i]);
 ...: %s \n", flen / GIB, argv[i]);
 ...: %s \n", flen / KB, argv[i]);
 ...: %s \n", flen / MB, argv[i]);
 ...: %s \n", flen / GB, argv[i]);

Question 17

Compiling for Windows

I couldn't compile your code on MSVC.

I had to add

#define _CRT_SECURE_NO_WARNINGS

in order to use the strerror() function.

Also, I suggest you define

#define WIN32_LEAN_AND_MEAN

if you don't need most of the stuff that the Windows header gives you.

`i`

I like reserving i for for loops. Here, it can be named current_arg or similar.

Help

[files] is not described in help.

It's unclear whether options can be combined or not. The implementation does not allow it.

It sounds a bit strange to me that bit is a unit of bytes.

Output

I have never seen a program output "kibibytes" and similar. Maybe use "kiB" instead.

Constants

Someone has suggested to use 0x400, 0x100000 etc. for the KIB and MIB definitions. I'd prefer

KIB = 1<<10,
MIB = 1<<20,
GIB = 1<<30,

Consistency

I somehow don't like the idea that some of the values need to be used with multiplication while others need division.

enum FILE_SIZE_TYPE
{
 BIT = 8, // multiply
 BYTES = 1, // as-it-is BASE
 KIB = 1024, // divide

If everything were bits, then the calculation would always be the same. To do that, you need C23, because 8*1<<30 is 2^3*2^30 = 2^33, which does not fit into an int anymore. And :unsigned long is only available since C23. Visual Studio 2022 (17.13) does not have support for that... Meh...

But since I'll rework that anyways, it's not a big deal.

Redundant == true

if (is_directory(argv[i]) == true)

Moving Help into a method

Moving help into its own method makes main() shorter. If you put it to the top of the file, a reader will see it and understand the purpose of the program quickly.

I'll apply the string chaining technique proposed by @CPlus as well.

Moving unit parsing into its own method

Moving the unit parsing into its own method makes main() shorter. Many readers will start reading the main() method to get an overview.

Comments instead of names

size_t len = ftell(fptr); // size of file in bytes

can be

size_t size_in_bytes = ftell(fptr);

Maintainability: enums and switch/case

Enums are inherently bad for maintainability. Typically you need a switch/case or else/if cascade somewhere to handle all the cases. This means: if you add or remove an item from the list, you need to adapt all the switch/case or else/if cascades.

When removing an item from an enum, the compiler will help you finding all occurrences. But adding an item is dangerous, because the code will fall into the default branch.

Since this is a major redesign, simply look at the resulting code.

Suggested code

/**
 * @file fsize.c
 * @license This file is licensed under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007. You may obtain a copy of this license at https://www.gnu.org/licenses/gpl-3.0.en.html.
 * @author Tushar Chaurasia (Dark-CodeX)
 */
/**
 * @file fsize.c
 * @license This file is licensed under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007. You may obtain a copy of this license at https://www.gnu.org/licenses/gpl-3.0.en.html.
 * @author Tushar Chaurasia (Dark-CodeX)
 */
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#include <ctype.h>
#if defined _WIN32 || defined _WIN64 || defined __CYGWIN__
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#else
#include <sys/stat.h>
#include <sys/types.h>
#endif
#define FSIZE_VER "1.0.0"
void show_help(void)
{
 puts(
 "Usage: [option] [files]\n"
 "Options:\n"
 " --help Display this help message.\n"
 " --version Display the version of the app.\n"
 " --size=<unit_of_byte> Define output unit.\n"
 " =bit, bits Output in bits.\n"
 " =byte, bytes Output in bytes (default).\n"
 " =kib Output in kibibytes.\n"
 " =mib Output in mebibytes.\n"
 " =gib Output in gibibytes.\n"
 " =kb Output in kilobytes.\n"
 " =mb Output in megabytes.\n"
 " =gb Output in gigabytes.\n"
 );
}
bool is_directory(const char* loc)
{
 if (!loc)
 return false;
#if defined _WIN32 || defined _WIN64 || defined __CYGWIN__
 DWORD fileAttributes = GetFileAttributesA(loc);
 if (fileAttributes == INVALID_FILE_ATTRIBUTES)
 return false;
 return fileAttributes & FILE_ATTRIBUTE_DIRECTORY;
#else
 struct stat buffer;
 return stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode);
#endif
}
typedef struct UNIT
{
 const char* input_name;
 const char* output_name;
 const char* output_format;
 const unsigned long long divider;
} UNIT;
UNIT UNITS[] = {
 // Put the default unit first
 { "byte", "bytes", "%0.f", 8ULL },
 {"bit", "bits", "%0.f", 1ULL},
 {"bits", "bits", "%0.f", 1ULL},
 {"bytes", "bytes", "%0.f", 8ULL},
 {"kib", "kibibytes" , "%0.3f", 8ULL<<10},
 {"mib", "mebibytes", "%0.3f", 8ULL<<20},
 {"gib", "gibibytes", "%0.3f", 8ULL<<30},
 {"kb", "kilobytes", "%0.3f", 8ULL*1000},
 {"mb", "megabytes", "%0.3f", 8ULL*1000*1000},
 {"gb", "gigabytes", "%0.3f", 8ULL*1000*1000*1000},
 // Put the "no unit" last
 { NULL, NULL, NULL, 0 },
};
UNIT* parse_unit(char* unit_name, int const argc)
{
 UNIT* NO_UNIT = &UNITS[sizeof(UNITS) / sizeof(UNITS[0]) - 1];
 UNIT* result = &UNITS[0];
 // Parse unit as lower case
 for (int c = 0; unit_name[c]; ++c) {
 unit_name[c] = tolower((unsigned char)unit_name[c]);
 }
 // Unit lookup by input name
 bool found = false;
 for (size_t j = 0; UNITS[j].input_name != NULL; j++)
 {
 if (strcmp(unit_name, UNITS[j].input_name) == 0)
 {
 result = &UNITS[j];
 found = true;
 }
 }
 // Error handling
 if (!found)
 {
 fprintf(stderr, "err: '%s' is not a valid unit, try using '--help' flag\n", unit_name);
 return NO_UNIT;
 }
 if (argc == 2)
 {
 fprintf(stderr, "err: no file was given\n");
 return NO_UNIT;
 }
 return result;
}
void print_file_size(char* file_name, UNIT unit)
{
 if (is_directory(file_name))
 {
 fprintf(stderr, "err: '%s': Is a directory\n", file_name);
 return;
 }
 FILE* fptr = fopen(file_name, "rb");
 if (!fptr)
 {
 fprintf(stderr, "err: fopen(): '%s': %s\n", file_name, strerror(errno));
 return;
 }
 int result = fseek(fptr, 0, SEEK_END);
 if (result != 0)
 {
 fprintf(stderr, "err: fseek(): '%s': %s\n", file_name, strerror(errno));
 fclose(fptr);
 return;
 }
 size_t size_in_bytes = ftell(fptr);
 fclose(fptr);
 char format[] = "xxxxxx %s : %s\n";
 sprintf_s(format, sizeof(format) - 1, "%s %%s : %%s\n", unit.output_format);
 printf(format, 8.f * (float)size_in_bytes / (float)unit.divider, unit.output_name, file_name);
}
int main(int argc, char** argv)
{
 if (argc == 1)
 {
 fprintf(stderr, "err: no file was given\n");
 return EXIT_FAILURE;
 }
 int current_arg = 1;
 if (strcmp(argv[current_arg], "--help") == 0)
 {
 show_help();
 return EXIT_SUCCESS;
 }
 if (strcmp(argv[current_arg], "--version") == 0)
 {
 printf("%s: %s\n", argv[0], FSIZE_VER);
 return EXIT_SUCCESS;
 }
 UNIT* unit = &UNITS[0];
 if (strncmp(argv[current_arg], "--size=", sizeof("--size=") - 1) == 0)
 {
 unit = parse_unit(argv[current_arg] + sizeof("--size=") - 1, argc);
 if (unit->input_name == NULL) return EXIT_FAILURE;
 current_arg++;
 }
 for (; current_arg < argc; current_arg++)
 {
 print_file_size(argv[current_arg], *unit);
 }
 return EXIT_SUCCESS;
}

Result of the change

Before: 182 lines, 1 helper method, main has 130 lines.

After: 180 lines, 4 helper methods, main has 33 lines and the 5 steps of processing are clearly visible. No switch/case and no if/else cascade.

Question 18

Nice review. Detailed. Oops: show_help() :: puts() needs \n at the end of each string fragment... Oops 2: tolower() not quite right. Look at answer by @Chux... Braces inconsistent for that loop... Y'know, if loop counter j had larger scope, &UNITS[ j ] WOULD be that empty result to return. Less confusion... Oops 3: current_arg not used in strncmp( ..., "--size-", ....). Finally, (very, very few do this) fseek() can fail, returning an error code... Pedantry... Cheers! :-)

Question 19

Re: "I'd prefer ... MIB = 1<<20, GIB = 1<<30," has growth weaknesses when the shift is more than the 31. If the scale was a floating point, code could use 0x1.0p10, 0x1.0p20, 0x1.0p30. Of course FP impact other things, yet I find the enum approach limiting.

Question 20

Minor: rather than UNITS[sizeof(UNITS) / sizeof(UNIT) - 1], which obliges a check of an object and a type, consider UNITS[sizeof(UNITS) / sizeof(UNITS[0]) - 1] which only depends on the object and not its or any type.

Question 21

Thanks for the comments.

Question 22

Thomas Weller, on deeper review, I really like the convert it all to bits approach. To support bits and future files sizes, perhaps go to long long size, constants.

Question 23

Just a few left over points not yet reported:

Use a function to get file size

The fseek(... SEEK_END); len = ftell(fptr); trick to get the file size is weak and inadequate for large files. I am not suggesting you write a better implementation, yet realize it has short-comings.

When code has significant short-comings, yet one lacks good alternatives, make it a function call:

long long len = my_get_file_size(fptr);

Inside your my_get_file_size(), you can code the current fseek/ftell trick and allow for easier updates (e.g. only local impact).

1024 or 1000?

Help deserved a line of explanation to clearly indicate when the 1024 or 1000 scale is used.

Why stop at GB?

The metric prefixes are kMGTPEZYRQ. Even if one does not desire to support terabytes, etc. files, consider forming your code so it can gracefully grow.

Deeper: Consider your code base's lifetime. Do you expect users of this code today as well as a later version of fsize in 10 years just might need bigger files?
Perhaps not so much bigger: Hard drive capacity over time Hard Disk HDD capacity by year, by power of 10?

Improve error checking

ftell(fptr) returns a long and -1 on error. Detect that. long may or may not have a wider range than size_t. No need to inject the size_t here. size_t is for object memory sizes, not file lengths.

// size_t len = ftell(fptr);
long len = ftell(fptr);
if (len == -1) {
 Handle_Error();
}
if (len < 0) {
 TBD code();
}

Avoid overflow

len * BIT risks overflow. Use wider math.

//printf("%zu bits : %s\n", len * BIT, argv[i]);
printf("%ju bits : %s\n", (uintmax_t) len * BIT, argv[i]);

Avoid imprecision

(float)len / KIB, as a printf() argument, has weaknesses.

The float cast can readily loses precision for large values.

Strange that float was chosen as the quotient. (float)len / KIB is up converted to double before passing to printf(), so why not use double division right away?

Consider double

 printf("%0.3f kibibytes : %s \n", (double)len / KIB, argv[i]);
 printf("%0.3f kibibytes : %s \n", len / (double)KIB, argv[i]);
 ...
 // or maybe long double
 printf("%0.3Lf kibibytes : %s \n", len / (long double)KIB, argv[i]);

Question 24

You forgot Yotta.

Question 25

@OskarSkog Answer amended.

Question 26

This is a non-portable assumption:

 BIT = 8, // multiply

We should be using CHAR_BIT to ensure our units are consistent with the target platform.

Most of is_directory() is platform-dependent. Consider refactoring into two separate implementations - these might even move into two separate header files, especially if we add more functions to our filesystem abstraction:

#ifdef USE_WINAPI
#include <files_windows.h>
#elifdef POSIX_SOURCE
#include <files_unix.h>
#else
#error Unsupported target
#endif

(#elifdef requires C23 or later, but the legacy equivalent should be obvious if you need it)

The implementations become much more readable without conditional preprocessing to parse:

/* files_windows.h */
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
bool is_directory(const char *loc)
{
 if (!loc) { return false; }
 DWORD const fileAttributes = GetFileAttributesA(loc);
 return fileAttributes != INVALID_FILE_ATTRIBUTES
 && fileAttributes & FILE_ATTRIBUTE_DIRECTORY;
}

/* files_unix.h */
#include <sys/stat.h>
#include <sys/types.h>
bool is_directory(const char *loc)
{
 if (!loc) { return false; } /* Probably not needed as stat() will check */
 struct stat buffer;
 return stat(loc, &buffer) == 0
 && S_ISDIR(buffer.st_mode);
}

When one or more files cannot be handled, we emit a message to standard error, but still exit with a success status. I think most users would expect success only if all arguments were successfully processed.

CPlus CPlus 1,3957 silver badges28 bronze badges · Accepted Answer · 2025-01-30 22:09:00Z

Multiple `puts()` calls

For the --help message, I get why you made a puts() call for each line, because having all that in one line separated with \n would not be readable. However, there is an even better way:

puts(
 "Usage: [option] [files]\n"
 "Options:\n"
 " --help Display this help message.\n"
 " --version Display the version of the app.\n"
 " --size=<unit_of_byte> Converts bytes into other units.\n"
 " =bit, bits, BIT, BITS Converts bytes into bits.\n"
 " =byte, bytes, BYTE, BYTES Default size.\n"
 " =kib, KIB Converts bytes into kibibytes.\n"
 " =mib, MIB Converts bytes into mebibytes.\n"
 " =gib, GIB Converts bytes into gibibytes.\n"
 " =kb, KB Converts bytes into kilobytes.\n"
 " =mb, MB Converts bytes into megabytes.\n"
 " =gb, GB Converts bytes into gigabytes."
);

This way, you do not call puts() multiple times unnecessarily, but the code is still readable. This works because adjacent string constants are automatically concatenated at compile time.

Repetitive capital checks for size units

Instead of manually checking for the all caps versions, another option is to make the arguments case-insensitive by converting them to lower case before checking:

char *arg = argv[i]; // Additionally, I would store this in a variable instead of indexing argv with i multiple times
for (int c = 0; arg[c]; ++c) {
 arg[c] = tolower((unsigned char)arg[c]); // #include <ctype.h>
}
if (strcmp(arg, "byte") || strcmp(arg, "bytes")) {
 // ...
} // else ...

Alternatively, instead of using an else if chain, you could store a list, either using an array of struct or a parallel array, and then either linear search using a loop or binary search if you ensure that the strings are in lexicographically increasing order.

struct SizeType {
 char *name;
 enum FILE_SIZE_TYPE type;
} size_types[] = {
 {"byte", BYTES},
 {"bytes", BYTES},
 // ...
}

`if (...) return true; else return false;`

Replace:

if (fileAttributes == INVALID_FILE_ATTRIBUTES)
 return false;
else if (fileAttributes & FILE_ATTRIBUTE_DIRECTORY)
 return true;
else
 return false;

With:

if (fileAttributes == INVALID_FILE_ATTRIBUTES)
 return false;
else
 return (bool)(fileAttributes & FILE_ATTRIBUTE_DIRECTORY); // Explicitly cast to bool to ensure a value of 0 or 1

Or better yet:

return (fileAttributes != INVALID_FILE_ATTRIBUTES)
 && (fileAttributes & FILE_ATTRIBUTE_DIRECTORY);

Using standard logical operators is both more concise and generally more readable than a chain of if/else and return true/false.

Also the POSIX branch can be simplified:

#else
 struct stat buffer;
 if (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode))
 return true;
#endif
 return false;

Can be:

#else
 struct stat buffer;
 return (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode));
#endif

In this case, the final return false; will be unreachable and can be omitted.

Re: OP's "Usage:" puts()... My experience is argv[0] is echoed to the user, confirming the name of the program they've just run... Maybe need printf() here... And, why not use a char * ptr so the long-ish help text isn't buried as a parameter to an output function... And (being a grumpy sort), I wouldn't try to accommodate plural/singular as a parameter. "How many bits?" "One." "Oh, just one bits?" "Yes, and don't be a jerk!"... :-)
@Fe2O3 printf("Usage: %s [option] [files] ...", argv[0]); You mean like that?
Also, check some standard man pages... The 1st filename shouldn't be shown as optional, but provide for multiple filenames...
ALSO rather than letting OP print error for no arguments, no arguments should, perhaps, default to spewing the usage help... Just a thought... Nice answer, btw! :-)

Stack Exchange Network

fsize: A Command-Line Tool for Checking File Sizes

6 Answers 6

Multiple `puts()` calls

Repetitive capital checks for size units

`if (...) return true; else return false;`

Mystical knowledge

DRY

Compiling for Windows

`i`

Help

Output

Constants

Consistency

Redundant == true

Moving Help into a method

Moving unit parsing into its own method

Comments instead of names

Maintainability: enums and switch/case

Suggested code

Result of the change

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

fsize: A Command-Line Tool for Checking File Sizes

6 Answers 6

Multiple puts() calls

Repetitive capital checks for size units

if (...) return true; else return false;

Mystical knowledge

DRY

Compiling for Windows

i

Help

Output

Constants

Consistency

Redundant == true

Moving Help into a method

Moving unit parsing into its own method

Comments instead of names

Maintainability: enums and switch/case

Suggested code

Result of the change

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions

Multiple `puts()` calls

`if (...) return true; else return false;`

`i`