I have created a cross-platform CLI tool to quickly retrieve file size and present it to the user in different units of data. The whole program is written in C and for Windows Win32 API is used to check if a location given is a directory or not. I have tried to be as minimal as I could be while coding this project.
/**
* @file fsize.c
* @license This file is licensed under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007. You may obtain a copy of this license at https://www.gnu.org/licenses/gpl-3.0.en.html.
* @author Tushar Chaurasia (Dark-CodeX)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#if defined _WIN32 || defined _WIN64 || defined __CYGWIN__
#include <Windows.h>
#else
#include <sys/stat.h>
#include <sys/types.h>
#endif
#define FSIZE_VER "1.0.0"
enum FILE_SIZE_TYPE
{
BIT = 8, // multiply
BYTES = 1, // as-it-is BASE
KIB = 1024, // divide
MIB = 1048576, // divide
GIB = 1073741824, // divide
KB = 1000, // divide
MB = 1000000, // divide
GB = 1000000000 // divide
};
bool is_directory(const char *loc)
{
if (!loc)
return false;
#if defined _WIN32 || defined _WIN64 || defined __CYGWIN__
DWORD fileAttributes = GetFileAttributesA(loc);
if (fileAttributes == INVALID_FILE_ATTRIBUTES)
return false;
else if (fileAttributes & FILE_ATTRIBUTE_DIRECTORY)
return true;
else
return false;
#else
struct stat buffer;
if (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode))
return true;
#endif
return false;
}
int main(int argc, char **argv)
{
if (argc == 1)
{
fprintf(stderr, "err: no file was given\n");
return EXIT_FAILURE;
}
int i = 1;
if (strcmp(argv[i], "--help") == 0)
{
puts("Usage: [option] [files]");
puts("Options:");
puts(" --help Display this help message.");
puts(" --version Display the version of the app.");
puts(" --size=<unit_of_byte> Converts bytes into other units.");
puts(" =bit, bits, BIT, BITS Converts bytes into bits.");
puts(" =byte, bytes, BYTE, BYTES Default size.");
puts(" =kib, KIB Converts bytes into kibibytes.");
puts(" =mib, MIB Converts bytes into mebibytes.");
puts(" =gib, GIB Converts bytes into gibibytes.");
puts(" =kb, KB Converts bytes into kilobytes.");
puts(" =mb, MB Converts bytes into megabytes.");
puts(" =gb, GB Converts bytes into gigabytes.");
return EXIT_SUCCESS;
}
if (strcmp(argv[i], "--version") == 0)
{
printf("%s: %s\n", argv[0], FSIZE_VER);
return EXIT_SUCCESS;
}
enum FILE_SIZE_TYPE type = BYTES; // default type is BYTES
if (strncmp(argv[i], "--size=", 7) == 0)
{
argv[i] += 7; // skip `--size=` which is total of 7 characters/bytes excluding null-terminating character
if (strcmp(argv[i], "bit") == 0 || strcmp(argv[i], "bits") == 0 ||
strcmp(argv[i], "BIT") == 0 || strcmp(argv[i], "BITS") == 0)
{
type = BIT;
}
else if (strcmp(argv[i], "byte") == 0 || strcmp(argv[i], "bytes") == 0 ||
strcmp(argv[i], "BYTE") == 0 || strcmp(argv[i], "BYTES") == 0)
{
type = BYTES;
}
else if (strcmp(argv[i], "kib") == 0 || strcmp(argv[i], "KIB") == 0)
{
type = KIB;
}
else if (strcmp(argv[i], "mib") == 0 || strcmp(argv[i], "MIB") == 0)
{
type = MIB;
}
else if (strcmp(argv[i], "gib") == 0 || strcmp(argv[i], "GIB") == 0)
{
type = GIB;
}
else if (strcmp(argv[i], "kb") == 0 || strcmp(argv[i], "KB") == 0)
{
type = KB;
}
else if (strcmp(argv[i], "mb") == 0 || strcmp(argv[i], "MB") == 0)
{
type = MB;
}
else if (strcmp(argv[i], "gb") == 0 || strcmp(argv[i], "GB") == 0)
{
type = GB;
}
else
{
fprintf(stderr, "err: '%s' is not a valid unit of byte, try using '--help' flag\n", argv[i]);
return EXIT_FAILURE;
}
i++;
if (i == argc)
{
fprintf(stderr, "err: no file was given\n");
return EXIT_FAILURE;
}
}
for (; i < argc; i++)
{
if (is_directory(argv[i]) == true)
{
fprintf(stderr, "err: '%s': Is a directory\n", argv[i]);
continue;
}
FILE *fptr = fopen(argv[i], "rb");
if (!fptr)
{
fprintf(stderr, "err: fopen(): '%s': %s\n", argv[i], strerror(errno));
continue;
}
fseek(fptr, 0, SEEK_END);
size_t len = ftell(fptr); // size of file in bytes
fseek(fptr, 0, SEEK_SET);
switch (type)
{
case BIT:
printf("%zu bits : %s\n", len * BIT, argv[i]);
break;
case BYTES:
printf("%zu bytes : %s\n", len, argv[i]);
break;
case KIB:
printf("%0.3f kibibytes : %s \n", (float)len / KIB, argv[i]);
break;
case MIB:
printf("%0.3f mebibytes : %s \n", (float)len / MIB, argv[i]);
break;
case GIB:
printf("%0.3f gibibytes : %s \n", (float)len / GIB, argv[i]);
break;
case KB:
printf("%0.3f kilobytes : %s \n", (float)len / KB, argv[i]);
break;
case MB:
printf("%0.3f megabytes : %s \n", (float)len / MB, argv[i]);
break;
case GB:
printf("%0.3f gigabytes : %s \n", (float)len / GB, argv[i]);
break;
}
fclose(fptr);
}
return EXIT_SUCCESS;
}
I'm open to any improvements and better practices, if I have done something here which might be non-standard practice.
6 Answers 6
Multiple puts()
calls
For the --help
message, I get why you made a puts()
call for each line, because having all that in one line separated with \n
would not be readable. However, there is an even better way:
puts(
"Usage: [option] [files]\n"
"Options:\n"
" --help Display this help message.\n"
" --version Display the version of the app.\n"
" --size=<unit_of_byte> Converts bytes into other units.\n"
" =bit, bits, BIT, BITS Converts bytes into bits.\n"
" =byte, bytes, BYTE, BYTES Default size.\n"
" =kib, KIB Converts bytes into kibibytes.\n"
" =mib, MIB Converts bytes into mebibytes.\n"
" =gib, GIB Converts bytes into gibibytes.\n"
" =kb, KB Converts bytes into kilobytes.\n"
" =mb, MB Converts bytes into megabytes.\n"
" =gb, GB Converts bytes into gigabytes."
);
This way, you do not call puts()
multiple times unnecessarily, but the code is still readable. This works because adjacent string constants are automatically concatenated at compile time.
Repetitive capital checks for size units
Instead of manually checking for the all caps versions, another option is to make the arguments case-insensitive by converting them to lower case before checking:
char *arg = argv[i]; // Additionally, I would store this in a variable instead of indexing argv with i multiple times
for (int c = 0; arg[c]; ++c) {
arg[c] = tolower((unsigned char)arg[c]); // #include <ctype.h>
}
if (strcmp(arg, "byte") || strcmp(arg, "bytes")) {
// ...
} // else ...
Alternatively, instead of using an else
if
chain, you could store a list, either using an array of struct
or a parallel array, and then either linear search using a loop or binary search if you ensure that the strings are in lexicographically increasing order.
struct SizeType {
char *name;
enum FILE_SIZE_TYPE type;
} size_types[] = {
{"byte", BYTES},
{"bytes", BYTES},
// ...
}
if (...) return true; else return false;
Replace:
if (fileAttributes == INVALID_FILE_ATTRIBUTES)
return false;
else if (fileAttributes & FILE_ATTRIBUTE_DIRECTORY)
return true;
else
return false;
With:
if (fileAttributes == INVALID_FILE_ATTRIBUTES)
return false;
else
return (bool)(fileAttributes & FILE_ATTRIBUTE_DIRECTORY); // Explicitly cast to bool to ensure a value of 0 or 1
Or better yet:
return (fileAttributes != INVALID_FILE_ATTRIBUTES)
&& (fileAttributes & FILE_ATTRIBUTE_DIRECTORY);
Using standard logical operators is both more concise and generally more readable than a chain of if
/else
and return true
/false
.
Also the POSIX branch can be simplified:
#else
struct stat buffer;
if (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode))
return true;
#endif
return false;
Can be:
#else
struct stat buffer;
return (stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode));
#endif
In this case, the final return false;
will be unreachable and can be omitted.
-
2\$\begingroup\$ Re: OP's "Usage:"
puts()
... My experience isargv[0]
is echoed to the user, confirming the name of the program they've just run... Maybe needprintf()
here... And, why not use achar *
ptr so the long-ish help text isn't buried as a parameter to an output function... And (being a grumpy sort), I wouldn't try to accommodate plural/singular as a parameter. "How many bits?" "One." "Oh, just one bits?" "Yes, and don't be a jerk!"...:-)
\$\endgroup\$Fe2O3– Fe2O32025年01月31日 03:01:20 +00:00Commented Jan 31 at 3:01 -
1\$\begingroup\$ @Fe2O3
printf("Usage: %s [option] [files] ...", argv[0]);
You mean like that? \$\endgroup\$CPlus– CPlus2025年01月31日 03:08:01 +00:00Commented Jan 31 at 3:08 -
1\$\begingroup\$ Looks good to me!
:-)
Cheers! \$\endgroup\$Fe2O3– Fe2O32025年01月31日 03:10:36 +00:00Commented Jan 31 at 3:10 -
1\$\begingroup\$ Also, check some standard
man
pages... The 1st filename shouldn't be shown as optional, but provide for multiple filenames... \$\endgroup\$Fe2O3– Fe2O32025年01月31日 03:12:34 +00:00Commented Jan 31 at 3:12 -
1\$\begingroup\$ ALSO rather than letting OP print error for no arguments, no arguments should, perhaps, default to spewing the usage help... Just a thought... Nice answer, btw!
:-)
\$\endgroup\$Fe2O3– Fe2O32025年01月31日 03:13:53 +00:00Commented Jan 31 at 3:13
Invalid options are silently ignored. You should print a help message if the option is not recognized.
Along the same line, you should print a help message if a size unit is not recognized.
Together, it means that
help
deserves to be factored out into a function.А filename
--help
is perfectly valid. However, the code can't compute its size. A standard way to deal with such problem is to have--
option, meaning end of options. Everything after it is a filename no matter what.Short options are very user friendly.
-v
is simpler to type than--version
.getopt
is your friend.Avoid magic constants. Consider
argv[i] += strlen("--size=");
or something along this line.
Closing a file is done way too late. Better do it right after
ftell
. BTW, there is no need tofseek(fptr, 0, SEEK_SET);
.KIB, MIB, GIB
values are unnecessarily cryptic. Better use hex constants, egKIB = 0x400, MIB = 0x100000, GIB = 0x40000000,
-
\$\begingroup\$ Re
argv[i] += strlen("--size=");
I would preferargv[i] += sizeof("--size=")-1;
because that is guaranteed to be evaluated at compile time. \$\endgroup\$CPlus– CPlus2025年01月31日 05:40:05 +00:00Commented Jan 31 at 5:40 -
2\$\begingroup\$ @CPlus This is a possibility. But honestly I equally dislike my version and your version (expicit
--size==
is as bad as a magic number). Hence a suggestion forgetopt
. \$\endgroup\$vnp– vnp2025年01月31日 05:47:24 +00:00Commented Jan 31 at 5:47 -
2\$\begingroup\$ @CPlus: Even with optimization disabled, GCC and Clang do optimize
strlen("literal")
to a compile-time constant. godbolt.org/z/Pb5vj3bcW So it's fine for real use, unless you're compiling with-ffreestanding
,-fno-builtin
, or-fno-builtin-strlen
. MSVC needs optimization to be enabled, but we normally only care about quality of the machine code in optimized builds. \$\endgroup\$Peter Cordes– Peter Cordes2025年02月02日 18:19:11 +00:00Commented Feb 2 at 18:19
Not bad for a beginner who wants to write something useful. Keep at it!
Mystical knowledge
The pointer to the array of pointers to character strings (*argv[]
) CAN be manipulated by application code.
The "runtime loader" will add a NULL pointer to the end of the array.
i.e. argv[ argc ] = NULL
(Some esoteric versions of the compiler may not provide this NULL. You'll likely see a leprechaun before you'll see one of those.)
We can use this fact to simplify the code a bit:
/*
int main(int argc, char **argv)
*/
int main(int argc, char *argv[]) // 'feels' like ptr to an array of ptrs.
{
// Remember progname, and start advancing through argv[]
char *progname = *argv; ++argv; // avoid precedence worries
if (*argv == NULL || strcmp(*argv, "--help") == 0) // Always try to be useful!
{
puts("Usage: [option] [files]");
puts("Options:");
...
return EXIT_SUCCESS;
}
if (strcmp(*argv, "--version") == 0)
{
printf("%s: %s\n", progname, FSIZE_VER);
return EXIT_SUCCESS;
}
enum FILE_SIZE_TYPE type = BYTES; // default type is BYTES
if (strncmp(*argv, "--size=", 7) == 0)
{
char *cp = strchr(*argv, '=') + 1; // ptr to just beyond that '='
if (strcmp(cp, "bit") == 0 || strcmp(cp, "bits") == 0
|| strcmp(cp, "BIT") == 0 || strcmp(cp, "BITS") == 0) // Note where '||' moved to
{
type = BIT;
}
else if (strcmp(cp, "byte") == 0 || strcmp(cp, "bytes") == 0
|| strcmp(cp, "BYTE") == 0 || strcmp(cp, "BYTES") == 0)
{
type = BYTES;
}
...
else // see remark in description below
{
fprintf(stderr, "err: '%s' is not a valid unit of byte, try using '--help' flag\n", cp);
return EXIT_FAILURE;
}
argv++; // Done with this argument
if (*argv == NULL) // OP! Do you want this outside enclosing if() block?
{
fprintf(stderr, "%s: err: no file was given\n", progname);
return EXIT_FAILURE;
}
}
for( ; *argv; ++argv) // until encountering NULL
{
...
FILE *fptr = fopen(*argv, "rb");
if (!fptr)
{
fprintf(stderr, "err: fopen(): '%s': %s\n", *argv, strerror(errno));
continue;
}
...
switch (type)
{
case BIT:
printf("%zu bits : %s\n", len * BIT, *argv);
break;
case BYTES:
printf("%zu bytes : %s\n", len, *argv);
break;
case KIB:
printf("%0.3f kibibytes : %s \n", (float)len / KIB, *argv);
break;
...
}
...
}
...
}
More practise and confidence with pointers (and arrays of pointers) will form a solid foundation to write more professional code. The above is merely an attempt to point the way.
Don't be too quick to scold the user and terminate the program. If I'd unknowingly entered--size=bots
instead of ...bits
, consider how helpful the program could be if it showed me the help menu, rather than suggesting I ask for it... Be kind to users and they'll be kinder to you...
Final comment: type
is such an overused term. Best to find another word for your variable. Perhaps mode
as its meaning is the "mode of delivered information; KayBee or Kibbies. What's your pleasure?"
Also: look into stricmp()
or strcasecmp()
as two possibilities to use a library "compare" function that is case INsensitive...
DRY
...: %s \n", (float)len / KIB, argv[i]);
...: %s \n", (float)len / MIB, argv[i]);
...: %s \n", (float)len / GIB, argv[i]);
...: %s \n", (float)len / KB, argv[i]);
...: %s \n", (float)len / MB, argv[i]);
...: %s \n", (float)len / GB, argv[i]);
That's a lot of floating
float flen = (float)len;
...: %s \n", flen / KIB, argv[i]);
...: %s \n", flen / MIB, argv[i]);
...: %s \n", flen / GIB, argv[i]);
...: %s \n", flen / KB, argv[i]);
...: %s \n", flen / MB, argv[i]);
...: %s \n", flen / GB, argv[i]);
Compiling for Windows
I couldn't compile your code on MSVC.
I had to add
#define _CRT_SECURE_NO_WARNINGS
in order to use the strerror()
function.
Also, I suggest you define
#define WIN32_LEAN_AND_MEAN
if you don't need most of the stuff that the Windows header gives you.
i
I like reserving i
for for
loops. Here, it can be named current_arg
or similar.
Help
[files]
is not described in help.
It's unclear whether options can be combined or not. The implementation does not allow it.
It sounds a bit strange to me that bit
is a unit of bytes.
Output
I have never seen a program output "kibibytes" and similar. Maybe use "kiB" instead.
Constants
Someone has suggested to use 0x400
, 0x100000
etc. for the KIB and MIB definitions. I'd prefer
KIB = 1<<10,
MIB = 1<<20,
GIB = 1<<30,
Consistency
I somehow don't like the idea that some of the values need to be used with multiplication while others need division.
enum FILE_SIZE_TYPE { BIT = 8, // multiply BYTES = 1, // as-it-is BASE KIB = 1024, // divide
If everything were bits, then the calculation would always be the same. To do that, you need C23, because 8*1<<30
is 2^3*2^30 = 2^33, which does not fit into an int
anymore. And :unsigned long
is only available since C23. Visual Studio 2022 (17.13) does not have support for that... Meh...
But since I'll rework that anyways, it's not a big deal.
Redundant == true
if (is_directory(argv[i]) == true)
Moving Help into a method
Moving help into its own method makes main()
shorter. If you put it to the top of the file, a reader will see it and understand the purpose of the program quickly.
I'll apply the string chaining technique proposed by @CPlus as well.
Moving unit parsing into its own method
Moving the unit parsing into its own method makes main()
shorter. Many readers will start reading the main()
method to get an overview.
Comments instead of names
size_t len = ftell(fptr); // size of file in bytes
can be
size_t size_in_bytes = ftell(fptr);
Maintainability: enums and switch/case
Enums are inherently bad for maintainability. Typically you need a switch/case or else/if cascade somewhere to handle all the cases. This means: if you add or remove an item from the list, you need to adapt all the switch/case or else/if cascades.
When removing an item from an enum, the compiler will help you finding all occurrences. But adding an item is dangerous, because the code will fall into the default branch.
Since this is a major redesign, simply look at the resulting code.
Suggested code
/**
* @file fsize.c
* @license This file is licensed under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007. You may obtain a copy of this license at https://www.gnu.org/licenses/gpl-3.0.en.html.
* @author Tushar Chaurasia (Dark-CodeX)
*/
/**
* @file fsize.c
* @license This file is licensed under the GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007. You may obtain a copy of this license at https://www.gnu.org/licenses/gpl-3.0.en.html.
* @author Tushar Chaurasia (Dark-CodeX)
*/
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#include <ctype.h>
#if defined _WIN32 || defined _WIN64 || defined __CYGWIN__
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#else
#include <sys/stat.h>
#include <sys/types.h>
#endif
#define FSIZE_VER "1.0.0"
void show_help(void)
{
puts(
"Usage: [option] [files]\n"
"Options:\n"
" --help Display this help message.\n"
" --version Display the version of the app.\n"
" --size=<unit_of_byte> Define output unit.\n"
" =bit, bits Output in bits.\n"
" =byte, bytes Output in bytes (default).\n"
" =kib Output in kibibytes.\n"
" =mib Output in mebibytes.\n"
" =gib Output in gibibytes.\n"
" =kb Output in kilobytes.\n"
" =mb Output in megabytes.\n"
" =gb Output in gigabytes.\n"
);
}
bool is_directory(const char* loc)
{
if (!loc)
return false;
#if defined _WIN32 || defined _WIN64 || defined __CYGWIN__
DWORD fileAttributes = GetFileAttributesA(loc);
if (fileAttributes == INVALID_FILE_ATTRIBUTES)
return false;
return fileAttributes & FILE_ATTRIBUTE_DIRECTORY;
#else
struct stat buffer;
return stat(loc, &buffer) == 0 && S_ISDIR(buffer.st_mode);
#endif
}
typedef struct UNIT
{
const char* input_name;
const char* output_name;
const char* output_format;
const unsigned long long divider;
} UNIT;
UNIT UNITS[] = {
// Put the default unit first
{ "byte", "bytes", "%0.f", 8ULL },
{"bit", "bits", "%0.f", 1ULL},
{"bits", "bits", "%0.f", 1ULL},
{"bytes", "bytes", "%0.f", 8ULL},
{"kib", "kibibytes" , "%0.3f", 8ULL<<10},
{"mib", "mebibytes", "%0.3f", 8ULL<<20},
{"gib", "gibibytes", "%0.3f", 8ULL<<30},
{"kb", "kilobytes", "%0.3f", 8ULL*1000},
{"mb", "megabytes", "%0.3f", 8ULL*1000*1000},
{"gb", "gigabytes", "%0.3f", 8ULL*1000*1000*1000},
// Put the "no unit" last
{ NULL, NULL, NULL, 0 },
};
UNIT* parse_unit(char* unit_name, int const argc)
{
UNIT* NO_UNIT = &UNITS[sizeof(UNITS) / sizeof(UNITS[0]) - 1];
UNIT* result = &UNITS[0];
// Parse unit as lower case
for (int c = 0; unit_name[c]; ++c) {
unit_name[c] = tolower((unsigned char)unit_name[c]);
}
// Unit lookup by input name
bool found = false;
for (size_t j = 0; UNITS[j].input_name != NULL; j++)
{
if (strcmp(unit_name, UNITS[j].input_name) == 0)
{
result = &UNITS[j];
found = true;
}
}
// Error handling
if (!found)
{
fprintf(stderr, "err: '%s' is not a valid unit, try using '--help' flag\n", unit_name);
return NO_UNIT;
}
if (argc == 2)
{
fprintf(stderr, "err: no file was given\n");
return NO_UNIT;
}
return result;
}
void print_file_size(char* file_name, UNIT unit)
{
if (is_directory(file_name))
{
fprintf(stderr, "err: '%s': Is a directory\n", file_name);
return;
}
FILE* fptr = fopen(file_name, "rb");
if (!fptr)
{
fprintf(stderr, "err: fopen(): '%s': %s\n", file_name, strerror(errno));
return;
}
int result = fseek(fptr, 0, SEEK_END);
if (result != 0)
{
fprintf(stderr, "err: fseek(): '%s': %s\n", file_name, strerror(errno));
fclose(fptr);
return;
}
size_t size_in_bytes = ftell(fptr);
fclose(fptr);
char format[] = "xxxxxx %s : %s\n";
sprintf_s(format, sizeof(format) - 1, "%s %%s : %%s\n", unit.output_format);
printf(format, 8.f * (float)size_in_bytes / (float)unit.divider, unit.output_name, file_name);
}
int main(int argc, char** argv)
{
if (argc == 1)
{
fprintf(stderr, "err: no file was given\n");
return EXIT_FAILURE;
}
int current_arg = 1;
if (strcmp(argv[current_arg], "--help") == 0)
{
show_help();
return EXIT_SUCCESS;
}
if (strcmp(argv[current_arg], "--version") == 0)
{
printf("%s: %s\n", argv[0], FSIZE_VER);
return EXIT_SUCCESS;
}
UNIT* unit = &UNITS[0];
if (strncmp(argv[current_arg], "--size=", sizeof("--size=") - 1) == 0)
{
unit = parse_unit(argv[current_arg] + sizeof("--size=") - 1, argc);
if (unit->input_name == NULL) return EXIT_FAILURE;
current_arg++;
}
for (; current_arg < argc; current_arg++)
{
print_file_size(argv[current_arg], *unit);
}
return EXIT_SUCCESS;
}
Result of the change
Before: 182 lines, 1 helper method, main has 130 lines.
After: 180 lines, 4 helper methods, main has 33 lines and the 5 steps of processing are clearly visible. No switch/case and no if/else cascade.
-
1\$\begingroup\$ Nice review. Detailed. Oops:
show_help()
::puts()
needs\n
at the end of each string fragment... Oops 2:tolower()
not quite right. Look at answer by @Chux... Braces inconsistent for that loop... Y'know, if loop counterj
had larger scope,&UNITS[ j ]
WOULD be that empty result to return. Less confusion... Oops 3:current_arg
not used instrncmp( ..., "--size-", ....)
. Finally, (very, very few do this)fseek()
can fail, returning an error code... Pedantry... Cheers!:-)
\$\endgroup\$Fe2O3– Fe2O32025年02月01日 02:45:34 +00:00Commented Feb 1 at 2:45 -
\$\begingroup\$ Re: "I'd prefer ... MIB = 1<<20, GIB = 1<<30," has growth weaknesses when the shift is more than the 31. If the scale was a floating point, code could use
0x1.0p10, 0x1.0p20, 0x1.0p30
. Of course FP impact other things, yet I find theenum
approach limiting. \$\endgroup\$chux– chux2025年02月01日 22:29:08 +00:00Commented Feb 1 at 22:29 -
2\$\begingroup\$ Minor: rather than
UNITS[sizeof(UNITS) / sizeof(UNIT) - 1]
, which obliges a check of an object and a type, considerUNITS[sizeof(UNITS) / sizeof(UNITS[0]) - 1]
which only depends on the object and not its or any type. \$\endgroup\$chux– chux2025年02月01日 22:32:54 +00:00Commented Feb 1 at 22:32 -
1\$\begingroup\$ Thanks for the comments. \$\endgroup\$Thomas Weller– Thomas Weller2025年02月02日 16:17:36 +00:00Commented Feb 2 at 16:17
-
\$\begingroup\$ Thomas Weller, on deeper review, I really like the convert it all to bits approach. To support bits and future files sizes, perhaps go to
long long
size, constants. \$\endgroup\$chux– chux2025年02月02日 23:06:18 +00:00Commented Feb 2 at 23:06
Just a few left over points not yet reported:
Use a function to get file size
The fseek(... SEEK_END); len = ftell(fptr);
trick to get the file size is weak and inadequate for large files. I am not suggesting you write a better implementation, yet realize it has short-comings.
When code has significant short-comings, yet one lacks good alternatives, make it a function call:
long long len = my_get_file_size(fptr);
Inside your my_get_file_size()
, you can code the current fseek/ftell
trick and allow for easier updates (e.g. only local impact).
1024 or 1000?
Help deserved a line of explanation to clearly indicate when the 1024 or 1000 scale is used.
Why stop at GB?
The metric prefixes are kMGTPEZYRQ. Even if one does not desire to support terabytes, etc. files, consider forming your code so it can gracefully grow.
Deeper: Consider your code base's lifetime. Do you expect users of this code today as well as a later version of fsize
in 10 years just might need bigger files?
Perhaps not so much bigger: Hard drive capacity over time Hard Disk HDD capacity by year, by power of 10?
Improve error checking
ftell(fptr)
returns a long
and -1 on error. Detect that. long
may or may not have a wider range than size_t
. No need to inject the size_t
here. size_t
is for object memory sizes, not file lengths.
// size_t len = ftell(fptr);
long len = ftell(fptr);
if (len == -1) {
Handle_Error();
}
if (len < 0) {
TBD code();
}
Avoid overflow
len * BIT
risks overflow. Use wider math.
//printf("%zu bits : %s\n", len * BIT, argv[i]);
printf("%ju bits : %s\n", (uintmax_t) len * BIT, argv[i]);
Avoid imprecision
(float)len / KIB
, as a printf()
argument, has weaknesses.
The float
cast can readily loses precision for large values.
Strange that float
was chosen as the quotient. (float)len / KIB
is up converted to double
before passing to printf()
, so why not use double
division right away?
Consider double
printf("%0.3f kibibytes : %s \n", (double)len / KIB, argv[i]);
printf("%0.3f kibibytes : %s \n", len / (double)KIB, argv[i]);
...
// or maybe long double
printf("%0.3Lf kibibytes : %s \n", len / (long double)KIB, argv[i]);
-
\$\begingroup\$ You forgot Yotta. \$\endgroup\$Oskar Skog– Oskar Skog2025年02月03日 08:09:12 +00:00Commented Feb 3 at 8:09
-
\$\begingroup\$ @OskarSkog Answer amended. \$\endgroup\$chux– chux2025年02月03日 13:38:36 +00:00Commented Feb 3 at 13:38
This is a non-portable assumption:
BIT = 8, // multiply
We should be using CHAR_BIT
to ensure our units are consistent with the target platform.
Most of is_directory()
is platform-dependent. Consider refactoring into two separate implementations - these might even move into two separate header files, especially if we add more functions to our filesystem abstraction:
#ifdef USE_WINAPI
#include <files_windows.h>
#elifdef POSIX_SOURCE
#include <files_unix.h>
#else
#error Unsupported target
#endif
(#elifdef
requires C23 or later, but the legacy equivalent should be obvious if you need it)
The implementations become much more readable without conditional preprocessing to parse:
/* files_windows.h */
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
bool is_directory(const char *loc)
{
if (!loc) { return false; }
DWORD const fileAttributes = GetFileAttributesA(loc);
return fileAttributes != INVALID_FILE_ATTRIBUTES
&& fileAttributes & FILE_ATTRIBUTE_DIRECTORY;
}
/* files_unix.h */
#include <sys/stat.h>
#include <sys/types.h>
bool is_directory(const char *loc)
{
if (!loc) { return false; } /* Probably not needed as stat() will check */
struct stat buffer;
return stat(loc, &buffer) == 0
&& S_ISDIR(buffer.st_mode);
}
When one or more files cannot be handled, we emit a message to standard error, but still exit with a success status. I think most users would expect success only if all arguments were successfully processed.
BIT
vsBYTES
... After REAL effort allowing user both "byte" and "bytes", a tiny file will be reported with1 bytes : filename
. Most software users quickly grow-out-of fussing about plural/singular. Best not to offer too much accommodation to language rules. (Those are arcane!!) \$\endgroup\$unsigned long long
number here. Say I wanted to know how many 8k byte blocks?, I could use "--size=8192". Might be good step toward future proofing. Your thoughts? \$\endgroup\$