As part of a past assignment, I've expanded on this implementation of ls
to have more options available, including non-standard ones.
It currently supports these (described on this page):
-a
-d
-h
-i
-l
-p
-Q
-R
-S
-t
-U
Other than that, it doesn't list a total
field nor does it have coloring. It also doesn't have the same adaptive column alignments, but I have it done with fixed amounts so that it's still easier to read each column with -l
.
I've already ran this through Valgrind with and without different options, and there appears to be no memory leaks. I think it can still be structured better and simplified, though.
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#include <getopt.h>
#include <grp.h>
#include <pwd.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sysexits.h>
#include <time.h>
#include <unistd.h>
static char* global_dir = ".";
struct Options
{
bool using_a;
bool using_d;
bool using_h;
bool using_i;
bool using_l;
bool using_p;
bool using_Q;
bool using_R;
bool using_S;
bool using_t;
bool using_U;
};
static void init_opts(struct Options* opts)
{
opts->using_a = false;
opts->using_d = false;
opts->using_h = false;
opts->using_i = false;
opts->using_l = false;
opts->using_p = false;
opts->using_Q = false;
opts->using_R = false;
opts->using_S = false;
opts->using_t = false;
opts->using_U = false;
}
struct Options get_opts(int count, char* args[])
{
struct Options opts;
init_opts(&opts);
int opt;
while ((opt = getopt(count, args, "adhilpQRStU")) != -1)
{
switch (opt)
{
case 'a': opts.using_a = true; break;
case 'd': opts.using_d = true; break;
case 'h': opts.using_h = true; break;
case 'i': opts.using_i = true; break;
case 'l': opts.using_l = true; break;
case 'p': opts.using_p = true; break;
case 'Q': opts.using_Q = true; break;
case 'R': opts.using_R = true; break;
case 'S': opts.using_S = true; break;
case 't': opts.using_t = true; break;
case 'U': opts.using_U = true; break;
case '?': exit(EX_USAGE);
}
}
return opts;
}
static void print_permissions(mode_t mode)
{
putchar((mode & S_IRUSR) ? 'r' : '-');
putchar((mode & S_IWUSR) ? 'w' : '-');
putchar((mode & S_IXUSR) ? 'x' : '-');
putchar((mode & S_IRGRP) ? 'r' : '-');
putchar((mode & S_IWGRP) ? 'w' : '-');
putchar((mode & S_IXGRP) ? 'x' : '-');
putchar((mode & S_IROTH) ? 'r' : '-');
putchar((mode & S_IWOTH) ? 'w' : '-');
putchar((mode & S_IXOTH) ? 'x' : '-');
}
static void print_filetype(mode_t mode)
{
switch (mode & S_IFMT)
{
case S_IFREG: putchar('-'); break;
case S_IFDIR: putchar('d'); break;
case S_IFLNK: putchar('l'); break;
case S_IFCHR: putchar('c'); break;
case S_IFBLK: putchar('b'); break;
case S_IFSOCK: putchar('s'); break;
case S_IFIFO: putchar('f'); break;
}
}
void readable_fs(double size, char* buf)
{
const char* units[] = { "", "K", "M", "G", "T" };
int i = 0;
while (size > 1024)
{
size /= 1024;
++i;
}
sprintf(buf, "%.*f%s", i, size, units[i]);
}
void print_time(time_t mod_time)
{
// get current time with year
time_t curr_time;
time(&curr_time);
struct tm* t = localtime(&curr_time);
const int curr_mon = t->tm_mon;
const int curr_yr = 1970 + t->tm_year;
// get mod time and year
t = localtime(&mod_time);
const int mod_mon = t->tm_mon;
const int mod_yr = 1970 + t->tm_year;
// determine format based on years
const char* format = ((mod_yr == curr_yr)
&& (mod_mon >= (curr_mon - 6)))
? "%b %e %H:%M"
: "%b %e %Y";
char time_buf[128];
strftime(time_buf, sizeof(time_buf), format, t);
printf("%s", time_buf);
}
struct stat get_stats(const char* filename)
{
char path[1024];
sprintf(path, "%s/%s", global_dir, filename);
struct stat sb;
if (lstat(path, &sb) < 0)
{
perror(path);
exit(EX_IOERR);
}
return sb;
}
bool is_dir(const char* filename)
{
struct stat sb = get_stats(filename);
if (lstat(filename, &sb) < 0)
{
perror(filename);
return false;
}
return (sb.st_mode & S_IFDIR) ? true : false;
}
bool is_in_dir(const char* dir, const char* filename)
{
DIR* dfd = opendir(dir);
if (!dfd)
{
perror(dir);
return false;
}
struct dirent* dp = readdir(dfd);
while (dp)
{
if (strcmp(filename, dp->d_name) == 0)
{
closedir(dfd);
return true;
}
dp = readdir(dfd);
}
fprintf(stderr, "file \'%s\' not found\n", filename);
closedir(dfd);
return false;
}
void print_name_or_link(const char* filename, struct Options opts, mode_t mode)
{
if (mode & S_IFLNK)
{
char link_buf[512];
int count = readlink(filename, link_buf, sizeof(link_buf));
if (count >= 0)
{
link_buf[count] = '0円';
if (opts.using_Q)
{
printf(" \"%s\" -> \"%s\"\n", filename, link_buf);
}
else
{
printf(" %s -> %s \n", filename, link_buf);
}
return;
}
}
if (opts.using_Q)
{
printf(" \"%s\"", filename);
}
else
{
printf(" %s", filename);
}
if (opts.using_p && is_dir(filename))
{
putchar('/');
}
putchar('\n');
}
void display_stats(char* dir, char* filename, struct Options opts)
{
if (!is_in_dir(dir, filename))
{
return;
}
if (!opts.using_l)
{
printf("%s\n", filename);
return;
}
global_dir = dir;
struct stat sb = get_stats(filename);
if (opts.using_i)
{
printf("%ld ", (long)sb.st_ino);
}
print_filetype(sb.st_mode);
print_permissions(sb.st_mode);
printf(" %d ", sb.st_nlink);
printf("%10s ", getpwuid(sb.st_uid)->pw_name);
printf("%10s", getgrgid(sb.st_gid)->gr_name);
if (opts.using_h)
{
char buf[10];
readable_fs(sb.st_size, buf);
printf(" %8s ", buf);
}
else
{
printf("%10ld ", (long)sb.st_size);
}
print_time(sb.st_mtime);
print_name_or_link(filename, opts, sb.st_mode);
}
bool can_recurse_dir(const char* parent, char* curr)
{
if (!strcmp(".", curr) || !strcmp("..", curr))
{
return false;
}
char path[2048];
sprintf(path, "%s/%s", parent, curr);
struct stat sb;
if (lstat(path, &sb) < 0)
{
perror(path);
exit(EX_IOERR);
}
return S_ISDIR(sb.st_mode);
}
void recurse_dirs(char* dir, struct Options opts)
{
DIR* dfd = opendir(dir);
struct dirent* dp = readdir(dfd);
printf("\n%s:\n", dir);
while ((dp = readdir(dfd)))
{
const bool omit_hidden = !opts.using_a && dp->d_name[0] == '.';
if (!omit_hidden)
{
if (opts.using_l)
{
display_stats(dir, dp->d_name, opts);
}
else
{
printf("%s\n", dp->d_name);
}
}
if (can_recurse_dir(dir, dp->d_name))
{
char next[1024];
sprintf(next, "%s/%s", dir, dp->d_name);
recurse_dirs(next, opts);
}
}
closedir(dfd);
}
static int cmp_lex(const void* p1, const void* p2)
{
const char* str1 = *(const void**)p1;
const char* str2 = *(const void**)p2;
return strcasecmp(str1, str2);
}
static int cmp_time(const void* p1, const void* p2)
{
const char* str1 = *(const char**)p1;
const char* str2 = *(const char**)p2;
time_t time1 = get_stats(str1).st_mtime;
time_t time2 = get_stats(str2).st_mtime;
return time1 < time2;
}
static int cmp_size(const void* p1, const void* p2)
{
const char* str1 = *(const char**)p1;
const char* str2 = *(const char**)p2;
long int size1 = get_stats(str1).st_size;
long int size2 = get_stats(str2).st_size;
return size1 < size2;
}
void display_dir(char* dir, struct Options opts)
{
DIR* dfd = opendir(dir);
struct dirent* dp = readdir(dfd);
long curr_alloc_amt = 30000;
char** dir_arr = malloc(curr_alloc_amt * sizeof(char*));
if (!dir_arr)
{
abort();
}
long int count = 0;
while (dp)
{
const bool omit_hidden = !opts.using_a && dp->d_name[0] == '.';
if (!omit_hidden)
{
if (count >= curr_alloc_amt)
{
curr_alloc_amt *= 2;
dir_arr = realloc(dir_arr, curr_alloc_amt * sizeof(char*));
if (!dir_arr)
{
abort();
}
}
dir_arr[count] = dp->d_name;
count++;
}
dp = readdir(dfd);
}
global_dir = dir;
if (!opts.using_U && opts.using_t)
{
qsort(dir_arr, count, sizeof(char*), cmp_time);
}
else if (!opts.using_U && opts.using_S)
{
qsort(dir_arr, count, sizeof(char*), cmp_size);
}
else if (!opts.using_U)
{
qsort(dir_arr, count, sizeof(char*), cmp_lex);
}
for (long int i = 0; i < count; ++i)
{
display_stats(dir, dir_arr[i], opts);
}
closedir(dfd);
free(dir_arr);
}
void scan_dirs(int count, char* args[], struct Options opts)
{
if (opts.using_d)
{
const bool no_dirs_given = (count - optind) == 0;
if (no_dirs_given)
{
display_stats(".", ".", opts);
}
// loop through directories
for (int i = optind; i < count; ++i)
{
display_stats(".", args[i], opts);
}
return;
}
// no other arguments
if (!opts.using_R && (optind == count))
{
display_dir(".", opts);
}
if (opts.using_R && !opts.using_d)
{
recurse_dirs(".", opts);
return;
}
const bool multiple_dirs = (count - optind) >= 2;
// loop through directories
for (int i = optind; i < count; ++i)
{
if (!is_dir(args[i]))
{
display_stats(".", args[i], opts);
continue;
}
// display directory name
// for multiple directories
if (multiple_dirs)
{
printf("\n%s:\n", args[i]);
}
if (!is_in_dir(".", args[i]))
{
continue;
}
display_dir(args[i], opts);
}
}
int main(int argc, char* argv[])
{
scan_dirs(argc, argv, get_opts(argc, argv));
return 0;
}
3 Answers 3
Range in
readable_fs()
.readable_fs()
should handle even larger sizes - consider future growth, so expand toK,M,G,T,P,E,Z,Y
. RefPedantically there is no safe guard with
double size
not being some number like1e100
and thus over-filingchar buf[10];
. To limit only local code that understands the limitations ofreadable_fs()
, make itstatic
and/or abort on insane numbers.Naked Magic numbers:
(curr_mon - 6)
. Maybe(curr_mon - OLD_DATE)
.can_recurse_dir() char path[2048];
Why 2048? Better to use a#define CAN_BUF_SIZE 2048
, etc.get_stats(const char* filename) char path[1024];
MaybeMAX_PATH_LEN
?char time_buf[128];
Why 128?print_name_or_link() char link_buf[512];
etc.long curr_alloc_amt = 30000;
(Why 30000. Whylong
and notsize_t
?)strftime
// Minor: () not needed // strftime(time_buf, sizeof(time_buf), format, t); // check result if (0 == strftime(time_buf, sizeof time_buf, format, t)) { // Handle failure somehow time_buf[0] = 0; abort(); }
No protection from over-run in various places. Suggest buffer scale by system macros
PATH_MAX
,NAME_MAX
, etc. Also considersnprintf()
and check its results.// char path[1024]; char path[PATH_MAX + 1 + NAME_MAX + 1]; sprintf(path, "%s/%s", global_dir, filename);
Incorrect use of
qsort()
The compare function expects
+,0,-
on less than, equal, or greater than, not 0,1. This can result in UB incmp_time()
and intcmp_size()
// return time1 < time2;` return time1 > time2 - time1 < time2; // Or reverse time1,time2
Consider an easier to code
*alloc()
style: Details// dir_arr = realloc(dir_arr, curr_alloc_amt * sizeof(char*)); dir_arr = realloc(dir_arr, sizeof *dir_arr * curr_alloc_amt);
Why use type
long
when field is typeoff_t
inst_size;
? Better to useoff_t
and not change type.main()
I'd expect the program to return useful codes on return than only 0 to indicate success/failure/type of failure.
const
Could use
const
, potentially better code and it conveys that*dir
is not modified.// display_dir(char* dir, display_dir(const char* dir,
Array elements are better indexed with
size_t
thanlong
inprint_name_or_link()
. See also #2: last magic number// long int count = 0; size_t count = 0;
[Edit]
Above idea also applies to
// long int size1 = get_stats(str1).st_size; // long int size2 = get_stats(str2).st_size; // return size1 < size2; off_t size1 = get_stats(str1).st_size; off_t size2 = get_stats(str2).st_size; return size1 > size2 - size1 < size2
To print types lacking a printf specifier, cast to the widest available integer of matching sign-ness. In this case, "ino_t shall be defined as unsigned integer" Ref and How to print types of unknown size like ino_t?
// printf("%ld ", (long)sb.st_ino); printf("%ju ", (uintmax_t) sb.st_ino);
-
\$\begingroup\$ There doesn't seem to be a specific way to format
off_t
inprintf()
, and other solutions I've looked at involves casting that value. \$\endgroup\$Jamal– Jamal2015年12月20日 23:53:07 +00:00Commented Dec 20, 2015 at 23:53 -
\$\begingroup\$ @Jamal Recommend: (best)
printf("%jd", (intmax) off_t_var);
orprintf("%lld", (long long) off_t_var);
or stackoverflow.com/questions/586928/… IAC, that is for printing. Continue to useoff_t
for math and related variables. \$\endgroup\$chux– chux2015年12月21日 00:10:20 +00:00Commented Dec 21, 2015 at 0:10 -
\$\begingroup\$ Thanks. I somehow didn't find the specific types for the others. As for the returning in
main()
, I can change the relevant display functions to returnint
s so that they can fall back tomain()
. \$\endgroup\$Jamal– Jamal2015年12月21日 00:39:34 +00:00Commented Dec 21, 2015 at 0:39
Looks pretty clean for the most part! Here's the source of the actual GNU implementation used on most systems for comparison (if you already haven't taken a look at it). Just a few notes:
Looks like part of this function could be converted into a
for
loop:void readable_fs(double size, char* buf) { const char* units[] = { "", "K", "M", "G", "T" }; int i = 0; while (size > 1024) { size /= 1024; ++i; } sprintf(buf, "%.*f%s", i, size, units[i]); }
Would still retain the same functionality, but reduce LOC and make it more readable/concise.
I also noticed you have a magic number in the above function (as well as some other places). I think it best in this situation to just place a comment above its usage to specify what that number is.
I'm not a big fan of the
using_t
name, since the_t
is reserved for POSIX type implementations.t_option
would be better? But if you change that one name you may as well be consistent and change all the others to be similar.Prefer
snprintf()
tosprintf()
.You should use
size_t
in yourfor
loop counter variable here:for (long int i = 0; i < count; ++i)
You could use this in other places too, as I see you are starting from 0 and only counting up (meaning you'd have no negative numbers, right?). I'm not saying
size_t
is a good fit for every situation though, use the smallest type that will fit the situation as appropriately as possible to.stdint.h
is your best friend here.No reason to
return 0;
, can/should be removed.
-
\$\begingroup\$ Any ideas about the global (even though that source uses a bunch)? it seems that I can remove its use in other places but
display_dir()
, otherwise the code would break if I specify a different directory. \$\endgroup\$Jamal– Jamal2015年12月19日 23:53:58 +00:00Commented Dec 19, 2015 at 23:53 -
\$\begingroup\$ @Jamal Here's a question for you: Is that REALLY needed at a global scope? Another way to pose the question: does it have the smallest scope possible? If you believe so, then I think it's permissible. \$\endgroup\$syb0rg– syb0rg2015年12月20日 00:11:32 +00:00Commented Dec 20, 2015 at 0:11
-
\$\begingroup\$ Yes, I believe it is. I could probably find a workaround if it were easier to pass around the directory, such as in the sorting functions. \$\endgroup\$Jamal– Jamal2015年12月20日 00:28:36 +00:00Commented Dec 20, 2015 at 0:28
Well written code. Good job.
One thing I'd change is to instead use some meaningful name for each flag, instead of the flag letter. I don't recall by memory what -U
does in the command, for instance, and so seeing a using_U
in the code will evoke the same issue. You might remember the meaning of each flag right now since you have just implemented it, but I guarantee you'll forget them pretty soon. So I'd instead write struct Options
someway like this:
struct Options
{
bool print_all; // -a, --all
bool list_dirs_only; // -d, --directory
bool human_readable_sizes; // -h, --human-readable
// etcetera
};
By the way, you're not handling the long form of each flag, which actually I don't think are supported everywhere, but as the link you've provided shows, seem to be defined. Should be easy for you to extend support for them using getop_long
if you're interested...
display_stats()
, display_dir()
, recurse_dirs()
should all take the dir
name string by const char *
. The present non-const signatures pass the wrong impression that the strings can be modified. It doesn't seems to be the case, as far as I can tell.
Also, global_dir
might be better named as current_dir
? That one also should be a const pointer, regardless.
I'm not sure why you marked some of the functions static
. All except main()
could be static
, but since this is a single file program, none really need to be static
. So in this case, I'd personally go all or nothing.
I suppose that here you actually meant to cast to char**
instead? It's all the same for the C compiler but might give the wrong idea to the readers:
static int cmp_lex(const void* p1, const void* p2) { const char* str1 = *(const void**)p1; // char** instead? const char* str2 = *(const void**)p2; return strcasecmp(str1, str2); }
This is silly, but that last line that breaks alignment really gives me OCD ;)
switch (mode & S_IFMT) { case S_IFREG: putchar('-'); break; case S_IFDIR: putchar('d'); break; case S_IFLNK: putchar('l'); break; case S_IFCHR: putchar('c'); break; case S_IFBLK: putchar('b'); break; case S_IFSOCK: putchar('s'); break; // <-- oh god... case S_IFIFO: putchar('f'); break; }
-
2\$\begingroup\$ I was hesitant about long options, but after just ten minutes or so, I think I figured them out. :-) I can work on that OCD spacing as well. ;-) I blame the Powers That Be for not making it
S_IFSOK
or something like that. \$\endgroup\$Jamal– Jamal2015年12月20日 03:41:55 +00:00Commented Dec 20, 2015 at 3:41