19
\$\begingroup\$

As part of a past assignment, I've expanded on this implementation of ls to have more options available, including non-standard ones.

It currently supports these (described on this page):

  • -a
  • -d
  • -h
  • -i
  • -l
  • -p
  • -Q
  • -R
  • -S
  • -t
  • -U

Other than that, it doesn't list a total field nor does it have coloring. It also doesn't have the same adaptive column alignments, but I have it done with fixed amounts so that it's still easier to read each column with -l.

I've already ran this through Valgrind with and without different options, and there appears to be no memory leaks. I think it can still be structured better and simplified, though.

#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#include <getopt.h>
#include <grp.h>
#include <pwd.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sysexits.h>
#include <time.h>
#include <unistd.h>
static char* global_dir = ".";
struct Options
{
 bool using_a;
 bool using_d;
 bool using_h;
 bool using_i;
 bool using_l;
 bool using_p;
 bool using_Q;
 bool using_R;
 bool using_S;
 bool using_t;
 bool using_U;
};
static void init_opts(struct Options* opts)
{
 opts->using_a = false;
 opts->using_d = false;
 opts->using_h = false;
 opts->using_i = false;
 opts->using_l = false;
 opts->using_p = false;
 opts->using_Q = false;
 opts->using_R = false;
 opts->using_S = false;
 opts->using_t = false;
 opts->using_U = false;
}
struct Options get_opts(int count, char* args[])
{
 struct Options opts;
 init_opts(&opts);
 int opt;
 while ((opt = getopt(count, args, "adhilpQRStU")) != -1)
 {
 switch (opt)
 {
 case 'a': opts.using_a = true; break;
 case 'd': opts.using_d = true; break;
 case 'h': opts.using_h = true; break;
 case 'i': opts.using_i = true; break;
 case 'l': opts.using_l = true; break;
 case 'p': opts.using_p = true; break;
 case 'Q': opts.using_Q = true; break;
 case 'R': opts.using_R = true; break;
 case 'S': opts.using_S = true; break;
 case 't': opts.using_t = true; break;
 case 'U': opts.using_U = true; break;
 case '?': exit(EX_USAGE);
 }
 }
 
 return opts;
}
static void print_permissions(mode_t mode)
{
 putchar((mode & S_IRUSR) ? 'r' : '-');
 putchar((mode & S_IWUSR) ? 'w' : '-');
 putchar((mode & S_IXUSR) ? 'x' : '-');
 putchar((mode & S_IRGRP) ? 'r' : '-');
 putchar((mode & S_IWGRP) ? 'w' : '-');
 putchar((mode & S_IXGRP) ? 'x' : '-');
 putchar((mode & S_IROTH) ? 'r' : '-');
 putchar((mode & S_IWOTH) ? 'w' : '-');
 putchar((mode & S_IXOTH) ? 'x' : '-');
}
static void print_filetype(mode_t mode)
{
 switch (mode & S_IFMT)
 {
 case S_IFREG: putchar('-'); break;
 case S_IFDIR: putchar('d'); break;
 case S_IFLNK: putchar('l'); break;
 case S_IFCHR: putchar('c'); break;
 case S_IFBLK: putchar('b'); break;
 case S_IFSOCK: putchar('s'); break;
 case S_IFIFO: putchar('f'); break;
 }
}
void readable_fs(double size, char* buf)
{
 const char* units[] = { "", "K", "M", "G", "T" };
 int i = 0;
 
 while (size > 1024)
 {
 size /= 1024;
 ++i;
 }
 
 sprintf(buf, "%.*f%s", i, size, units[i]);
}
void print_time(time_t mod_time)
{
 // get current time with year
 time_t curr_time;
 time(&curr_time);
 struct tm* t = localtime(&curr_time);
 const int curr_mon = t->tm_mon;
 const int curr_yr = 1970 + t->tm_year;
 // get mod time and year
 t = localtime(&mod_time);
 const int mod_mon = t->tm_mon;
 const int mod_yr = 1970 + t->tm_year;
 // determine format based on years
 const char* format = ((mod_yr == curr_yr)
 && (mod_mon >= (curr_mon - 6)))
 ? "%b %e %H:%M"
 : "%b %e %Y";
 char time_buf[128];
 strftime(time_buf, sizeof(time_buf), format, t);
 printf("%s", time_buf);
}
struct stat get_stats(const char* filename)
{
 char path[1024];
 sprintf(path, "%s/%s", global_dir, filename);
 struct stat sb;
 if (lstat(path, &sb) < 0)
 { 
 perror(path);
 exit(EX_IOERR);
 }
 return sb;
}
bool is_dir(const char* filename)
{
 struct stat sb = get_stats(filename);
 if (lstat(filename, &sb) < 0)
 {
 perror(filename);
 return false;
 }
 return (sb.st_mode & S_IFDIR) ? true : false;
}
bool is_in_dir(const char* dir, const char* filename)
{
 DIR* dfd = opendir(dir);
 if (!dfd)
 {
 perror(dir);
 return false;
 }
 struct dirent* dp = readdir(dfd);
 while (dp)
 {
 if (strcmp(filename, dp->d_name) == 0)
 {
 closedir(dfd);
 return true;
 } 
 dp = readdir(dfd);
 }
 
 fprintf(stderr, "file \'%s\' not found\n", filename);
 
 closedir(dfd);
 return false;
}
void print_name_or_link(const char* filename, struct Options opts, mode_t mode)
{
 if (mode & S_IFLNK)
 {
 char link_buf[512];
 int count = readlink(filename, link_buf, sizeof(link_buf));
 if (count >= 0)
 {
 link_buf[count] = '0円';
 
 if (opts.using_Q)
 {
 printf(" \"%s\" -> \"%s\"\n", filename, link_buf);
 }
 else
 {
 printf(" %s -> %s \n", filename, link_buf);
 }
 
 return;
 }
 }
 if (opts.using_Q)
 {
 printf(" \"%s\"", filename);
 }
 else
 {
 printf(" %s", filename);
 }
 
 if (opts.using_p && is_dir(filename))
 {
 putchar('/');
 }
 
 putchar('\n');
}
void display_stats(char* dir, char* filename, struct Options opts)
{
 if (!is_in_dir(dir, filename))
 {
 return;
 }
 if (!opts.using_l)
 {
 printf("%s\n", filename);
 return;
 }
 global_dir = dir;
 struct stat sb = get_stats(filename);
 if (opts.using_i)
 {
 printf("%ld ", (long)sb.st_ino);
 }
 
 print_filetype(sb.st_mode);
 print_permissions(sb.st_mode);
 printf(" %d ", sb.st_nlink);
 printf("%10s ", getpwuid(sb.st_uid)->pw_name);
 printf("%10s", getgrgid(sb.st_gid)->gr_name);
 
 if (opts.using_h)
 {
 char buf[10];
 readable_fs(sb.st_size, buf);
 printf(" %8s ", buf);
 }
 else
 {
 printf("%10ld ", (long)sb.st_size);
 }
 
 print_time(sb.st_mtime);
 print_name_or_link(filename, opts, sb.st_mode);
}
bool can_recurse_dir(const char* parent, char* curr)
{
 if (!strcmp(".", curr) || !strcmp("..", curr))
 {
 return false;
 }
 
 char path[2048];
 sprintf(path, "%s/%s", parent, curr);
 struct stat sb;
 
 if (lstat(path, &sb) < 0)
 {
 perror(path);
 exit(EX_IOERR);
 }
 
 return S_ISDIR(sb.st_mode);
}
void recurse_dirs(char* dir, struct Options opts)
{
 DIR* dfd = opendir(dir);
 struct dirent* dp = readdir(dfd);
 printf("\n%s:\n", dir);
 
 while ((dp = readdir(dfd)))
 {
 const bool omit_hidden = !opts.using_a && dp->d_name[0] == '.';
 if (!omit_hidden)
 {
 if (opts.using_l)
 {
 display_stats(dir, dp->d_name, opts);
 }
 else
 {
 printf("%s\n", dp->d_name);
 }
 }
 
 if (can_recurse_dir(dir, dp->d_name))
 {
 char next[1024];
 sprintf(next, "%s/%s", dir, dp->d_name);
 recurse_dirs(next, opts);
 }
 }
 
 closedir(dfd);
}
static int cmp_lex(const void* p1, const void* p2)
{
 const char* str1 = *(const void**)p1;
 const char* str2 = *(const void**)p2;
 return strcasecmp(str1, str2);
}
static int cmp_time(const void* p1, const void* p2)
{
 const char* str1 = *(const char**)p1;
 const char* str2 = *(const char**)p2;
 time_t time1 = get_stats(str1).st_mtime;
 time_t time2 = get_stats(str2).st_mtime;
 return time1 < time2;
}
static int cmp_size(const void* p1, const void* p2)
{
 const char* str1 = *(const char**)p1;
 const char* str2 = *(const char**)p2;
 long int size1 = get_stats(str1).st_size;
 long int size2 = get_stats(str2).st_size;
 return size1 < size2;
}
void display_dir(char* dir, struct Options opts)
{
 DIR* dfd = opendir(dir);
 struct dirent* dp = readdir(dfd);
 long curr_alloc_amt = 30000;
 char** dir_arr = malloc(curr_alloc_amt * sizeof(char*));
 if (!dir_arr)
 {
 abort();
 }
 long int count = 0;
 while (dp)
 {
 const bool omit_hidden = !opts.using_a && dp->d_name[0] == '.';
 if (!omit_hidden)
 {
 if (count >= curr_alloc_amt)
 {
 curr_alloc_amt *= 2;
 dir_arr = realloc(dir_arr, curr_alloc_amt * sizeof(char*));
 if (!dir_arr)
 {
 abort();
 }
 }
 dir_arr[count] = dp->d_name;
 count++;
 }
 dp = readdir(dfd);
 }
 global_dir = dir;
 if (!opts.using_U && opts.using_t)
 {
 qsort(dir_arr, count, sizeof(char*), cmp_time);
 }
 else if (!opts.using_U && opts.using_S)
 {
 qsort(dir_arr, count, sizeof(char*), cmp_size);
 }
 else if (!opts.using_U)
 {
 qsort(dir_arr, count, sizeof(char*), cmp_lex);
 }
 for (long int i = 0; i < count; ++i)
 {
 display_stats(dir, dir_arr[i], opts);
 }
 closedir(dfd);
 free(dir_arr);
}
void scan_dirs(int count, char* args[], struct Options opts)
{
 if (opts.using_d)
 {
 const bool no_dirs_given = (count - optind) == 0;
 if (no_dirs_given)
 {
 display_stats(".", ".", opts);
 }
 // loop through directories
 for (int i = optind; i < count; ++i)
 {
 display_stats(".", args[i], opts);
 }
 return;
 }
 // no other arguments
 if (!opts.using_R && (optind == count))
 {
 display_dir(".", opts);
 }
 
 if (opts.using_R && !opts.using_d)
 {
 recurse_dirs(".", opts);
 return;
 }
 const bool multiple_dirs = (count - optind) >= 2;
 // loop through directories
 for (int i = optind; i < count; ++i)
 {
 if (!is_dir(args[i]))
 {
 display_stats(".", args[i], opts);
 continue;
 }
 // display directory name
 // for multiple directories
 if (multiple_dirs)
 {
 printf("\n%s:\n", args[i]);
 }
 if (!is_in_dir(".", args[i]))
 {
 continue;
 }
 display_dir(args[i], opts);
 }
}
int main(int argc, char* argv[])
{
 scan_dirs(argc, argv, get_opts(argc, argv));
 
 return 0;
}
mdfst13
22.4k6 gold badges34 silver badges70 bronze badges
asked Dec 19, 2015 at 20:50
\$\endgroup\$
0

3 Answers 3

6
\$\begingroup\$
  1. Range in readable_fs().

    readable_fs() should handle even larger sizes - consider future growth, so expand to K,M,G,T,P,E,Z,Y. Ref

    Pedantically there is no safe guard with double size not being some number like 1e100 and thus over-filing char buf[10];. To limit only local code that understands the limitations of readable_fs(), make it static and/or abort on insane numbers.

  2. Naked Magic numbers:

    (curr_mon - 6). Maybe (curr_mon - OLD_DATE).

    can_recurse_dir() char path[2048]; Why 2048? Better to use a #define CAN_BUF_SIZE 2048, etc.

    get_stats(const char* filename) char path[1024]; Maybe MAX_PATH_LEN?

    char time_buf[128]; Why 128?

    print_name_or_link() char link_buf[512]; etc.

    long curr_alloc_amt = 30000; (Why 30000. Why long and not size_t?)

  3. strftime

    // Minor: () not needed
    // strftime(time_buf, sizeof(time_buf), format, t);
    // check result
    if (0 == strftime(time_buf, sizeof time_buf, format, t)) {
     // Handle failure somehow
     time_buf[0] = 0;
     abort();
    }
    
  4. No protection from over-run in various places. Suggest buffer scale by system macros PATH_MAX, NAME_MAX, etc. Also consider snprintf() and check its results.

    // char path[1024];
    char path[PATH_MAX + 1 + NAME_MAX + 1];
    sprintf(path, "%s/%s", global_dir, filename);
    
  5. Incorrect use of qsort()

    The compare function expects +,0,- on less than, equal, or greater than, not 0,1. This can result in UB in cmp_time() and int cmp_size()

    // return time1 < time2;`
    return time1 > time2 - time1 < time2; // Or reverse time1,time2
    
  6. Consider an easier to code *alloc() style: Details

    // dir_arr = realloc(dir_arr, curr_alloc_amt * sizeof(char*));
    dir_arr = realloc(dir_arr, sizeof *dir_arr * curr_alloc_amt);
    
  7. Why use type long when field is type off_t in st_size;? Better to use off_t and not change type.

  8. main()

    I'd expect the program to return useful codes on return than only 0 to indicate success/failure/type of failure.

  9. const

    Could use const, potentially better code and it conveys that *dir is not modified.

    // display_dir(char* dir,
    display_dir(const char* dir,
    
  10. Array elements are better indexed with size_t than long in print_name_or_link(). See also #2: last magic number

    // long int count = 0;
    size_t count = 0;
    

    [Edit]

  11. Above idea also applies to

    // long int size1 = get_stats(str1).st_size;
    // long int size2 = get_stats(str2).st_size;
    // return size1 < size2;
    off_t size1 = get_stats(str1).st_size;
    off_t size2 = get_stats(str2).st_size;
    return size1 > size2 - size1 < size2
    
  12. To print types lacking a printf specifier, cast to the widest available integer of matching sign-ness. In this case, "ino_t shall be defined as unsigned integer" Ref and How to print types of unknown size like ino_t?

    // printf("%ld ", (long)sb.st_ino);
    printf("%ju ", (uintmax_t) sb.st_ino);
    
answered Dec 20, 2015 at 22:06
\$\endgroup\$
3
  • \$\begingroup\$ There doesn't seem to be a specific way to format off_t in printf(), and other solutions I've looked at involves casting that value. \$\endgroup\$ Commented Dec 20, 2015 at 23:53
  • \$\begingroup\$ @Jamal Recommend: (best) printf("%jd", (intmax) off_t_var); or printf("%lld", (long long) off_t_var); or stackoverflow.com/questions/586928/… IAC, that is for printing. Continue to use off_t for math and related variables. \$\endgroup\$ Commented Dec 21, 2015 at 0:10
  • \$\begingroup\$ Thanks. I somehow didn't find the specific types for the others. As for the returning in main(), I can change the relevant display functions to return ints so that they can fall back to main(). \$\endgroup\$ Commented Dec 21, 2015 at 0:39
10
\$\begingroup\$

Looks pretty clean for the most part! Here's the source of the actual GNU implementation used on most systems for comparison (if you already haven't taken a look at it). Just a few notes:

  • Looks like part of this function could be converted into a for loop:

    void readable_fs(double size, char* buf)
    {
     const char* units[] = { "", "K", "M", "G", "T" };
     int i = 0;
     while (size > 1024)
     {
     size /= 1024;
     ++i;
     }
     sprintf(buf, "%.*f%s", i, size, units[i]);
    }
    

    Would still retain the same functionality, but reduce LOC and make it more readable/concise.

  • I also noticed you have a magic number in the above function (as well as some other places). I think it best in this situation to just place a comment above its usage to specify what that number is.

  • I'm not a big fan of the using_t name, since the _t is reserved for POSIX type implementations. t_option would be better? But if you change that one name you may as well be consistent and change all the others to be similar.

  • Prefer snprintf() to sprintf().

  • You should use size_t in your for loop counter variable here:

    for (long int i = 0; i < count; ++i)
    

    You could use this in other places too, as I see you are starting from 0 and only counting up (meaning you'd have no negative numbers, right?). I'm not saying size_t is a good fit for every situation though, use the smallest type that will fit the situation as appropriately as possible to. stdint.h is your best friend here.

  • No reason to return 0;, can/should be removed.

answered Dec 19, 2015 at 23:47
\$\endgroup\$
3
  • \$\begingroup\$ Any ideas about the global (even though that source uses a bunch)? it seems that I can remove its use in other places but display_dir(), otherwise the code would break if I specify a different directory. \$\endgroup\$ Commented Dec 19, 2015 at 23:53
  • \$\begingroup\$ @Jamal Here's a question for you: Is that REALLY needed at a global scope? Another way to pose the question: does it have the smallest scope possible? If you believe so, then I think it's permissible. \$\endgroup\$ Commented Dec 20, 2015 at 0:11
  • \$\begingroup\$ Yes, I believe it is. I could probably find a workaround if it were easier to pass around the directory, such as in the sorting functions. \$\endgroup\$ Commented Dec 20, 2015 at 0:28
6
\$\begingroup\$

Well written code. Good job.

One thing I'd change is to instead use some meaningful name for each flag, instead of the flag letter. I don't recall by memory what -U does in the command, for instance, and so seeing a using_U in the code will evoke the same issue. You might remember the meaning of each flag right now since you have just implemented it, but I guarantee you'll forget them pretty soon. So I'd instead write struct Options someway like this:

struct Options
{
 bool print_all; // -a, --all
 bool list_dirs_only; // -d, --directory
 bool human_readable_sizes; // -h, --human-readable
 // etcetera
};

By the way, you're not handling the long form of each flag, which actually I don't think are supported everywhere, but as the link you've provided shows, seem to be defined. Should be easy for you to extend support for them using getop_long if you're interested...


display_stats(), display_dir(), recurse_dirs() should all take the dir name string by const char *. The present non-const signatures pass the wrong impression that the strings can be modified. It doesn't seems to be the case, as far as I can tell.

Also, global_dir might be better named as current_dir? That one also should be a const pointer, regardless.

I'm not sure why you marked some of the functions static. All except main() could be static, but since this is a single file program, none really need to be static. So in this case, I'd personally go all or nothing.


I suppose that here you actually meant to cast to char** instead? It's all the same for the C compiler but might give the wrong idea to the readers:

static int cmp_lex(const void* p1, const void* p2)
{
 const char* str1 = *(const void**)p1; // char** instead?
 const char* str2 = *(const void**)p2;
 return strcasecmp(str1, str2);
}

This is silly, but that last line that breaks alignment really gives me OCD ;)

switch (mode & S_IFMT)
{
 case S_IFREG: putchar('-'); break;
 case S_IFDIR: putchar('d'); break;
 case S_IFLNK: putchar('l'); break;
 case S_IFCHR: putchar('c'); break;
 case S_IFBLK: putchar('b'); break;
 case S_IFSOCK: putchar('s'); break; // <-- oh god...
 case S_IFIFO: putchar('f'); break;
}
answered Dec 20, 2015 at 2:59
\$\endgroup\$
1
  • 2
    \$\begingroup\$ I was hesitant about long options, but after just ten minutes or so, I think I figured them out. :-) I can work on that OCD spacing as well. ;-) I blame the Powers That Be for not making it S_IFSOK or something like that. \$\endgroup\$ Commented Dec 20, 2015 at 3:41

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.