4
\$\begingroup\$

I am trying to make a simple clone of the command line utility grep. I'm working on it just for fun and to practice Go. The thing is, I am trying to make it in a way that if several files are given at the command line, a different goroutine is generated to speed up the process. Even so, standard grep still gets results faster by an amount of around 60% less processing time.

Here it is the code for it. Can you please provide some opinions about it?

package main
import (
 "bufio"
 "flag"
 "fmt"
 "io"
 "os"
 "path/filepath"
 "regexp"
 "sync"
)
type result struct {
 Filename string
 Line string
 LineNumber int
 Error error
}
var strRex string
var filenames []string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults []result
var verbose = false
var recursive string
var recursiveFileList []string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool
func init() {
 var rexError error
 flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
 flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
 flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
 flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
 flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")
 flag.Parse()
 if strRex == "" {
 fmt.Fprintf(os.Stderr, "The '-r' (regular expression flag is mandatory)\n")
 os.Exit(1)
 }
 regRex, rexError = regexp.Compile(strRex)
 if rexError != nil {
 fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
 os.Exit(2)
 }
 rexfileFilter, rexError = regexp.Compile(fileFilter)
 if rexError != nil {
 fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", rexfileFilter, rexError.Error())
 os.Exit(3)
 }
 if recursive != "" {
 var err error
 filenames, err = walkDir(recursive)
 if err != nil {
 fmt.Fprintf(os.Stderr, "%s", err.Error())
 }
 } else {
 filenames = flag.Args()
 }
}
func main() {
 stat, err := os.Stdin.Stat()
 if err != nil {
 fmt.Fprintf(os.Stderr, "There is an error reading from stdin : %s", err)
 os.Exit(3)
 }
 if (stat.Mode() & os.ModeNamedPipe) != 0 {
 grepStdin(os.Stdin, regRex)
 } else {
 chResults := make(chan *result, 4)
 wg.Add(len(filenames))
 for _, fn := range filenames {
 go grep(fn, regRex, &wg, chResults)
 }
 go func(wait *sync.WaitGroup, ch chan<- *result) {
 wg.Wait()
 close(ch)
 }(&wg, chResults)
 for res := range chResults {
 if verbose {
 formatRes(res, 1)
 } else {
 formatRes(res, 2)
 }
 }
 }
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
 bf := bufio.NewScanner(ptr)
 var lineno = 1
 for bf.Scan() {
 // There is no XOR in Golang, so you ahve to do this :
 if line := bf.Text(); (reg.Match([]byte(line)) && !inverseSearch) || (!reg.Match([]byte(line)) && inverseSearch) {
 formatRes(&result{
 Line: line,
 LineNumber: lineno,
 Error: nil,
 }, 3)
 }
 lineno++
 }
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup, ch chan<- *result) {
 fd, err := os.Open(file)
 if err != nil {
 ch <- &result{
 Filename: file,
 Error: err,
 }
 }
 bf := bufio.NewScanner(fd)
 var lineno = 1
 for bf.Scan() {
 // There is no XOR in Golang, so you ahve to do this :
 if line := bf.Text(); (reg.Match([]byte(line)) && !inverseSearch) || (!reg.Match([]byte(line)) && inverseSearch) {
 ch <- &result{
 Filename: file,
 Line: line,
 LineNumber: lineno,
 Error: nil,
 }
 }
 lineno++
 }
 wg.Done()
}
func formatRes(r *result, format int) {
 if format == 1 {
 if r.Error == nil {
 fmt.Printf("%d - %s - %s\n", r.LineNumber, r.Filename, r.Line)
 } else {
 fmt.Fprintf(os.Stderr, "%s - %s \n", r.Filename, r.Error)
 }
 }
 if format == 2 {
 if r.Error == nil {
 fmt.Printf("%s\n", r.Line)
 } else {
 fmt.Fprintf(os.Stderr, "%s - %s \n", r.Filename, r.Error)
 }
 }
 if format == 3 {
 if r.Error == nil {
 fmt.Printf("%s\n", r.Line)
 } else {
 fmt.Fprintf(os.Stderr, "%s\n", r.Error)
 }
 }
}
func walkDir(path string) ([]string, error) {
 list := make([]string, 0, 50)
 err := filepath.Walk(".",
 func(path string, info os.FileInfo, err error) error {
 if err != nil {
 return err
 }
 if fileFilter != "" {
 if rexfileFilter.Match([]byte(filepath.Base(path))) {
 list = append(list, path)
 }
 } else {
 list = append(list, path)
 }
 return nil // Unreachable code
 })
 if err != nil {
 return nil, err
 }
 return list, nil
}
200_success
145k22 gold badges190 silver badges478 bronze badges
asked Dec 30, 2018 at 7:11
\$\endgroup\$
1
  • 3
    \$\begingroup\$ Code must be correct. To be sure that code is correct, code must be readable. I find your double-spaced code to be unreadable. \$\endgroup\$ Commented Dec 30, 2018 at 13:06

2 Answers 2

4
\$\begingroup\$
if line := bf.Text(); (reg.Match([]byte(line)) && !inverseSearch) || (!reg.Match([]byte(line)) && inverseSearch) {

As in other C-like languages, golang evaluates binary logical operators conditionally left-to-right. As written, the program is often going to evaluate the reg.Match() twice, because it appears twice and in each subexpression it is tested before inverseSearch. As this is the program's most expensive operation, that's significant for performance.

if line := bf.Text(); (!inverseSearch && reg.Match([]byte(line))) || (inverseSearch && !reg.Match([]byte(line))) {

should avoid the double evaluation. Or write an xor helper function.

Other things:

Don't test for stdin being usable if you aren't reading from it. It's common for programs executing a program in a subprocess to close filehandles that the program shouldn't need to use. In other words /bin/grep foo filename <&- works, your ./grep -r foo filename <&- does not and should.

Rewrite/replace grepStdin() so that it reuses grep(). Nobody likes having the algorithm implemented twice in the same program. You can use /dev/stdin as a filename for stdin on *nix; or, keep the file opening part separate and have a common function for grepping over the opened file handle used by both codepaths.

The format parameter to formatRes uses magic constant values that one has to read its implementation to understand. It would be better to replace it with verbose bool so the verbose flag can be passed directly and its meaning is then obvious. The stdin case would be better not to treat specially here also.

answered Dec 30, 2018 at 16:23
\$\endgroup\$
0
6
\$\begingroup\$

The thing is, even so, standard grep still gets results faster by an amount of 60 % less processing time

For more information on what makes GNU grep fast, read this by the original author.


Here is some feedback on writing idiomatic Go.

Lines

Your code is oddly formatted.

It is very uncommon to have empty lines everywhere. In your code, empty lines more than double the length of your code. In most cases, I only add extra lines to separate functions, structs, if statements, for loops, etc.

package main
import (
 "bufio"
 "flag"
 "fmt"
 "io"
 "os"
 "path/filepath"
 "regexp"
 "sync"
)
type result struct {
 Filename string
 Line string
 LineNumber int
 Error error
}

Becomes:

package main
import (
 "bufio"
 "flag"
 "fmt"
 "io"
 "os"
 "path/filepath"
 "regexp"
 "sync"
)
type result struct {
 Filename string
 Line string
 LineNumber int
 Error error
}

Line length

Keep lines to a length of 80 characters. It makes code easier to read on smaller monitors or if you split your screen to view multiple things.

flag.StringVar(&strRex, "r", "", "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false, "It sets verbose output (Basically showing filename and line number for each match)")
flag.BoolVar(&inverseSearch, "i", false, "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "", "Recursively find all files starting from the current folder and apply the given search to them")
flag.StringVar(&fileFilter, "FF", "", "Filter to be applied to the filenames when used recursevily")

Becomes:

flag.StringVar(&strRex, "r", "",
 "Regular expresion to match against the input files")
flag.BoolVar(&verbose, "v", false,
 "It sets verbose output (Basically showing filename and line number "+
 "for each match)")
flag.BoolVar(&inverseSearch, "i", false,
 "It does what you might expect.. reverse the search")
flag.StringVar(&recursive, "R", "",
 "Recursively find all files starting from the current folder and "+
 "apply the given search to them")
flag.StringVar(&fileFilter, "FF", "",
 "Filter to be applied to the filenames when used recursevily")

Use a C-style for loop

In grepStdin() you initialize lineno, increment it and only use it in the for loop. That's a standard for loop. Once inside the for loop, we can rename it to l because it's purpose is clear from it's usage.

var lineno = 1
for bf.Scan() {
 // There is no XOR in Golang, so you ahve to do this:
 if line := bf.Text(); (reg.Match([]byte(line)) && !inverseSearch) || (!reg.Match([]byte(line)) && inverseSearch) {
 formatRes(&result{
 Line: line,
 LineNumber: lineno,
 Error: nil,
 }, 3)
 }
 lineno++
}

Becomes:

for l := 1; bf.Scan(); l++ {
 // There is no XOR in Golang, so you ahve to do this:
 if line := bf.Text(); (reg.Match([]byte(line)) && !inverseSearch) || (!reg.Match([]byte(line)) && inverseSearch) {
 formatRes(&result{
 Line: line,
 LineNumber: l,
 Error: nil,
 }, 3)
 }
}

Combine multiple var declarations

You can combine multiple variables declared with var as such:

var strRex string
var filenames []string
var regRex *regexp.Regexp
var wg sync.WaitGroup
var allResults []result
var verbose = false
var recursive string
var recursiveFileList []string
var fileFilter string
var rexfileFilter *regexp.Regexp
var inverseSearch bool

Becomes:

var (
 strRex string
 filenames []string
 regRex *regexp.Regexp
 wg sync.WaitGroup
 allResults []result
 verbose = false
 recursive string
 recursiveFileList []string
 fileFilter string
 rexfileFilter *regexp.Regexp
 inverseSearch bool
)

This is far more readable.

Use log to write to standard error

You can utilize the log package to print (possibly fatal) errors to standard error.

fmt.Fprintf(os.Stderr,
 "The '-r' (regular expression flag is mandatory)\n")
os.Exit(1)

Becomes:

if strRex == "" {
 log.Fatalln("The regular expression flag '-r' is mandatory")
}

And

if rexError != nil {
 fmt.Fprintf(os.Stderr, "Your regex '%s' cant compile. Error : %s", strRex, rexError.Error())
 os.Exit(2)
}

Becomes:

if rexError != nil {
 log.Fatalf("Your regex '%s' cant compile. Error : %s\n", strRex,
 rexError)
}

Notice that you don't need to call Error() to get the string from it.

By doing this, you won't get custom return values. I don't think they're very useful in lieu of a good error message.

Combine if and assignment

var err error
filenames, err = walkDir(recursive)
if err != nil {
 fmt.Fprintf(os.Stderr, "%s", err.Error())
}

Becomes:

var err error
if filenames, err = walkDir(recursive); err != nil {
 log.Println(err)
}

Move things out of the global scope

Your wg wait group is in the global scope. It doesn't need to be. You already pass wait to grep(), so use it.

allResults is never used. recursiveFileList is never used.

We can also move all of your flag variables to a common struct. This is just a preference, but it tells readers what is a flag and what isn't.

Use a switch statement

In formatRes you can use a switch statement instead of multiple if statements.

You can also clean up how you print things, like using Println instead of Printf.

switch format {
case 1:
 if r.Error == nil {
 fmt.Printf("%d - %s - %s\n", r.LineNumber, r.Filename, r.Line)
 } else {
 log.Printf("%s - %s\n", r.Filename, r.Error)
 }
 break
case 2:
 if r.Error == nil {
 fmt.Printf("%s\n", r.Line)
 } else {
 log.Printf("%s - %s\n", r.Filename, r.Error)
 }
 break
case 3:
 if r.Error == nil {
 fmt.Printf("%s\n", r.Line)
 } else {
 log.Println(r.Error)
 }
}

Move your condition to a separate function

Your condition

(reg.Match([]byte(line)) && !fl.inverseSearch) || (!reg.Match([]byte(line)) && fl.inverseSearch)

Is long, and as Colin points out, you can take advantage of short circuiting.

Go does not provide XOR, but it doesn't need to. I'll leave that for you to implement.

We can define a function as such:

func match(reg *regexp.Regexp, line string) bool {
 return !fl.inverseSearch && reg.Match([]byte(line)) || (fl.inverseSearch && !reg.Match([]byte(line)))
}

Use grepStdin() in grep()

As Colin says, they contain duplicate code. I'll leave that for you to implement.

Conclusion

There are many other places to clean up the code, but I think you'll get the gist. Here is the final code I ended up with:

package main
import (
 "bufio"
 "flag"
 "fmt"
 "io"
 "log"
 "os"
 "path/filepath"
 "regexp"
 "sync"
)
type result struct {
 Filename string
 Line string
 LineNumber int
 Error error
}
type flags struct {
 strRex string
 recursive string
 fileFilter string
 verbose bool
 inverseSearch bool
}
var (
 fl flags
 filenames []string
 regRex *regexp.Regexp
 rexfileFilter *regexp.Regexp
)
func init() {
 dfl := flags{
 strRex: "",
 verbose: false,
 inverseSearch: false,
 recursive: "",
 fileFilter: "",
 }
 var rexError error
 flag.StringVar(&fl.strRex, "r", dfl.strRex,
 "Regular expresion to match against the input files")
 flag.StringVar(&fl.recursive, "R", dfl.recursive,
 "Recursively find all files starting from the current folder and "+
 "apply the given search to them")
 flag.StringVar(&fl.fileFilter, "FF", dfl.fileFilter,
 "Filter to be applied to the filenames when used recursevily")
 flag.BoolVar(&fl.verbose, "v", dfl.verbose,
 "It sets verbose output (Basically showing filename and line number "+
 "for each match)")
 flag.BoolVar(&fl.inverseSearch, "i", dfl.inverseSearch,
 "It does what you might expect.. reverse the search")
 flag.Parse()
 if fl.strRex == "" {
 log.Fatalln("The regular expression flag '-r' is mandatory")
 }
 regRex, rexError = regexp.Compile(fl.strRex)
 if rexError != nil {
 log.Fatalf("Your regex '%s' cant compile. Error : %s\n", fl.strRex,
 rexError)
 }
 rexfileFilter, rexError = regexp.Compile(fl.fileFilter)
 if rexError != nil {
 log.Fatalf("Your regex '%s' cant compile. Error : %s", rexfileFilter,
 rexError)
 }
 if fl.recursive != "" {
 var err error
 if filenames, err = walkDir(fl.recursive); err != nil {
 log.Println(err)
 }
 } else {
 filenames = flag.Args()
 }
}
func main() {
 stat, err := os.Stdin.Stat()
 if err != nil {
 log.Fatalf("There is an error reading from stdin: %s", err)
 }
 var wait sync.WaitGroup
 if (stat.Mode() & os.ModeNamedPipe) != 0 {
 grepStdin(os.Stdin, regRex)
 } else {
 chResults := make(chan *result, 4)
 wait.Add(len(filenames))
 for _, fn := range filenames {
 go grep(fn, regRex, &wait, chResults)
 }
 go func(wait *sync.WaitGroup, ch chan<- *result) {
 wait.Wait()
 close(ch)
 }(&wait, chResults)
 for res := range chResults {
 if fl.verbose {
 formatRes(res, 1)
 } else {
 formatRes(res, 2)
 }
 }
 }
}
func match(reg *regexp.Regexp, line string) bool {
 return !fl.inverseSearch && reg.Match([]byte(line)) || (fl.inverseSearch && !reg.Match([]byte(line)))
}
func grepStdin(ptr io.Reader, reg *regexp.Regexp) {
 bf := bufio.NewScanner(ptr)
 for l := 1; bf.Scan(); l++ {
 if line := bf.Text(); match(reg, line) {
 formatRes(&result{
 Line: line,
 LineNumber: l,
 Error: nil,
 }, 3)
 }
 }
}
func grep(file string, reg *regexp.Regexp, wait *sync.WaitGroup,
 ch chan<- *result) {
 fd, err := os.Open(file)
 if err != nil {
 ch <- &result{
 Filename: file,
 Error: err,
 }
 }
 bf := bufio.NewScanner(fd)
 for l := 1; bf.Scan(); l++ {
 if line := bf.Text(); match(reg, line) {
 ch <- &result{
 Filename: file,
 Line: line,
 LineNumber: l,
 Error: nil,
 }
 }
 }
 wait.Done()
}
func formatRes(r *result, format int) {
 switch format {
 case 1:
 if r.Error == nil {
 fmt.Printf("%d - %s - %s\n", r.LineNumber, r.Filename, r.Line)
 } else {
 log.Printf("%s - %s\n", r.Filename, r.Error)
 }
 break
 case 2:
 if r.Error == nil {
 fmt.Println(r.Line)
 } else {
 log.Printf("%s - %s\n", r.Filename, r.Error)
 }
 break
 case 3:
 if r.Error == nil {
 fmt.Println(r.Line)
 } else {
 log.Println(r.Error)
 }
 }
}
func walkDir(path string) ([]string, error) {
 list := make([]string, 0, 50)
 err := filepath.Walk(".",
 func(path string, info os.FileInfo, err error) error {
 if err != nil {
 return err
 }
 if fl.fileFilter != "" {
 if rexfileFilter.Match([]byte(filepath.Base(path))) {
 list = append(list, path)
 }
 } else {
 list = append(list, path)
 }
 return nil
 })
 if err != nil {
 return nil, err
 }
 return list, nil
}

In my opinion, it's far more readable and to-the-point.

Hope this helps!

answered Dec 30, 2018 at 18:39
\$\endgroup\$
0

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.