I wrote a program for downloading files in concurrent / parallel (GOMAXPROCS> 1) manner.
This is my 2nd (non-toy) program written in Go. Please point out areas for improvement:
package main
import (
"errors"
"fmt"
"github.com/kennygrant/sanitize"
"github.com/nu7hatch/gouuid"
"github.com/op/go-logging"
"io"
"net/http"
"net/url"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"time"
)
type TransferMessage struct {
urlstr string
content_length int64
transferred int64
finished bool
filename string
}
const read_chunk = 65536
func MaybeWarn(err error, format string, args ...interface{}) {
if err != nil {
format += " (%s)"
args = append(args, err)
log.Warning(format, args...)
}
}
func MaybeExitErr(err error, format string, args ...interface{}) {
if err != nil {
format += " (%s)"
args = append(args, err)
log.Error(format, args...)
os.Exit(1)
}
}
func GetContLen(resp *http.Response) (content_length int64) {
clen := resp.Header.Get("Content-Length")
content_length, err := strconv.ParseInt(clen, 10, 64)
if err != nil {
content_length = -1
MaybeWarn(err, "Cannot read content length for url: %s", resp.Request.URL)
}
return
}
func GetUrlFilePath(urlstr string) string {
urlp, err := url.Parse(urlstr)
fname := sanitize.Name(urlp.Path)
if err != nil || fname == "" {
uid, err := uuid.NewV4()
MaybeExitErr(err, "Cannot generate uid for filename, problem url: %s, error: %s", urlstr, err)
fname = uid.String()
}
cwd, _ := filepath.Abs(filepath.Dir(os.Args[0]))
return filepath.Join(cwd, fname)
}
func DownloadFile(urlstr string, c chan TransferMessage, resp *http.Response) {
content_length := GetContLen(resp)
fpath := GetUrlFilePath(urlstr)
fo, err := os.OpenFile(fpath, os.O_CREATE|os.O_WRONLY, 0666)
MaybeExitErr(err, "Problem opening file: %s", fpath)
defer fo.Close()
defer resp.Body.Close()
t := time.Now()
for readnum, total := int64(0), int64(0); ; readnum, err = io.CopyN(fo, resp.Body, read_chunk) {
total += readnum
msg := TransferMessage{content_length: content_length,
urlstr: urlstr,
transferred: total,
finished: false,
filename: fpath}
switch err {
case nil:
if time.Since(t).Seconds() > float64(0.2) {
log.Debug("Sending msg %s", msg)
c <- msg
t = time.Now()
}
case io.EOF:
log.Debug("Finished, sending msg %s", msg)
msg.finished = true
c <- msg
return
default:
MaybeWarn(err, "Problem copying file %s, skipping URL %s", fpath, urlstr)
msg.finished = true
c <- msg
return
}
}
}
func UrlRead(urlstr string, c chan TransferMessage) (start_success bool) {
hclient := new(http.Client)
req, err := http.NewRequest("GET", urlstr, nil)
MaybeWarn(err, "Problem making request for URL %s : %s, skipping this URL", urlstr, req)
resp, err := hclient.Do(req)
MaybeWarn(err, "Problem executing HTTP request for URL: %s, skipping this URL", urlstr)
if resp != nil {
switch resp.StatusCode {
case 200:
go DownloadFile(urlstr, c, resp)
return true
default:
MaybeWarn(errors.New(resp.Status), "Response status code: %s, skipping URL: %s", resp.StatusCode, urlstr)
}
}
return false
}
func SetupLog(level string) {
eff_level := logging.INFO
if strings.ToUpper(level) == "DEBUG" {
eff_level = logging.DEBUG
}
logging.SetLevel(eff_level, "")
}
func ConsumeAllNonBlocking(c chan TransferMessage) (msg TransferMessage, read bool) {
for {
flag := false
select {
case msg := <-c:
return msg, true
default:
flag = true
}
if flag {
break
}
}
return TransferMessage{}, false
}
func UpdateDownloadStatus(dchans *map[string]chan TransferMessage, dstatus *map[string]string) {
for urlstr, c := range *dchans {
last_msg, read_some := ConsumeAllNonBlocking(c)
log.Debug("last_msg %s read_some %s", last_msg, read_some)
if read_some {
switch last_msg.content_length {
case -1:
(*dstatus)[urlstr] = "?"
case 0:
(*dstatus)[urlstr] = "100%"
default:
(*dstatus)[urlstr] = fmt.Sprintf("%d%%", last_msg.transferred*100/last_msg.content_length)
}
if last_msg.finished {
(*dstatus)[urlstr] = "100%"
}
}
}
}
func PrintDownloadStatus(dchans *map[string]chan TransferMessage, dstatus *map[string]string) {
for urlstr, _ := range *dchans {
fmt.Printf("%s ", (*dstatus)[urlstr])
}
fmt.Println()
}
func CheckAllValues(dstatus map[string]string, val string) bool {
for _, v := range dstatus {
if v != val {
return false
}
}
return true
}
func WatchDownloads(dchans map[string]chan TransferMessage) {
dstatus := make(map[string]string)
for urlstr, _ := range dchans {
fmt.Printf("%s ", path.Base(GetUrlFilePath(urlstr)))
dstatus[urlstr] = "0%"
}
fmt.Println()
t1 := time.Now().Second()
for {
UpdateDownloadStatus(&dchans, &dstatus)
if CheckAllValues(dstatus, "100%") {
fmt.Println(strings.Repeat("- ", len(dstatus)))
PrintDownloadStatus(&dchans, &dstatus)
log.Debug("Completed %s", dstatus)
break
}
t2 := time.Now().Second()
if t2-t1 == 1 {
PrintDownloadStatus(&dchans, &dstatus)
t1 = t2
}
time.Sleep(time.Duration(100) * time.Millisecond)
}
}
func DispatchDownloads(urls []string) {
dchans := make(map[string]chan TransferMessage)
for _, urlstr := range urls {
c := make(chan TransferMessage, 10)
if UrlRead(urlstr, c) {
dchans[urlstr] = c
}
}
WatchDownloads(dchans)
}
var log = logging.MustGetLogger("")
func main() {
SetupLog("info")
DispatchDownloads(os.Args[1:])
}
1 Answer 1
It's super fine to use globals (like log
) in a small one-file program you've written. To make them more visible declare them at the very top of your file right after your types.
The global read_chunk
doesn't follow Go naming convention. readChunk
or readChunkSize
will do it better.
SetupLog
is used only once in the main function and it does unnecessarily job. Could be replaced with a single function call:
logging.SetLevel(logging.INFO, "")
I see how you've used MaybeWarn
, MaybeExitErr
and understand that it can be tedious to write if err != nil
after most standard library functions. But it is just how it is. I suggest you to get used to it, handle possible errors in the right place and remove Maybe*
functions.
ConsumeAllNonBlocking
has several pitfalls:
flag
variable is useless. You can usebreak
directly in switch body to break the for loop.- The
for
loop is not used, it can be omited. - receive operator has second parameter you might want to use
The function is used only once so I suggest you to write the select statement directly in place where you need it:
select {
case ret, ok := <- c:
default:
// will block
}
There is no a single comment line across all your code. This can be perfectly valid for a simple program like FizzBuzz, but some complex one like concurrent HTTP downloader deserves some explanation. Same stands for user experience: it's wise to provide a usage string and command line arguments like quiet, read links from stdin and so on.
There is several places were the code is not formatted properly. go fmt
is on the rescue.