\$\begingroup\$
\$\endgroup\$
2
This code succeeds in downloading log text files from multiple URLs. Can this be made more efficient?
let private downloadLogs toDirectory =
let createFileDirectories (url : string) =
let directory = $@"{toDirectory}\{url.Substring(13, 3)}"
try
if not (Directory.Exists(directory))
then
Directory.CreateDirectory(directory) |> ignore
with
| ex -> failwith ex.Message
let year = DateTime.Now.Year
let month = DateTime.Now.Month.ToString("d2")
let day = DateTime.Now.Day
let d01LogUrlWithDate = d01LogPartialUrl + $"{year}-{month}-{day}"
let d02LogUrlWithDate = d02LogPartialUrl + $"{year}-{month}-{day}"
let s01LogUrlWithDate = s01LogPartialUrl + $"{year}-{month}-{day}"
let s02LogUrlWithDate = s02LogPartialUrl + $"{year}-{month}-{day}"
let urlWithDateList = [d01LogUrlWithDate; d02LogUrlWithDate; s01LogUrlWithDate; s02LogUrlWithDate]
// create folder for each url file
urlWithDateList |> List.iter createFileDirectories
let buildFinalUrlAndRequest number url =
try
let finalUrl = if number = 0 then $"%s{url}-1.log" else $"%s{url}-{number}.log"
printfn $"Searching for URL: {finalUrl}"
let response = Http.AsyncRequestStream(finalUrl) |> Async.RunSynchronously
if response.StatusCode >= 200 && response.StatusCode <= 299
then
printfn $"URL Ready For Download: {url}"
Ok response
else // this is never triggered
printfn $"Error. Status Code: {response.StatusCode}"
Error $"Status Code: {response.StatusCode}"
with
| ex -> Error $"Request Error: {ex.Message}"
let buildRequestList (state : seq<Result<HttpResponseWithStream, string>>) t =
let range = seq{0..11}
let responseSeq =
seq{
for num in range do
let numberUpdated = num + 1
let response = buildFinalUrlAndRequest numberUpdated t
response
}
responseSeq |> Seq.filter (fun x -> x |> function | Ok _ -> true | Error _ -> false) |> Seq.append state
let getResponse urls =
urls
|> Seq.fold buildRequestList Seq.empty
|> Seq.choose (fun x ->
x |> function
| Ok response ->
Some (async{
try
let fileName = Path.GetFileName(response.ResponseUrl.Replace(@"/", @"\"))
let subDir = response.ResponseUrl.Substring(13,3)
let pathName = $"""{toDirectory}\{subDir}\{fileName}"""
use outputFile = new FileStream(pathName, FileMode.Create)
do! response.ResponseStream.CopyToAsync(outputFile) |> Async.AwaitTask
printfn $"File Downloaded: {pathName}"
return pathName
with
| ex ->
printfn $"Download error {ex.Message}"
return ""
}) | _ -> None
)
urlWithDateList
|> getResponse
|> Async.Parallel
|> Async.RunSynchronously |> Array.toList
asked Mar 26, 2021 at 17:38
-
2\$\begingroup\$ In what way? Execution speed? Memory Usage? Code Verbosity? Idomaticism? \$\endgroup\$Maslow– Maslow2021年03月29日 06:28:41 +00:00Commented Mar 29, 2021 at 6:28
-
1\$\begingroup\$ @Maslow Provide the best code review you can given the code. Optimize anything that you can see needs optimization and explain the trade offs. \$\endgroup\$pacmaninbw– pacmaninbw ♦2021年03月29日 12:44:45 +00:00Commented Mar 29, 2021 at 12:44
1 Answer 1
\$\begingroup\$
\$\endgroup\$
1
There appears to be more options for parallelization and async here, and I think the fold overcomplicated what you were trying to do.
#if false
#r $"FSharp.Data"
#endif
open System.Net
open System.IO
open FSharp.Data
let d01LogUrlWithDate, d02LogUrlWithDate, s01LogUrlWithDate, s02LogUrlWithDate = $"","","",""
module Async =
let map f x =
async {
let! x' = x
return f x'
}
let bind f x =
async{
let! x' = x
return! f x'
}
let private downloadLogs toDirectory =
let createFileDirectories (url : string) =
let directory = @"{toDirectory}\{url.Substring(13, 3)}"
// if it is going to blow up, let it blow up keeping the entire stack trace
if not <| Directory.Exists directory
// I prefer strongly typed ignore, less prone to refactoring issues
then Directory.CreateDirectory directory |> ignore<DirectoryInfo>
let urlWithDateList = // less clutter in scope
let year = DateTime.Now.Year
let month = DateTime.Now.Month.ToString("d2")
let day = DateTime.Now.Day
[
d01LogUrlWithDate
d02LogUrlWithDate
s01LogUrlWithDate
s02LogUrlWithDate
] |> List.map (fun x -> x + $"{year}-{month}-{day}")
// create folder for each url file
urlWithDateList |> List.iter createFileDirectories
let buildFinalUrlAndRequest number url =
async {
try // this area wasn't as complex, so I left the try alone
// I'd think this should be $"%s{url}-{number+1}.log" or else 1 would stop on zero's output
let finalUrl = if number = 0 then $"%s{url}-1.log" else $"%s{url}-{number}.log"
printfn $"Searching for URL: {finalUrl}"
let! response = FSharp.Data.Http.AsyncRequestStream finalUrl
if response.StatusCode >= 200 && response.StatusCode <= 299
then
printfn $"URL Ready For Download: {url}"
return Ok response
else // this is never triggered
printfn $"Error. Status Code: {response.StatusCode}"
return Error $"Status Code: {response.StatusCode}"
with
| ex -> return Error $"Request Error: {ex.Message}"
}
// if you are just throwing away the Result.Error options, then you don't need to use Results
let buildRequestList t : seq<Async<Result<HttpResponseWithStream,string>>> =
seq{0..11}
|> Seq.map(fun i ->
async {
let numberUpdated = i + 1
let! response = buildFinalUrlAndRequest numberUpdated t
return response
}
)
let getResponse urls =
urls
|> Seq.collect buildRequestList
|> Async.Parallel
// these two Async.map calls could be composed together
|> Async.map(Seq.choose(function | Ok x -> Some x | _ -> None))
|> Async.map (Seq.map (fun response ->
async{
let fileName = Path.GetFileName(response.ResponseUrl.Replace(@"/", "\"")) // fix syntax highlighting here on stack
let subDir = response.ResponseUrl.Substring(13,3)
let pathName = $@"{toDirectory}\{subDir}\{fileName}" // fix syntax highlighting here on Stack
use outputFile = new FileStream(pathName, FileMode.Create)
do! response.ResponseStream.CopyToAsync(outputFile) |> Async.AwaitTask
printfn $"File Downloaded: {pathName}"
return pathName
}
|> Async.Catch // this way, no try needed above
|> Async.map( // map the catch to option, and log the errors
function
| Choice1Of2 x -> Some x
| Choice2Of2 ex -> printfn $"Download error {ex.Message}"; None
)
))
|> Async.bind Async.Parallel
urlWithDateList
|> getResponse
|> Async.RunSynchronously
|> Seq.choose id // discard empty values
|> Seq.toList
-
\$\begingroup\$ Excellent! I want to learn all that I can in regards to writing idiomatic and efficient F# code. Thanks. \$\endgroup\$user1206480– user12064802021年03月29日 19:36:28 +00:00Commented Mar 29, 2021 at 19:36
lang-ml