I stumbled across something a long while ago, fetching the environments variables from say env under linux (say on a pod via argos), anyway, point is I could fetch something like that:
rabbitmq__port=a
rabbitmq__password=b
rabbitmq__ssl__enabled=b
rabbitmq__hostname=c
rabbitmq__username=d
rabbitmq__virtualhost=e
rabbitmq__ssl__serverName=f
and wanted to convert it to json format, using the __ as a nestedness separator:
{
"rabbitmq": {
"port": "a",
"password": "b",
"ssl": {
"enabled": "b",
"serverName": "f"
},
"hostname": "c",
"username": "d",
"virtualhost": "e"
}
}
I've come up with the implementation below, and while it works, I found it a wee too verbose for what it is doing, and I'm trying to figure out something that would make it shorter and less convoluted and more idiomatic:
module Playground
open System
open System.Text.Json
open System.Collections.Generic
open FSharpPlus
module EnvJson =
[<Literal>]
let private EnvKeySeparator = "__"
[<Literal>]
let private EnvLineKeyValueSeparator = "="
let private addKeyTokensToJsonDict jsonDict (envValue: string) envKeyTokens =
let folder (state: IDictionary<string, obj>) (i, envKeyToken) =
if i = Array.length envKeyTokens - 1 then
state[envKeyToken] <- envValue
jsonDict
else if Dict.containsKey envKeyToken state then
state[envKeyToken] :?> IDictionary<string, obj>
else
let nestedState = Dictionary()
state[envKeyToken] <- nestedState
nestedState
Array.indexed envKeyTokens |> Array.fold folder jsonDict
let private addLineToJsonDict jsonDict envLine =
let envLineTokens =
String.trimWhiteSpaces envLine
|> String.split [ EnvLineKeyValueSeparator ]
|> Seq.toArray
let envLineKey, envLineValue =
Array.item 0 envLineTokens, Array.item 1 envLineTokens
envLineKey
|> String.split [ EnvKeySeparator ]
|> Seq.toArray
|> addKeyTokensToJsonDict jsonDict envLineValue
let ofLines options source =
JsonSerializer.Serialize(value = Seq.fold addLineToJsonDict (Dictionary()) source, options = options)
let ofText options source =
source
|> String.split [ Environment.NewLine ]
|> Seq.toArray
|> Array.filter (String.IsNullOrWhiteSpace >> not)
|> ofLines options
[<EntryPoint>]
let main _ =
"""
rabbitmq__port=a
rabbitmq__password=b
rabbitmq__ssl__enabled=b
rabbitmq__hostname=c
rabbitmq__username=d
rabbitmq__virtualhost=e
rabbitmq__ssl__serverName=f
"""
|> EnvJson.ofText JsonSerializerOptions.Default
|> printfn "%s"
```
2 Answers 2
Gebb has shown you a good approach using immutable data structures, but there's an alternative that's not too dissimilar to yours but is perhaps slightly more readable. This approach uses a few alternative techniques outlined below.
Recursion
Your function addKeyTokensToJsonDict is traversing down through nested dictionaries (creating them if necessary) before finally adding the value once it reaches the final key in the array. There are a few signs that your Seq.fold operation may not be the best fit for this kind of traversal.
You're using indices within the iteration, in particular to identify the special case where you're at the last key in the array.
Your state object is always the dictionary at the current level of nesting, except for the very final iteration where you return the original dictionary.
Recursion is a good alternative fit for this kind of traversal. It allows for much more elegant handling of the special case where you're at the last key and - unlike Seq.fold - doesn't require that you return anything.
Declarative Programming
In some cases it can be clearer to use a declarative programming style rather than a functional one. In particular, your use of Seq.fold to construct the top level dictionary is easy enough to understand, but actually since the state object reference never changes you could replace the Seq.fold operation with a simple for loop, i.e.
let dict = Dictionary()
for line in lines do
addLineToJsonDict dict line
JsonSerializer.Serialize(dict, options)
Regular Expressions
Regular expressions can be very useful for parsing text input, particularly because they allow you to perform both validation and parsing at the same time. The text input parsing part of your code can be simplified by trying to parse each line using a regex that matches text in the form key=value and extracting both the key and the value when a match is found.
Putting this all together you end up with something like the following:
open System
open System.Collections.Generic
open System.Text.Json
open System.Text.RegularExpressions
module EnvJson =
let [<Literal>] private EnvKeySeparator = "__"
// Regular expression that will match a single environment variable in format
// "name=value". The name and value are captured as separate character groups
// so that they can be extracted once the match has been performed.
// ^ :: start of string
// (\w+) :: one or more alphanumeric or underscore characters (capture group #1)
// = :: the '=' character
// (\w+) :: one or more alphanumeric or underscore characters (capture group #2)
// $ :: end of string
let private EnvVarRegex = Regex(@"^(\w+)=(\w+)$")
/// Add a value to a dictionary at the level of nesting specified by a list of keys.
/// New nested dictionaries will be created where required in order to traverse down
/// to the final key.
let rec private addNestedValue keys (value: string) (dict: Dictionary<string, obj>) =
match keys with
| [] -> failwith "no key provided"
| [ key ] ->
// If there is a single key in the list then we just add the value to the
// current dictionary. Note that this will overwrite any existing value
// stored against this key. It may be worth checking for an existing value
// beforehand and raising an exception if one is found.
dict[key] <- value
| key :: otherKeys ->
// If there are multiple keys then we need the value for the first key in
// the list to be a dictionary. If there's no object in the current
// dictionary for this key then add a new nested dictionary.
if not (dict.ContainsKey(key)) then
dict[key] <- Dictionary<string, obj>()
// Retrieve the nested dictionary. This will either be the dictionary we
// just created above or will be an object that already existed in the
// current dictionary. Note that if it's an existing object we assume it's
// a dictionary. If a string value has already been added for this key then
// an exception will be thrown when the type cast is attempted. It may be
// worth checking the type of the object before performing the cast so that
// a more meaningful exception can be raised.
let nestedDict = dict[key] :?> Dictionary<string, obj>
// Add the value to the nested dictionary based on the remaining keys.
addNestedValue otherKeys value nestedDict
let ofText (serializerOptions: JsonSerializerOptions) (text: string) =
let jsonDict = Dictionary()
for line in text.Split(Environment.NewLine) do
let matchObj = EnvVarRegex.Match(line.Trim())
if matchObj.Success then
let name = string matchObj.Groups[1]
let value = string matchObj.Groups[2]
let keys = name.Split(EnvKeySeparator) |> List.ofArray
addNestedValue keys value jsonDict
JsonSerializer.Serialize(jsonDict, serializerOptions)
[<EntryPoint>]
let main _ =
"""
rabbitmq__port=a
rabbitmq__password=b
rabbitmq__ssl__enabled=b
rabbitmq__hostname=c
rabbitmq__username=d
rabbitmq__virtualhost=e
rabbitmq__ssl__serverName=f
"""
|> EnvJson.ofText JsonSerializerOptions.Default
|> printfn "%s"
0
-
\$\begingroup\$ I'm speechless, and in a good way, thanks a LOT for this thorough review and detailed answer! \$\endgroup\$Natalie Perret– Natalie Perret2024年10月16日 12:01:42 +00:00Commented Oct 16, 2024 at 12:01
Here's a less verbose and perhaps more idiomatic version. It uses an immutable data structure instead of the dictionary.
module EnvJson =
[<Literal>]
let private EnvKeySeparator = "__"
[<Literal>]
let private EnvLineKeyValueSeparator = '='
let private parseLine (line: string) =
match line.Trim().Split EnvLineKeyValueSeparator with
| [| path; value |] -> path.Split EnvKeySeparator |> Array.toList, value
| _ -> failwith $"Invalid line format: {line}"
let private parsePaths (input: string) =
input.Split "\n"
|> Array.filter ((<>)"")
|> Array.map parseLine
let rec private pathsToTree (paths: (string list * string)[]) : obj =
match paths with
| [||] -> invalidArg (nameof paths) "Empty paths array"
// Leaf
| [| [], value |] -> value
// Node
| _ ->
paths
|> Array.groupBy (fst >> List.head)
|> Array.map (fun (key, group) ->
key, pathsToTree [| for path, v in group -> List.tail path, v |])
|> Map
|> box
let ofText (text: string) =
parsePaths text
|> pathsToTree
|> JsonSerializer.Serialize