I'm a relative bash novice but have found it along with jq a great way to manipulate json scraped from various sources. I couldn't find any ready-made scripts to convert arbitrarily deep json to bash arrays, so wrote my own.
My jq script kept in a separate file looks like this:
# Flatten a JSON document into bash associative-array elements, printing one
# `[KEY]=VALUE` line per leaf.
#   key: filter yielding the path (array of keys/indexes) accumulated so far
#   val: filter yielding the value found at that path
# Objects and arrays are descended into; every other value is emitted.
# Empty objects/arrays have no children, so they produce no output at all.
def recursive_function(key; val): val |
  if (type == "object") or (type == "array") then
    path(.[]) as $p | getpath($p) as $v | recursive_function(key + $p; $v)
  else
    # KEY is the JSON text of the path without its outer [ ], e.g.
    #   "performer",0,"name"
    # so bash can splice it back into getpath([...]) / setpath([...]; ...).
    # It is quoted with @sh, exactly like the value, because this output is
    # eval'ed by bash: escaping only the double quotes left $(...), backticks,
    # $var and backslashes inside JSON keys live.
    "[\(key | tojson | .[1:-1] | @sh)]=\(val | @sh)"
  end;

# Entry point: start the recursion from each top-level member.
path(.[]) as $p | getpath($p) as $v | recursive_function($p; $v)
Then my bash functions look like this:
#######################################
# Fill the global associative array `arr` from the JSON text in $schema.
# Globals:   schema (read), arr (written)
# Arguments: none
# Requires:  recursive_script.jq, looked up relative to the current directory
# Returns:   jq's exit status if jq fails (arr is then left untouched),
#            otherwise the status of the array declaration
#######################################
json2array() {
  local elements
  # Capture jq's output first: previously a jq failure (bad JSON, missing
  # script file) was masked and silently produced an empty array.
  elements=$(jq -r -f recursive_script.jq <<<"$schema") || return
  # NOTE(review): eval runs jq's output as shell code. That is only safe
  # while the jq script shell-quotes (@sh) everything it prints.
  eval "declare -Ag arr=($elements)"
}
#######################################
# Rebuild a JSON document from the global associative array `arr`.
# Each key of arr is the inside of a jq path array (e.g. "performer",0,"name")
# and each value becomes a JSON string at that path.
# Globals:   arr (read), json (written; holds the resulting document)
# Arguments: none
# Returns:   0 on success, jq's exit status as soon as one update fails
#######################################
array2json() {
  local ks val
  json='{}'
  for ks in "${!arr[@]}"; do
    val=${arr[$ks]}
    # Double quotes become the HTML entity &quot; because the result is meant
    # to be rendered as HTML. The backslash keeps the & literal: bash 5.2+
    # otherwise expands an unquoted & to the matched text.
    val=${val//\"/\&quot;}
    # Hand the path and value to jq as data (--argjson/--arg) rather than
    # splicing them into the program text, so quotes and backslashes in a
    # value can neither break nor alter the jq program.
    json=$(jq --argjson p "[$ks]" --arg v "$val" 'setpath($p; $v)' <<<"$json") || return
  done
}
Using some json taken from schema.org as an example:
{
"@context": "https://schema.org",
"@type": "MusicEvent",
"location": {
"@type": "MusicVenue",
"name": "Chicago Symphony Center",
"address": "220 S. Michigan Ave, Chicago, Illinois, USA"
},
"name": "Shostakovich Leningrad",
"offers": {
"@type": "Offer",
"url": "/examples/ticket/12341234",
"price": "40",
"priceCurrency": "USD",
"availability": "https://schema.org/InStock"
},
"performer": [
{
"@type": "MusicGroup",
"name": "Chicago Symphony Orchestra",
"sameAs": [
"http://cso.org/",
"http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra"
]
},
{
"@type": "Person",
"image": "/examples/jvanzweden_s.jpg",
"name": "Jaap van Zweden",
"sameAs": "http://www.jaapvanzweden.com/"
}
],
"startDate": "2014-05-23T20:00",
"workPerformed": [
{
"@type": "CreativeWork",
"name": "Britten Four Sea Interludes and Passacaglia from Peter Grimes",
"sameAs": "http://en.wikipedia.org/wiki/Peter_Grimes"
},
{
"@type": "CreativeWork",
"name": "Shostakovich Symphony No. 7 (Leningrad)",
"sameAs": "http://en.wikipedia.org/wiki/Symphony_No._7_(Shostakovich)"
}
]
}
the json2array script would produce an associative array that would look like:
declare -A arr=(
[\"@context\"]="https://schema.org"
[\"@type\"]="MusicEvent"
[\"location\",\"@type\"]="MusicVenue"
[\"location\",\"name\"]="Chicago Symphony Center"
[\"location\",\"address\"]="220 S. Michigan Ave, Chicago, Illinois, USA"
[\"name\"]="Shostakovich Leningrad"
[\"offers\",\"@type\"]="Offer"
[\"offers\",\"url\"]="/examples/ticket/12341234"
[\"offers\",\"price\"]="40"
[\"offers\",\"priceCurrency\"]="USD"
[\"offers\",\"availability\"]="https://schema.org/InStock"
[\"performer\",0,\"@type\"]="MusicGroup"
[\"performer\",0,\"name\"]="Chicago Symphony Orchestra"
[\"performer\",0,\"sameAs\",0]="http://cso.org/"
[\"performer\",0,\"sameAs\",1]="http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra"
[\"performer\",1,\"@type\"]="Person"
[\"performer\",1,\"image\"]="/examples/jvanzweden_s.jpg"
[\"performer\",1,\"name\"]="Jaap van Zweden"
[\"performer\",1,\"sameAs\"]="http://www.jaapvanzweden.com/"
[\"startDate\"]="2014-05-23T20:00"
[\"workPerformed\",0,\"@type\"]="CreativeWork"
[\"workPerformed\",0,\"name\"]="Britten Four Sea Interludes and Passacaglia from Peter Grimes"
[\"workPerformed\",0,\"sameAs\"]="http://en.wikipedia.org/wiki/Peter_Grimes"
[\"workPerformed\",1,\"@type\"]="CreativeWork"
[\"workPerformed\",1,\"name\"]="Shostakovich Symphony No. 7 (Leningrad)"
[\"workPerformed\",1,\"sameAs\"]="http://en.wikipedia.org/wiki/Symphony_No._7_(Shostakovich)"
)
Something that tripped me up is bash removing unescaped quotes, and in my initial version which I've edited, I went through a palaver of converting the path string to a bash array, quoted keys and left index numbers unquoted, but decided it's simpler to get jq to put the keys in escaped quotes.
This way jq's getpath and setpath make it fairly easy to map to-and-from bash.
The reason for the val="${val//\"/&quot;}"
substitution is I discovered the hard way the escaping of quotes inside JSON text got mangled, and I want the final data rendered in HTML anyway.
Anyways, I'm fairly happy with my result, but would be interested to hear of suggestions and improvements from more experienced bash coders.
-
Please do not update the code in your question to incorporate feedback from answers; doing so goes against the Question + Answer style of Code Review. This is not a forum where you should keep the most updated version in your question. Please see "What should I do when someone answers my question?" as well as what you may and may not do after receiving answers. – Sᴀᴍ Onᴇᴌᴀ ♦, commented Jan 23, 2024 at 19:09
-
My improvements to the above got discarded for reasons which have nothing to do with the feedback. Anyone interested, please go to frontiersoftware.co.za/bash/associative-arrays where I keep notes unencumbered by the weird rules here. – joeblog, commented Jan 23, 2024 at 20:47
1 Answer 1
Here is a Unit Tested alternate implementation.
It does not use `eval`, but makes extensive use of jq's builtin `@sh` filter to safely convert JSON data to properly escaped shell strings.
For the conversion back into JSON, it passes the path elements as shell arguments to jq, so there is no need to escape and reprocess it.
The conversion back to JSON also takes care of recreating numerical indexes.
To make the unit test possible, it uses jq's `-S` option to sort keys and `-c` for compact output, which minimizes the length of the strings to compare.
#!/usr/bin/env bash
# jq program that prints one `[KEY]=VALUE` bash associative-array element per
# scalar leaf of its input. Note: `read -d ''` returns non-zero at the end of
# the here-document; the variable is still filled.
read -rd '' obj2bash <<'JQ'
# Bind the path of every scalar leaf to $path. `scalars | true` is used
# instead of the deprecated `leaf_paths` (= paths(scalars)), which filters on
# the leaf's own value and therefore silently drops null and false leaves.
paths(scalars | true) as $path | {
  # Path array -> one bash string holding space-delimited bash strings
  "key": ($path | @sh) | @sh,
  # Leaf value -> bash string (numbers, booleans and null become bare words)
  "value": getpath($path) | @sh
} | (
  # Construct the bash associative array element declaration
  "[" + .key + "]=" + .value
)
JQ
# jq program that sets one leaf in the document being rebuilt.
# Inputs: $j (document so far), $v (leaf value, always stored as a string),
# and the path elements as positional arguments (--args).
read -rd '' bash2obj <<'JQ'
# The JSON document built so far is passed as variable $j
$j |
setpath(
  [
    # Path elements arrive as strings. Only a canonical non-negative integer
    # (0, 1, 2, ...) is turned back into an array index. A blanket
    # `tonumber? // .` would also convert object keys such as "1e3", "-1"
    # or "1.5" into numbers and corrupt the path.
    $ARGS.positional[]
    | if test("^(0|[1-9][0-9]*)$") then tonumber else . end
  ];
  # The leaf value is passed as variable $v
  $v
)
JQ
#######################################
# Run the jq program held in $obj2bash in raw-output mode.
# Globals:   obj2bash (read)
# Inputs:    JSON document on stdin
# Outputs:   whatever the jq program prints, on stdout
# Returns:   jq's exit status
#######################################
json2assoc() {
  local -a converter=(jq -r "$obj2bash")
  "${converter[@]}"
}
#######################################
# Rebuild a JSON document from an associative array and print it.
# Each key is a list of shell-quoted path elements; each value is stored as a
# JSON string at that path.
# Globals:   bash2obj (read)
# Arguments: $1 - NAME of the associative array (passed by reference)
# Outputs:   compact, key-sorted JSON on stdout
# Returns:   2 if $1 does not name an associative array, jq's status if an
#            update fails, 0 otherwise
#######################################
assoc2json() {
  # shellcheck disable=SC2034 # shellcheck is nameref illiterate
  local -n ass=${1:?} || return 2
  # Validate the variable that was actually passed in. The check used to read
  # the attributes of a global named `assoc`, so it rejected valid arrays with
  # any other name and accepted anything while a global `assoc` existed.
  # `declare -p` is used because ${var@a} is unreliable before bash 5.1.
  local decl
  decl=$(declare -p -- "$1" 2>/dev/null) || return 2
  [[ $decl =~ ^declare\ -[[:alpha:]]*A ]] || return 2
  local -- json='{}'
  local -- k
  for k in "${!ass[@]}"; do
    # Re-parse the serialized key into its path elements.
    local -a path="($k)"
    local -- v="${ass[$k]}"
    # Stop at the first jq failure instead of carrying an empty document on.
    json=$(jq -ncS --argjson j "$json" --arg v "$v" "$bash2obj" --args "${path[@]}") || return
  done
  printf '%s\n' "$json"
}
# Driver: the JSON file to round-trip is the mandatory first argument.
jsonFile="${1:?}"
# Load the file into the associative array `assoc`.
# shellcheck disable=SC2155 # Dynamically built assoc declaration
declare -A assoc="($(json2assoc <"$jsonFile"))"
# Unit Test: rebuild JSON from the array and normalise the input the same way
# (compact, sorted keys) so the two strings can be compared.
generatedJson="$(assoc2json assoc)"
inputJson="$(jq -cS . <"$jsonFile")"
printf 'Input:\n%s\nGenerated:\n%s\n' "$inputJson" "$generatedJson"
#######################################
# Simple assertion function: evaluates its arguments as one shell command.
# Arguments: the command to evaluate (joined with spaces)
# Outputs:   a diagnostic on stderr when the command fails
# Returns:   the command's exit status. It used to return printf's status,
#            i.e. always 0, so `assert ... || exit 1` could never fire.
#######################################
assert() {
  local assert_rc=0
  eval "$*" || assert_rc=$?
  if ((assert_rc != 0)); then
    printf 'Assertion fail $?=%d: %s\n' "$assert_rc" "$*" >&2
  fi
  return "$assert_rc"
}
assert '[ "$generatedJson" = "$inputJson" ]' || exit 1
Another variant merges the path array with `/` into a single bash string, which makes those keys more practical to use, reference and navigate within Bash than serialized Bash array declarations.
It adds the constraint that JSON keys must not contain `/`:
#!/usr/bin/env bash
# jq program that prints one `[KEY]=VALUE` bash associative-array element per
# scalar leaf, with the path elements joined by "/" to form KEY.
read -rd '' obj2bash <<'JQ'
# `scalars | true` replaces the deprecated `leaf_paths` (= paths(scalars)),
# which filters on the leaf's own value and so drops null and false leaves.
paths(scalars | true) as $path | {
  # Path array -> "a/0/b" style bash string
  "key": $path | join("/") | @sh,
  # Leaf value -> bash string
  "value": getpath($path) | @sh
} | ("[" + .key + "]=" + .value)
JQ
# jq program that sets one leaf: $j is the document so far, $k the
# "/"-joined path and $v the value (always stored as a string).
# Only canonical non-negative integers (0, 1, 2, ...) become array indexes;
# a blanket `tonumber? // .` would also turn object keys such as "1e3", "-1"
# or "1.5" into numbers and corrupt the path.
# shellcheck disable=SC2016 # Not shell variables
bash2obj='$j | setpath(
  [$k | split("/")[] | if test("^(0|[1-9][0-9]*)$") then tonumber else . end];
  $v
)'
# Run the jq program held in $obj2bash (raw output) on the JSON read from
# stdin; prints the program's output and returns jq's exit status.
json2assoc() {
  set -- jq -r "$obj2bash"
  "$@"
}
#######################################
# Rebuild a JSON document from an associative array whose keys are
# "/"-joined paths, and print it. Every value is stored as a JSON string.
# Globals:   bash2obj (read)
# Arguments: $1 - NAME of the associative array (passed by reference)
# Outputs:   compact, key-sorted JSON on stdout
# Returns:   2 if $1 does not name an associative array, jq's status if an
#            update fails, 0 otherwise
#######################################
assoc2json() {
  # shellcheck disable=SC2034 # shellcheck is nameref illiterate
  local -n ass=${1:?} || return 2
  # Validate the variable that was actually passed in. The check used to read
  # the attributes of a global named `assoc`, so it rejected valid arrays with
  # any other name and accepted anything while a global `assoc` existed.
  # `declare -p` is used because ${var@a} is unreliable before bash 5.1.
  local decl
  decl=$(declare -p -- "$1" 2>/dev/null) || return 2
  [[ $decl =~ ^declare\ -[[:alpha:]]*A ]] || return 2
  local -- json='{}' k
  for k in "${!ass[@]}"; do
    # Stop at the first jq failure instead of carrying an empty document on.
    json=$(jq -cnS --argjson j "$json" \
      --arg k "$k" --arg v "${ass[$k]}" "$bash2obj") || return
  done
  printf '%s\n' "$json"
}
# Driver: the JSON file to round-trip is the mandatory first argument.
jsonFile="${1:?}"
# Load the file into the associative array `assoc`.
# shellcheck disable=SC2155 # Dynamically built assoc declaration
declare -A assoc="($(json2assoc <"$jsonFile"))"
# Debug: dump the generated associative array.
declare -p assoc
# Unit Test: rebuild JSON from the array and normalise the input the same way
# (compact, sorted keys) so the two strings can be compared.
generatedJson="$(assoc2json assoc)"
inputJson="$(jq -cS . <"$jsonFile")"
#######################################
# Simple assertion function: evaluates its arguments as one shell command.
# Arguments: the command to evaluate (joined with spaces)
# Outputs:   a diagnostic on stderr when the command fails
# Returns:   the command's exit status. It used to return printf's status,
#            i.e. always 0, so `assert ... || exit 1` could never fire.
#######################################
assert() {
  local assert_rc=0
  eval "$*" || assert_rc=$?
  if ((assert_rc != 0)); then
    printf 'Assertion fail $?=%d: %s\n' "$assert_rc" "$*" >&2
  fi
  return "$assert_rc"
}
assert '[ "$generatedJson" = "$inputJson" ]' || exit 1