I've written a comparison function between two json values. I'd like to know if it can be improved at all in any way. Thanks again for the help.
/*
JSON will compare in the following order:
- null
- bool
- number
- string
- array (on values, then on array length (shorter is less)
- object (convert to array of sorted k-v pairs, and compare as array)
Note that JSON and Structs treat object/struct keys differently.
In JSON, {'a': 1, 'A': 2} contains two keys, whereas in SQL it would only have one.
For example, BigQuery and Postgres treat structs keys case-insensitively.
When comparing among JSON values, object-keys will always be treated as case-sensitive.
By default we will compare strings case-sensitively. However there will be a
flag to allow case-sensitive sorts on strings.
The signature is:
compareJsonValues(
val1: null|bool|number|array|object,
val2 null|bool|number|array|object,
case_insensitive: bool
) -> -1, 0, or 1
EXAMPLES
Note: every below output will evaluate to true.
Every value is implied to be json. For example, 2 < 3 in SQL would be JSON 2 < JSON 3.
null < false, false < 2, 2 < "h", "h" < [], [] < [1], [1] < [[]], [1] < [1,2],
[] < {}, {} < {"a": 1}, {"a": 1} < {"a": {}}, {"a": 1, "b": 2} = {"b": 2, "a": 1},
{"a": 0, "z": 1} < {"a": 1}, {"a": 0} < {"a": 0, "b": 0}
*/
"use strict";
const TYPE_RANK = {
"null": 1,
"boolean": 2,
"number": 3,
"string": 4,
"array": 5,
"object": 6
};
function getType(val) {
/* acceptable types are: ["boolean", "number", "string", "object"]
otherwise return "other" */
switch (typeof val) {
case "boolean":
case "number":
case "string":
return typeof val;
case "object":
return val === null ? "null" : Array.isArray(val) ? "array" : "object"
default:
return "other"
}
};
function comparisonToNumberHelper(val1, val2) {
return val1 === val2 ? 0 : val1 > val2 ? 1 : -1;
}
function compareJsonValues(val1, val2, case_insensitive) {
// 1 -- make sure the inputs are of acceptable type
let [type1, type2] = [getType(val1), getType(val2)];
if (type1 === "other" || type2 === "other") {
throw new Error(`Unacceptable type supplied -- Val1: "${type1}" | Val2: "${type2}"`);
};
// 2 -- return result if of different types
if (type1 !== type2) {
return comparisonToNumberHelper(TYPE_RANK[type1], TYPE_RANK[type2]);
}
// 3 -- now compare them for real if of the same type
if (type1 === "null" || type1 === "boolean" || type1 === "number" || type1 === "string" && !case_insensitive)
return comparisonToNumberHelper(val1, val2);
else if (type1 === "string" && case_insensitive) {
return comparisonToNumberHelper(val1.toLowerCase(), val2.toLowerCase());
}
else if (type1 === "array") {
for (let i=0; i < val1.length; i++) {
// Because we are using the length of val1.
// If we have gotten to an index where val1 has
// a value, but val2 does not, return 1 (val1 > val2)
if (val2[i] === undefined) {
return 1;
}
let subres = compareJsonValues(val1[i], val2[i], case_insensitive);
if (subres !== 0) return subres;
}
// All (if any) values compared so far are equal
// So return 0 if they two arrays are also the same length
return val1.length === val2.length ? 0 : -1;
}
else if (type1 === "object") {
// Compare are an array of sorted k-v pairs.
// Note again that keys are always sorted case-insensitively.
// Example:
// {"a": 1, "b": 2} = {"b": 2, "a": 1}
// [["a", 1], ["b", 2]] = [["a", 1], ["b", 2]]
let entries1 = Object.entries(val1).sort((a,b)=> a[0].localeCompare(b[0]));
let entries2 = Object.entries(val2).sort((a,b)=> a[0].localeCompare(b[0]));
return compareJsonValues(entries1, entries2, case_insensitive);
}
}
const compareJsonValuesCI = (val1, val2) => compareJsonValues(val1, val2, 1);
const compareJsonValuesCS = (val1, val2) => compareJsonValues(val1, val2, 0);
///////////////////////////////////////////////////////////////////////////////////////
// Test code
let x =
[1,2,3,"a", null, [], {}, "B", 9, [1], [1,2], [1,3], ["a"], [{}], [[]],
{"name": "david", "age": 20}, {"name": "david", "age": 18}, {"name": "david"}, {"age": 18}, {"age": 30}, {"age": 20}, {"age": {}}, {"age": 20, "bage": 24}
];
console.log(x.sort(compareJsonValuesCI));
const tests = [
// (val1, val2, case_insensitive, result)
[null, null, 0, 0],
[true, false, 0, 1],
[2, 4, 0, -1 ],
["b", "a", 0, 1],
["B", "a", 0, -1],
[[], [], 0, 0],
[{}, {}, 0, 0]
]
for (let [val1, val2, case_insensitive, expected_res] of tests) {
let res = compareJsonValues(val1, val2, case_insensitive);
if (res === expected_res) {
console.log('OK')
} else {
console.log(`Error: Expected ${expected_res} but got ${res}.`)
}
}
1 Answer 1
The array
check could be cleaned up. You should probably check length before looping over either of the lists (if (val1.length !== val2.length) return val1.length < val2.length ? 1 : -1;
):
- If
val1.length + 1 === val2.length
(and ifval2.length
is a large N) currently it will loop several times before returning, this doesn't need to loop at all if the lengths aren't equal - if
val1.length > val2.length
you don't need to loop and then also do the check after the loop - if the lengths aren't equal the contents don't matter
Then you can remove the loop check for undefined
and change the return to be just 0 as you have checked length and content equality already
else if (type1 === "array") {
if (val1.length !== val2.length) return val1.length < val2.length ? 1 : -1; // ADDED
for (let i=0; i < val1.length; i++) {
// REMOVED
let subres = compareJsonValues(val1[i], val2[i], case_insensitive);
if (subres !== 0) return subres;
}
// All (if any) values compared so far are equal
// So return 0
return 0; //CHANGED
}
Full code:
/*
JSON will compare in the following order:
- null
- bool
- number
- string
- array (on values, shorter length is less)
- object (convert to array of k-v pairs, and compare as array)
Note that JSON and Structs treat object/struct keys differently.
In JSON, {'a': 1, 'A': 2} contains two keys, whereas in SQL it would only have one.
For example, BigQuery and Postgres treat structs keys case-insensitively.
When comparing among JSON values, object-keys will always be treated as case-sensitive.
By default we will compare strings case-sensitively. However there will be a
flag to allow case-sensitive sorts on strings.
The signature is:
compareJsonValues(
val1: null|bool|number|array|object,
val2 null|bool|number|array|object,
case_insensitive: bool
) -> -1, 0, or 1
EXAMPLES
Note: every below output will evaluate to true.
Every value is implied to be json. For example, 2 < 3 in SQL would be JSON 2 < JSON 3.
null < false, false < 2, 2 < "h", "h" < [], [] < [1], [1] < [[]], [1] < [1,2],
[] < {}, {} < {"a": 1}, {"a": 1} < {"a": {}}, {"a": 1, "b": 2} = {"b": 2, "a": 1},
{"a": 0, "z": 1} < {"a": 1}, {"a": 0} < {"a": 0, "b": 0}
*/
"use strict";
const TYPE_RANK = {
"null": 1,
"boolean": 2,
"number": 3,
"string": 4,
"array": 5,
"object": 6
};
function getType(val) {
/* acceptable types are: ["boolean", "number", "string", "object"]
otherwise return "other" */
switch (typeof val) {
case "boolean":
case "number":
case "string":
return typeof val;
case "object":
return val === null ? "null" : Array.isArray(val) ? "array" : "object"
default:
return "other"
}
};
function comparisonToNumberHelper(val1, val2) {
return val1 === val2 ? 0 : val1 > val2 ? 1 : -1;
}
function compareJsonValues(val1, val2, case_insensitive) {
// 1 -- make sure the inputs are of acceptable type
let [type1, type2] = [getType(val1), getType(val2)];
if (type1 === "other" || type2 === "other") {
throw new Error(`Unacceptable type supplied -- Val1: "${type1}" | Val2: "${type2}"`);
};
// 2 -- return result if of different types
if (type1 !== type2) {
return comparisonToNumberHelper(TYPE_RANK[type1], TYPE_RANK[type2]);
}
// 3 -- now compare them for real if of the same type
if (type1 === "null" || type1 === "boolean" || type1 === "number" || type1 === "string" && !case_insensitive)
return comparisonToNumberHelper(val1, val2);
else if (type1 === "string" && case_insensitive) {
return comparisonToNumberHelper(val1.toLowerCase(), val2.toLowerCase());
}
else if (type1 === "array") {
if (val1.length !== val2.length) return val1.length < val2.length ? 1 : -1; // ADDED
for (let i=0; i < val1.length; i++) {
// REMOVED
let subres = compareJsonValues(val1[i], val2[i], case_insensitive);
if (subres !== 0) return subres;
}
// All (if any) values compared so far are equal
// So return 0 if they two arrays are also the same length
return 0; //CHANGED
}
else if (type1 === "object") {
// Compare are an array of sorted k-v pairs. For example:
// {"a": 1, "b": 2} = {"b": 2, "a": 1}
// [["a", 1], ["b", 2]] = [["a", 1], ["b", 2]]
let entries1 = Object.entries(val1).sort((a,b)=> a[0].localeCompare(b[0]));
let entries2 = Object.entries(val2).sort((a,b)=> a[0].localeCompare(b[0]));
return compareJsonValues(entries1, entries2, case_insensitive);
}
}
const compareJsonValuesCI = (val1, val2) => compareJsonValues(val1, val2, 1);
const compareJsonValuesCS = (val1, val2) => compareJsonValues(val1, val2, 0);
///////////////////////////////////////////////////////////////////////////////////////
// Test code
let x =
[1,2,3,"a", null, [], {}, "B", 9, [1], [1,2], [1,3], ["a"], [{}], [[]],
{"name": "david", "age": 20}, {"name": "david", "age": 18}, {"name": "david"}, {"age": 18}, {"age": 30}, {"age": 20}, {"age": {}}, {"age": 20, "bage": 24}
];
console.log(x.sort(compareJsonValuesCI));
const tests = [
// (val1, val2, case_insensitive, result)
[null, null, 0, 0],
[true, false, 0, 1],
[2, 4, 0, -1 ],
["b", "a", 0, 1],
["B", "a", 0, -1],
[[], [], 0, 0],
[{}, {}, 0, 0],
[[2], [2], 0, 0],
[[2,2], [2,3], 0, -1],
[[2], [], 0, 1],
[[2], [3,3], 0, 1],
[[2,8], [3], 0, 1],
[{name:'john'}, {}, 0, 1],
[{name:'john'}, {name:'jim'}, 0, 1],
]
for (let [val1, val2, case_insensitive, expected_res] of tests) {
let res = compareJsonValues(val1, val2, case_insensitive);
if (res === expected_res) {
console.log('OK')
} else {
console.log(`Error: Expected ${expected_res} but got ${res}.`)
}
}
-
\$\begingroup\$ we still need to compare if the lengths are different to see whether the first one is less than the second one. For example,
[1] < [2,3]
and[3,4] > [2]
. \$\endgroup\$David542– David5422023年05月18日 16:49:38 +00:00Commented May 18, 2023 at 16:49 -
\$\begingroup\$ @David542 updated return to return value based on length with ternary operation \$\endgroup\$depperm– depperm2023年05月19日 10:22:41 +00:00Commented May 19, 2023 at 10:22
-
\$\begingroup\$ Ok, but look at the comment above. We compare element-wise first then length. Yours compares length first and then elements second.
[1,2]
should return less than[3]
. Does that make sense? Yours returns the opposite. \$\endgroup\$David542– David5422023年05月19日 19:00:52 +00:00Commented May 19, 2023 at 19:00
typeof
suggests a more fundamental design flaw. What use case is this code for? \$\endgroup\$