I need to write some JavaScript code that will take a JSON string, parse it, and return the names of the most-deeply nested properties. For example, for this input:
var json =
"{\
foo: {\
bar: 'something',\
baz: {\
jack: 'other',\
},\
bob: {\
bill: 'hello',\
bilbo: 11,\
baggins: {\
fizz: 'buzz'\
finger: 'bang'\
}\
}\
}\
}";
it should return ['fizz', 'finger']
. There are a couple of caveats:
- the parsing must be done "manually"- i.e. I can't use
eval
or a JS library to parse the JSON - the JSON property values are guaranteed to be strings, numbers, or objects, i.e. no arrays
Here's what I've come up with so far. The main function is findDeepestLeaveNodes
. At the moment the code is a bit inefficient as it has to iterate twice over the whole input String. I'd like to improve this if possible, and am also looking for suggestions for improving the code quality.
var constants = {
BLOCK_START: '{',
BLOCK_END: '}'
};
function findDeepestLeaveNodes(json) {
var maxNesting = getMaxNesting(json);
var currentNestLevel = 0;
var results = [];
var jsonLength = json.length;
for (var currentCharIndex = 0; currentCharIndex < jsonLength; currentCharIndex++) {
var currentChar = json.charAt(currentCharIndex);
//console.log("Nesting level " + currentNestLevel + " at character '" + currentChar + "'");
if (currentChar == constants.BLOCK_START) {
// FIXME The following parsing is fairly fragile. It doesn't handle the possibility
// that a '}' or ',' way occur inside a String and therefore do always close a block or
// separate sibling JSON properties. To handle this we'd need to write a proper JSON parser.
if (++currentNestLevel == maxNesting) {
// read the content of the current block
var blockEndIndex = json.indexOf(constants.BLOCK_END, currentCharIndex);
var currentBlock = json.substring(currentCharIndex + 1, blockEndIndex);
// Each position in the properties array will contain a property name and it's value
var properties = currentBlock.split(',');
for (var i = 0; i < properties.length; i++) {
// parse out the property name
var property = properties[i];
var separatorPosition = properties[i].indexOf(':');
var propertyName = property.substring(0, separatorPosition);
results.push(propertyName.trim());
}
}
} else if (currentChar == constants.BLOCK_END) {
--currentNestLevel;
}
}
return results;
}
function getMaxNesting(json) {
var jsonlength = json.length;
var currentNestLevel = 0;
var maxNestLevel = 0;
for (var i = 0; i < jsonlength; i++) {
var currentChar = json.charAt(i);
if (currentChar == constants.BLOCK_START) {
++currentNestLevel;
} else if (currentChar == constants.BLOCK_END) {
// update maxNestLevel if necessary
maxNestLevel = Math.max(currentNestLevel, maxNestLevel);
--currentNestLevel;
}
}
return maxNestLevel;
}
-
2\$\begingroup\$ Technically that isn't valid JSON, the keys aren't strings since they don't have quotes around them. \$\endgroup\$Alex– Alex2011年08月26日 03:29:38 +00:00Commented Aug 26, 2011 at 3:29
-
\$\begingroup\$ Why are you not allowed to use library code? \$\endgroup\$Frames Catherine White– Frames Catherine White2012年06月09日 08:01:36 +00:00Commented Jun 9, 2012 at 8:01
2 Answers 2
I'd write it as a recursive descent parser since this is quite simple:
http://en.wikipedia.org/wiki/Recursive_descent_parser
Actually I already did this a couple of years back, when I wanted to create something that simply formats and highlights JSON a bit. If you can understand it, then you can modify it to your needs:
<html>
<head>
<style type="text/css">
.str
{
color: green;
}
.obj
{
font-weight: bold;
}
.num
{
color: red;
}
</style>
<script language="javascript" type="text/javascript">
String.prototype.repeat = function(n) {
result = '';
for (var i = 0; i < n; i++) result += this;
return result;
}
function jsonFormater() {
this.reset = function() {
this.txt = '';
this.pos = 0;
this.result = '';
this.indent = 0;
this.classes = Array();
};
this.undoindent = function() {
this.indent -= 4;
this.nline();
};
this.doindent = function() {
this.indent += 4;
this.nline();
};
this.nline = function() {
this.result += '<br />' + ' '.repeat(this.indent);
};
this.chClass = function(neu) {
if (this.classes.length > 0) this.result += '</span>';
this.result += '<span class="' + neu + '">';
this.classes.push(neu);
};
this.endClass = function() {
this.classes.pop();
this.result += '</span>';
if (this.classes.length > 0) this.result += '<span class="' + this.classes[this.classes.length - 1] + '">';
};
this.formatJson = function(txt) {
this.txt = txt;
this.pos = 0;
this.result = '';
while (this.pos < this.txt.length) {
if (this.txt[this.pos] == '{') this.parseObj();
else if (this.txt[this.pos] =='[') this.parseArray();
this.pos++;
}
return this.result;
}
this.parseObj = function(ende) {
if (typeof ende =='undefined') var ende = '}';
this.chClass('obj');
do {
if ((this.txt[this.pos] == '{') || (this.txt[this.pos] == '[')) this.nline();
this.result += this.txt[this.pos];
if (this.txt[this.pos] == ',') this.nline();
if ((this.txt[this.pos] == '{') || (this.txt[this.pos] == '[')) this.doindent();
this.pos++;
if (this.txt[this.pos] == '{') this.parseObj();
if (this.txt[this.pos] == '[') this.parseArray();
if (this.txt[this.pos] == '"') this.parseString();
if (/\d/.test(this.txt[this.pos])) this.parseNum();
if ((this.txt[this.pos] == '}') || (this.txt[this.pos] == ']')) this.undoindent();
} while ((this.pos < this.txt.length) && (this.txt[this.pos] != ende));
this.result += this.txt[this.pos];
this.pos++;
this.endClass();
};
this.parseArray = function() {
this.parseObj(']');
};
this.parseString = function() {
this.chClass('str');
do {
this.result += this.htmlEscape(this.txt[this.pos]);
this.pos++;
} while ((this.pos < this.txt.length) && ((this.txt[this.pos] != '"') || (this.txt[this.pos - 1] == '\\')));
this.result += this.htmlEscape(this.txt[this.pos]);
this.pos++;
this.endClass();
};
this.parseNum = function() {
this.chClass('num');
do {
this.result += this.txt[this.pos];
this.pos++;
} while ((this.pos < this.txt.length) && (/[\d\.]/.test(this.txt[this.pos])));
this.endClass();
};
this.htmlEscape = function(txt) {
return txt.replace(/&/,'&').replace(/</g, '<').replace(/>/g, '>').replace(/"/g, '"');
};
this.reset();
}
var parser = new jsonFormater();
function go(txt) {
document.getElementById('ausgabe').innerHTML = parser.formatJson(txt);
parser.reset();
}
</script>
</head>
<body onLoad="go(document.getElementById('thetextarea').value);">
<textarea id="thetextarea" rows="25" cols="70" onKeyUp="go(this.value);">[{"Dies":"Ist ein Beispiel...","mit":["Arrays","und","so"]},{"alles":"schön verschachtelt..."},"tippt einfach json-zeugs in dem grossen Feld.","Die Anzeige aktualisiert sich sofort...","Die Formatierungen sind als <style> gespeichert. Ihr könnt sie so beliebig ändern.",{"Zahlen":1,"sind":[1,4,55.67],"auch":"schön"}]</textarea>
<div id="ausgabe"></div>
</body>
</html>
I would do this in two steps:
- Parse the json
- Find the deepest node of that object.
When you separate your code like that, you wind up with two reusable functions instead of one single-use function, which is always a good thing.
I wrote the json parser as a self executing anonymous function that returns an object. What this does is encapsulate all the helper functions so they aren't exposed to outside functions and results in a object with just one method: decode
.
var JSON = (function() {
function charIsLetter(c) {
return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
}
function charIsNumber(c) {
return '0' <= c && c <= '9';
}
function decodeInt(s) {
for ( //iterate through the string while it is a number
var i = 0, c = s.charAt(0);
i < s.length && charIsNumber(c);
c = s.charAt(++i)
);
//return [integer, the rest of the string]
return [parseInt(s.substring(0,i), 10), s.substring(i)];
}
function decodeString(s) {
var q = s.charAt(0), //what quotation wraps the string?
str = "";
for (var i=1;i<s.length;i++) { //iterate through the string
c = s.charAt(i);
if (c == "\\") {//if the next quotation is escaped, skip it
i++;
continue;
}
if (c == q)
return [str, s.substring(i+1)]; //return [the string, what comes after it]
str += c;
}
throw "String doesn't have closing quote ("+q+") at: " + s;
}
function decodeObject(s) {
s = s.substring(1); //remove first {
var ob = {}, key, val;
while (true) {
if (s.length == 0)
throw "Reached end of string while looking for '}'";
s = s.replace(/^\s+/m, ""); //remove excess whitespace
if (s.charAt(0) == "}")
return [ob, s.substring(1)]; //return the object and what's left over
key = decode2(s); //key = [decoded string/number/etc, string remaining]
s = key[1].substring(1); //s is now the leftovers, remove ":"
val = decode2(s); //val = [decoded string/number/etc, string remaining]
s = val[1]; //s is now the leftovers
if (s.charAt(0) == ",") //if there is a comma after the value, remove it
s = s.substring(1);
ob[key[0]] = val[0];
}
}
function decodeImproperString(s) {
for ( //iterate the string while the character is a letter
var i = 0, c = s.charAt(0);
i < s.length && charIsLetter(c);
c = s.charAt(++i)
);
return [s.substring(0,i), s.substring(i)]; //return [the string, what comes after it]
}
function decode2(s) {
s = s.replace(/^\s+/m, ""); //remove whitespace from the beginning of the string
var c = s.charAt(0);
if ('0' <= c && c <= '9') //value is a number
return decodeInt(s);
if (c == "'" || c == '"') //value is a string
return decodeString(s);
if (c == '{') //value is an object
return decodeObject(s);
if (charIsLetter(c))
return decodeImproperString(s);
throw "Unexpected character " + c + " at:" + s;
}
return {
decode: function(s) {
var result = decode2(s);
return result[0];
}
};
})();
Now that we've got a way to parse the json, we need a way to find the deepest child. The way I went about doing this is a recursive function that returns the depth of it's children, 0 if it has no object as a child. It increments this depth for it's children and returns the child with the maximum depth.
function deepestObject(ob) {
var ar = []; //array of objects and their depth
for (var key in ob) {
if (ob.hasOwnProperty(key)) {
if (Object.toType(ob[key]) == "object") {
var child = deepestObject(ob[key]);
child.depth++;
ar.push(child);
}
}
}
var max = {depth: 0, children: ob};
for (var i=0; i<ar.length;i++) {
if (ar[i].depth > max.depth)
max = ar[i];
}
return max;
}
I use a helper function is this, Object.toType
. This is a wonderful function thought up by Angus Croll at the Javascript Weblog.
Object.toType = function(obj) {
return ({}).toString.call(obj).match(/\s([a-z|A-Z]+)/)[1].toLowerCase();
}
The deepestObject function doesn't return multiple children if they have the same depth though, this can be changed by using the following instead:
function deepestObject(ob) {
var ar = []; //array of objects and their depth
for (var key in ob) {
if (ob.hasOwnProperty(key)) {
if (Object.toType(ob[key]) == "object") {
var children = deepestObject(ob[key]); //array of deepest children
for (var i=0;i<children.length;i++) { //for each child
var child = children[i];
//console.log(child)
child.depth++;
ar.push(child);
}
}
}
}
var max = [{depth: 0, children: ob}];
for (var i=0; i<ar.length;i++) {
if (ar[i].depth > max[0].depth)
max = [ar[i]];
else if (ar[i].depth == max[0].depth)
max.push(ar[i]);
}
return max;
}
You can see both working here: single child and mulitple child.
Explore related questions
See similar questions with these tags.