Formatting XML strings in JavaScript for readability

Question 1

I've created a vanilla JS function to indent XML strings so they can be more easily read. It uses some pretty nasty regex...yes, I know it's a cardinal sin for XML/HTML, but it works. For instance, this string...

<?xml version='1.0' encoding='UTF-8'?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" xmlns:ax212="http://course.ws.blackboard/xsd" xmlns:ax211="http://ws.platform.blackboard/xsd" /></soapenv:Body></soapenv:Envelope>

...would look like this after being passed through the function:

<?xml version='1.0' encoding='UTF-8'?>
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
 <soapenv:Body>
 <ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" xmlns:ax212="http://course.ws.blackboard/xsd" xmlns:ax211="http://ws.platform.blackboard/xsd" />
 </soapenv:Body>
</soapenv:Envelope>

Here's the function itself. What can I do to simplify it?

/**
 * Pretty-prints an XML string: one tag per line, nested tags indented with tabs.
 *
 * The work is done with regular expressions rather than a real parser. A leading
 * `<?xml ...?>` declaration is kept on the first line and never indented.
 *
 * @param {string} input - XML source (typically a single-line string).
 * @returns {string} The re-indented XML, lines joined with "\n".
 */
function formatXML(input) {
    // PART 1: Add \n where necessary
    // A) add \n between sets of angled brackets without content between them
    // B) remove \n between opening and closing tags of the same node if no content is between them
    // C) add \n between a self-closing (or closing) set of angled brackets and the next set,
    //    which undoes the joins step B made after a closing or self-closing tag
    // D) split it into an array
    // Both variables are declared locally: assigning to undeclared names would leak
    // globals in sloppy mode and throw a ReferenceError in strict/module code.
    var xmlString = input.trim()
        .replace(/>\s*</g, '>\n<')
        .replace(/(<[^\/>].*>)\n(<[\/])/g, '$1$2')
        .replace(/(<\/[^>]+>|<[^>]+\/>)(<[^>]+>)/g, '$1\n$2');
    var xmlArr = xmlString.split('\n');

    // PART 2: indent each line appropriately
    var tabs = ''; // current indentation prefix
    var start = 0; // first line to indent
    if (/^<[?]xml/.test(xmlArr[0])) start++; // leave the XML declaration untouched

    for (var i = start; i < xmlArr.length; i++) {
        var line = xmlArr[i].trim();
        if (/^<[/]/.test(line)) {
            // closing tag: step one level out before printing it
            tabs = tabs.replace(/.$/, '');
            xmlArr[i] = tabs + line;
        } else if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(line)) {
            // the line holds an entire node (open+close pair or self-closing): depth unchanged
            xmlArr[i] = tabs + line;
        } else {
            // anything else is treated as an opening tag: print it, then step one level in
            xmlArr[i] = tabs + line;
            tabs += '\t';
        }
    }

    // rejoin the array to a string and return it
    return xmlArr.join('\n');
}

Question 2

I've been looking for something like this; nice idea. However, your function can't handle text nodes. Try inserting some text, for example:

<?xml version="1.0" encoding="UTF-8"?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body>textNode<soapenv:temp>innerText</soapenv:temp><ns:temp><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" /></ns:temp></soapenv:Body></soapenv:Envelope>

My proposition for this problem is:

1) Change the splitting code in this way:

.replace( /(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
.replace( /(<\/[a-zA-Z]+[^>]*>)/g, "$1\n") //add \n after closing tag
.replace( />\s+(.+?)\s+<(?!\/)/g, ">\n$1\n<") //add \n between sets of angled brackets and text node between them
.replace( />(.+?)<([a-zA-Z])/g, ">\n$1\n<$2") //add \n between angled brackets and text node between them
.replace(/\?></, "?>\n<") //detect a header of XML

2) Slightly modify the third branch, turning it into an `else if`, and add one more `else`:

 else if (/<.*>/.test(line)) //if the line starts with an opening tag and does not contain an entire node
 {
 xmlArr[i] = tabs + line; //add the tabs at the beginning of the line
 tabs += indent; //and add one indent to the store
 }
 else //if the line contain a text node
 {
 xmlArr[i] = tabs + line; // add the tabs at the beginning of the line
 }

So finally, our function will be:

/**
 * Pretty-prints an XML string: one node per line, nested nodes indented.
 * Unlike a tag-only splitter, text nodes that sit next to child elements are
 * placed on their own line.
 *
 * The work is done with regular expressions rather than a real parser. A leading
 * `<?xml ...?>` declaration is kept on the first line and never indented.
 *
 * @param {string} input - XML source; surrounding whitespace is ignored.
 * @param {string} [indent='\t'] - String used for one level of indentation.
 * @returns {string} The re-indented XML, lines joined with "\n", with no blank lines.
 */
function formatXML(input, indent)
{
    indent = indent || '\t'; // you can set/define another indent than tabs

    // PART 1: Add \n where necessary.
    // The trimmed string is what gets split (a regex trim is used because
    // String.prototype.trim is missing in IE8). Without it, leading whitespace
    // would hide the XML declaration from the header check below.
    var xmlString = input.replace(/^\s+|\s+$/g, '')
        .replace(/(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, '$1\n') // \n after a tag unless its own closing tag or text follows
        .replace(/(<\/[a-zA-Z]+[^>]*>)/g, '$1\n') // \n after every closing tag
        .replace(/>\s+(.+?)\s+<(?!\/)/g, '>\n$1\n<') // isolate content surrounded by whitespace between tags
        .replace(/>(.+?)<([a-zA-Z])/g, '>\n$1\n<$2') // isolate a text node that is followed by an opening tag
        .replace(/\?></, '?>\n<'); // put the XML declaration on its own line
    var rawLines = xmlString.split('\n');

    // PART 2: indent each line appropriately
    var out = [];
    var tabs = ''; // current indentation prefix
    for (var i = 0; i < rawLines.length; i++)
    {
        var line = rawLines[i].replace(/^\s+|\s+$/g, '');
        if (line === '')
        {
            // the "\n after every closing tag" step leaves empty lines behind; drop them
            continue;
        }
        if (i === 0 && /^<[?]xml/.test(line))
        {
            // XML declaration: keep as-is, it must not affect the depth
            out.push(line);
            continue;
        }
        if (/^<[/]/.test(line))
        {
            // closing tag: step one level out before printing it
            tabs = tabs.slice(indent.length);
            out.push(tabs + line);
        }
        else if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(line))
        {
            // entire node on one line (open+close pair or self-closing): depth unchanged
            out.push(tabs + line);
        }
        else if (/<.*>/.test(line))
        {
            // opening tag: print it, then step one level in
            out.push(tabs + line);
            tabs += indent;
        }
        else
        {
            // text node: depth unchanged
            out.push(tabs + line);
        }
    }

    // PART 3: return formatted string
    return out.join('\n');
}

Question 3

Another approach is to read the string node by node and split it on the fly:

/**
 * Pretty-prints an XML string by walking it node by node (tag or text run)
 * and emitting each node on its own indented line. Two clean-up passes then
 * collapse `<x>` + `</x>` with nothing in between into `<x />`, and put
 * `<x>text</x>` back on a single line.
 *
 * The work is done with regular expressions rather than a real parser.
 * `<?xml ...?>` processing instructions are emitted unindented and never
 * change the depth.
 *
 * @param {string} xmlString - XML source.
 * @param {string} [indent="\t"] - String used for one level of indentation.
 * @returns {string} The re-indented XML, with no leading newline and no blank lines.
 */
function XMLTree( xmlString , indent )
{
    indent = indent || "\t"; // can be specified by second argument of the function
    var tabs = ""; // current indentation prefix
    var isFirst = true; // true until the first node has been emitted

    // Prefix every node except the first with a newline, so the output neither
    // starts with an empty line nor has one after the header.
    function emit(text)
    {
        var out = isFirst ? text : "\n" + text;
        isFirst = false;
        return out;
    }

    var result = xmlString.replace(
        /\s*<.+?>|\s*[^<]+/g , // pattern to match nodes (angled brackets or text)
        function (m)
        {
            m = m.replace(/^\s+|\s+$/g, ""); // trim the match
            if (m === "") return ""; // whitespace-only run between tags: nothing to print
            if (/^<[?]xml/.test(m)) return emit(m); // header: print as-is, depth unchanged
            if (/^<[/]/.test(m)) // closing tag: step one level out first
            {
                tabs = tabs.slice(indent.length);
                return emit(tabs + m);
            }
            if (/<.*[^>]\/>/.test(m)) // self-closing tag: depth unchanged
            {
                return emit(tabs + m);
            }
            if (/<.*>/.test(m)) // opening tag: print, then step one level in
            {
                var line = tabs + m;
                tabs += indent;
                return emit(line);
            }
            return emit(tabs + m); // text node: depth unchanged
        }
    );

    // Additional fixes
    // join an opening tag directly followed by a closing tag into one self-closing node
    result = result.replace(/(<[^\/>]*)>\n\s*(<[\/][^>]*>)/g, "$1 />");
    // put opening tag, text content and matching closing tag back on one line
    result = result.replace(/(<([a-zA-Z:]+\b)[^>]*>)\n\s*([^<]+)\n\s*(<\/\2>)/g, "$1$3$4");
    return result;
}

Version II - also catches the opening and closing tags of the same node as one node (no additional fixes needed at the end):

/**
 * Pretty-prints an XML string by walking it node by node and emitting each
 * node on its own indented line. An opening tag, its text and its closing tag
 * (`<x>text</x>`) are matched as one node, so no clean-up pass is needed;
 * an empty pair `<x></x>` is rewritten as `<x />`.
 *
 * The work is done with regular expressions rather than a real parser.
 * `<?xml ...?>` processing instructions are emitted unindented and never
 * change the depth, wherever they appear.
 *
 * @param {string} xmlString - XML source.
 * @param {string} [indent="\t"] - String used for one level of indentation.
 * @returns {string} The re-indented XML, with no leading newline and no blank lines.
 */
function XMLTree( xmlString , indent )
{
    indent = indent || "\t"; // can be specified by second argument of the function
    var tabs = ""; // current indentation prefix
    var isFirst = true; // true until the first node has been emitted

    // Prefix every node except the first with a newline, so the output neither
    // starts with an empty line nor has one after the header.
    function emit(text)
    {
        var out = isFirst ? text : "\n" + text;
        isFirst = false;
        return out;
    }

    return xmlString.replace(
        /\s*<[^>\/]*>[^<>]*<\/[^>]*>|\s*<.+?>|\s*[^<]+/g , // pattern to match nodes (whole simple element, single tag, or text)
        function (m)
        {
            m = m.replace(/^\s+|\s+$/g, ""); // trim the match
            if (m === "") return ""; // whitespace-only run between tags: nothing to print
            // No match-offset guard here: a later instruction such as
            // <?xml-stylesheet ...?> must not be mistaken for an opening tag.
            if (/^<[?]xml/.test(m)) return emit(m);
            if (/^<[/]/.test(m)) // closing tag: step one level out first
            {
                tabs = tabs.slice(indent.length);
                return emit(tabs + m);
            }
            if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(m)) // entire node: depth unchanged
            {
                // join an empty open+close pair into one self-closing node
                m = m.replace(/(<[^\/>]*)><[\/][^>]*>/g, "$1 />");
                return emit(tabs + m);
            }
            if (/<.*>/.test(m)) // opening tag: print, then step one level in
            {
                var line = tabs + m;
                tabs += indent;
                return emit(line);
            }
            return emit(tabs + m); // text node: depth unchanged
        }
    );
}

markier markier 515 bronze badges · Answer 1 · 2017-08-15 08:35:37Z

I've been looking for something like this; nice idea. However, your function can't handle text nodes. Try inserting some text, for example:

<?xml version="1.0" encoding="UTF-8"?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body>textNode<soapenv:temp>innerText</soapenv:temp><ns:temp><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" /></ns:temp></soapenv:Body></soapenv:Envelope>

My proposition for this problem is:

1) Change the splitting code in this way:

.replace( /(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
.replace( /(<\/[a-zA-Z]+[^>]*>)/g, "$1\n") //add \n after closing tag
.replace( />\s+(.+?)\s+<(?!\/)/g, ">\n$1\n<") //add \n between sets of angled brackets and text node between them
.replace( />(.+?)<([a-zA-Z])/g, ">\n$1\n<$2") //add \n between angled brackets and text node between them
.replace(/\?></, "?>\n<") //detect a header of XML

2) Slightly modify the third branch, turning it into an `else if`, and add one more `else`:

 else if (/<.*>/.test(line)) //if the line starts with an opening tag and does not contain an entire node
 {
 xmlArr[i] = tabs + line; //add the tabs at the beginning of the line
 tabs += indent; //and add one indent to the store
 }
 else //if the line contain a text node
 {
 xmlArr[i] = tabs + line; // add the tabs at the beginning of the line
 }

So finally, our function will be:

/**
 * Pretty-prints an XML string: one node per line, nested nodes indented.
 * Unlike a tag-only splitter, text nodes that sit next to child elements are
 * placed on their own line.
 *
 * The work is done with regular expressions rather than a real parser. A leading
 * `<?xml ...?>` declaration is kept on the first line and never indented.
 *
 * @param {string} input - XML source; surrounding whitespace is ignored.
 * @param {string} [indent='\t'] - String used for one level of indentation.
 * @returns {string} The re-indented XML, lines joined with "\n", with no blank lines.
 */
function formatXML(input, indent)
{
    indent = indent || '\t'; // you can set/define another indent than tabs

    // PART 1: Add \n where necessary.
    // The trimmed string is what gets split (a regex trim is used because
    // String.prototype.trim is missing in IE8). Without it, leading whitespace
    // would hide the XML declaration from the header check below.
    var xmlString = input.replace(/^\s+|\s+$/g, '')
        .replace(/(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, '$1\n') // \n after a tag unless its own closing tag or text follows
        .replace(/(<\/[a-zA-Z]+[^>]*>)/g, '$1\n') // \n after every closing tag
        .replace(/>\s+(.+?)\s+<(?!\/)/g, '>\n$1\n<') // isolate content surrounded by whitespace between tags
        .replace(/>(.+?)<([a-zA-Z])/g, '>\n$1\n<$2') // isolate a text node that is followed by an opening tag
        .replace(/\?></, '?>\n<'); // put the XML declaration on its own line
    var rawLines = xmlString.split('\n');

    // PART 2: indent each line appropriately
    var out = [];
    var tabs = ''; // current indentation prefix
    for (var i = 0; i < rawLines.length; i++)
    {
        var line = rawLines[i].replace(/^\s+|\s+$/g, '');
        if (line === '')
        {
            // the "\n after every closing tag" step leaves empty lines behind; drop them
            continue;
        }
        if (i === 0 && /^<[?]xml/.test(line))
        {
            // XML declaration: keep as-is, it must not affect the depth
            out.push(line);
            continue;
        }
        if (/^<[/]/.test(line))
        {
            // closing tag: step one level out before printing it
            tabs = tabs.slice(indent.length);
            out.push(tabs + line);
        }
        else if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(line))
        {
            // entire node on one line (open+close pair or self-closing): depth unchanged
            out.push(tabs + line);
        }
        else if (/<.*>/.test(line))
        {
            // opening tag: print it, then step one level in
            out.push(tabs + line);
            tabs += indent;
        }
        else
        {
            // text node: depth unchanged
            out.push(tabs + line);
        }
    }

    // PART 3: return formatted string
    return out.join('\n');
}

markier markier 515 bronze badges · Answer 2 · 2017-08-16 09:27:42Z

Another approach is to read the string node by node and split it on the fly:

/**
 * Pretty-prints an XML string by walking it node by node (tag or text run)
 * and emitting each node on its own indented line. Two clean-up passes then
 * collapse `<x>` + `</x>` with nothing in between into `<x />`, and put
 * `<x>text</x>` back on a single line.
 *
 * The work is done with regular expressions rather than a real parser.
 * `<?xml ...?>` processing instructions are emitted unindented and never
 * change the depth.
 *
 * @param {string} xmlString - XML source.
 * @param {string} [indent="\t"] - String used for one level of indentation.
 * @returns {string} The re-indented XML, with no leading newline and no blank lines.
 */
function XMLTree( xmlString , indent )
{
    indent = indent || "\t"; // can be specified by second argument of the function
    var tabs = ""; // current indentation prefix
    var isFirst = true; // true until the first node has been emitted

    // Prefix every node except the first with a newline, so the output neither
    // starts with an empty line nor has one after the header.
    function emit(text)
    {
        var out = isFirst ? text : "\n" + text;
        isFirst = false;
        return out;
    }

    var result = xmlString.replace(
        /\s*<.+?>|\s*[^<]+/g , // pattern to match nodes (angled brackets or text)
        function (m)
        {
            m = m.replace(/^\s+|\s+$/g, ""); // trim the match
            if (m === "") return ""; // whitespace-only run between tags: nothing to print
            if (/^<[?]xml/.test(m)) return emit(m); // header: print as-is, depth unchanged
            if (/^<[/]/.test(m)) // closing tag: step one level out first
            {
                tabs = tabs.slice(indent.length);
                return emit(tabs + m);
            }
            if (/<.*[^>]\/>/.test(m)) // self-closing tag: depth unchanged
            {
                return emit(tabs + m);
            }
            if (/<.*>/.test(m)) // opening tag: print, then step one level in
            {
                var line = tabs + m;
                tabs += indent;
                return emit(line);
            }
            return emit(tabs + m); // text node: depth unchanged
        }
    );

    // Additional fixes
    // join an opening tag directly followed by a closing tag into one self-closing node
    result = result.replace(/(<[^\/>]*)>\n\s*(<[\/][^>]*>)/g, "$1 />");
    // put opening tag, text content and matching closing tag back on one line
    result = result.replace(/(<([a-zA-Z:]+\b)[^>]*>)\n\s*([^<]+)\n\s*(<\/\2>)/g, "$1$3$4");
    return result;
}

Version II - also catches the opening and closing tags of the same node as one node (no additional fixes needed at the end):

/**
 * Pretty-prints an XML string by walking it node by node and emitting each
 * node on its own indented line. An opening tag, its text and its closing tag
 * (`<x>text</x>`) are matched as one node, so no clean-up pass is needed;
 * an empty pair `<x></x>` is rewritten as `<x />`.
 *
 * The work is done with regular expressions rather than a real parser.
 * `<?xml ...?>` processing instructions are emitted unindented and never
 * change the depth, wherever they appear.
 *
 * @param {string} xmlString - XML source.
 * @param {string} [indent="\t"] - String used for one level of indentation.
 * @returns {string} The re-indented XML, with no leading newline and no blank lines.
 */
function XMLTree( xmlString , indent )
{
    indent = indent || "\t"; // can be specified by second argument of the function
    var tabs = ""; // current indentation prefix
    var isFirst = true; // true until the first node has been emitted

    // Prefix every node except the first with a newline, so the output neither
    // starts with an empty line nor has one after the header.
    function emit(text)
    {
        var out = isFirst ? text : "\n" + text;
        isFirst = false;
        return out;
    }

    return xmlString.replace(
        /\s*<[^>\/]*>[^<>]*<\/[^>]*>|\s*<.+?>|\s*[^<]+/g , // pattern to match nodes (whole simple element, single tag, or text)
        function (m)
        {
            m = m.replace(/^\s+|\s+$/g, ""); // trim the match
            if (m === "") return ""; // whitespace-only run between tags: nothing to print
            // No match-offset guard here: a later instruction such as
            // <?xml-stylesheet ...?> must not be mistaken for an opening tag.
            if (/^<[?]xml/.test(m)) return emit(m);
            if (/^<[/]/.test(m)) // closing tag: step one level out first
            {
                tabs = tabs.slice(indent.length);
                return emit(tabs + m);
            }
            if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(m)) // entire node: depth unchanged
            {
                // join an empty open+close pair into one self-closing node
                m = m.replace(/(<[^\/>]*)><[\/][^>]*>/g, "$1 />");
                return emit(tabs + m);
            }
            if (/<.*>/.test(m)) // opening tag: print, then step one level in
            {
                var line = tabs + m;
                tabs += indent;
                return emit(line);
            }
            return emit(tabs + m); // text node: depth unchanged
        }
    );
}

Stack Exchange Network

Formatting XML strings in JavaScript for readability

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Formatting XML strings in JavaScript for readability

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions