6
\$\begingroup\$

I've created a vanilla JS function to indent XML strings so they can be more easily read. It uses some pretty nasty regex...yes, I know it's a cardinal sin for XML/HTML, but it works. For instance, this string...

<?xml version='1.0' encoding='UTF-8'?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" xmlns:ax212="http://course.ws.blackboard/xsd" xmlns:ax211="http://ws.platform.blackboard/xsd" /></soapenv:Body></soapenv:Envelope>

...would look like this after being passed through the function:

<?xml version='1.0' encoding='UTF-8'?>
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
 <soapenv:Body>
 <ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" xmlns:ax212="http://course.ws.blackboard/xsd" xmlns:ax211="http://ws.platform.blackboard/xsd" />
 </soapenv:Body>
</soapenv:Envelope>

Here's the function itself. What can I do to simplify it?

/**
 * Pretty-prints an XML string: every tag goes on its own line and nested
 * tags are indented with one tab per level.
 *
 * An element whose opening and closing tag end up on the same line (empty
 * element or element with inline text) and a self-closing tag are treated as
 * a complete node and do not change the indent depth. A leading `<?xml ...?>`
 * declaration is left untouched and unindented.
 *
 * Limitation: any line that is neither a closing tag nor a complete node is
 * handled as an opening tag, so free text sitting between a parent tag and a
 * child tag increases the indent depth.
 *
 * @param {string} input - XML markup, typically on a single line.
 * @returns {string} The markup split into lines (joined with "\n") and indented.
 */
function formatXML(input) {
  // PART 1: insert line breaks.
  // Declared with `var`: the original assigned to undeclared names, which
  // leaks globals and throws a ReferenceError in strict mode / ES modules.
  var xmlString = input.trim()
    // A) break between adjacent tags that have only whitespace between them
    .replace(/>\s*</g, '>\n<')
    // B) pull a closing tag back onto the line of the tag that precedes it
    .replace(/(<[^\/>].*>)\n(<[\/])/g, '$1$2')
    // C) break again after a closing or self-closing tag directly followed by
    //    another tag (undoes B where B joined too much)
    .replace(/(<\/[^>]+>|<[^>]+\/>)(<[^>]+>)/g, '$1\n$2');
  // D) one array entry per output line
  var xmlArr = xmlString.split('\n');

  // PART 2: indent each line.
  var tabs = ''; // current indentation prefix
  var start = 0; // index of the first line to indent
  if (/^<[?]xml/.test(xmlArr[0])) {
    start++; // the XML declaration stays as is
  }
  for (var i = start; i < xmlArr.length; i++) {
    var line = xmlArr[i].trim();
    if (/^<\//.test(line)) {
      // Closing tag: drop one level first, then print at the parent's depth.
      tabs = tabs.replace(/.$/, '');
      xmlArr[i] = tabs + line;
    } else if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(line)) {
      // Complete node on one line (open+close pair or self-closing tag):
      // print at the current depth, depth unchanged.
      xmlArr[i] = tabs + line;
    } else {
      // Anything else is treated as an opening tag: print, then go one deeper.
      xmlArr[i] = tabs + line;
      tabs += '\t';
    }
  }

  return xmlArr.join('\n');
}
200_success
145k22 gold badges190 silver badges478 bronze badges
asked Sep 28, 2016 at 16:22
\$\endgroup\$

2 Answers 2

3
\$\begingroup\$

I've been looking for something like this, nice idea. But your function can't handle text nodes. Try inserting some text, for example:

<?xml version="1.0" encoding="UTF-8"?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body>textNode<soapenv:temp>innerText</soapenv:temp><ns:temp><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" /></ns:temp></soapenv:Body></soapenv:Envelope>

My proposal for this problem is:

1) Change the splitting code in this way:

.replace( /(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, "$1\n" ) //add \n after tag if not followed by the closing tag of pair or text node
.replace( /(<\/[a-zA-Z]+[^>]*>)/g, "$1\n") //add \n after closing tag
.replace( />\s+(.+?)\s+<(?!\/)/g, ">\n$1\n<") //add \n between sets of angled brackets and text node between them
.replace( />(.+?)<([a-zA-Z])/g, ">\n$1\n<$2") //add \n between angled brackets and text node between them
.replace(/\?></, "?>\n<") //detect a header of XML

2) Slightly modify the third branch so that it becomes an `else if`, and add one more `else` branch for text nodes:

 else if (/<.*>/.test(line)) //if the line starts with an opening tag and does not contain an entire node
 {
 xmlArr[i] = tabs + line; //add the tabs at the beginning of the line
 tabs += indent; //and add one indent to the store
 }
 else //if the line contain a text node
 {
 xmlArr[i] = tabs + line; // add the tabs at the beginning of the line
 }

So finally, our function will be:

/**
 * Pretty-prints an XML string, including elements that contain text nodes.
 *
 * Line breaks are inserted after opening tags (unless directly followed by
 * their own closing tag or by text), after every closing tag, and around text
 * that sits between tags. Each resulting line is then indented by nesting
 * depth. A leading `<?xml ...?>` declaration is left unindented.
 *
 * Because a line break is appended after every closing tag, the returned
 * string ends with a trailing "\n" when the input ends with a closing tag.
 *
 * @param {string} input - XML markup, typically on a single line.
 * @param {string} [indent='\t'] - String used for one indentation level.
 * @returns {string} The markup split into lines (joined with "\n") and indented.
 */
function formatXML(input, indent)
{
  indent = indent || '\t'; // any other indent unit can be passed in

  // PART 1: add \n where necessary.
  // Trim without String.prototype.trim (not available in IE8). The line
  // breaks are built from the TRIMMED string; the original restarted from the
  // untrimmed input, so leading whitespace hid the XML declaration from the
  // header check below and shifted every line by one level.
  var trimmed = input.replace(/^\s+|\s+$/g, '');
  var xmlString = trimmed
    // after an opening tag, unless followed by its own closing tag or by text
    .replace(/(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, '$1\n')
    // after every closing tag
    .replace(/(<\/[a-zA-Z]+[^>]*>)/g, '$1\n')
    // around whitespace-padded text that is followed by an opening tag
    .replace(/>\s+(.+?)\s+<(?!\/)/g, '>\n$1\n<')
    // around unpadded text that is followed by an opening tag
    .replace(/>(.+?)<([a-zA-Z])/g, '>\n$1\n<$2')
    // between the XML declaration and the first tag
    .replace(/\?></, '?>\n<');
  var xmlArr = xmlString.split('\n'); // one entry per output line

  // PART 2: indent each line appropriately.
  var tabs = ''; // current indentation prefix
  var start = 0; // index of the first line to indent
  if (/^<[?]xml/.test(xmlArr[0])) {
    start++; // the XML declaration stays as is
  }
  for (var i = start; i < xmlArr.length; i++)
  {
    var line = xmlArr[i].replace(/^\s+|\s+$/g, ''); // trim the line
    if (/^<\//.test(line))
    {
      // Closing tag: drop one indent unit, then print.
      tabs = tabs.replace(indent, '');
      xmlArr[i] = tabs + line;
    }
    else if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(line))
    {
      // Complete node on one line: print, depth unchanged.
      xmlArr[i] = tabs + line;
    }
    else if (/<.*>/.test(line))
    {
      // Opening tag: print, then go one level deeper.
      xmlArr[i] = tabs + line;
      tabs += indent;
    }
    else
    {
      // Text node (or empty line): print at the current depth.
      xmlArr[i] = tabs + line;
    }
  }

  // PART 3: return the formatted string.
  return xmlArr.join('\n');
}
answered Aug 15, 2017 at 8:35
\$\endgroup\$
2
\$\begingroup\$

Another approach is to read the string node by node and split it on the fly:

/**
 * Pretty-prints an XML string by walking it node by node (tag or text run)
 * and emitting each node on its own indented line, then collapsing simple
 * elements back onto a single line.
 *
 * Output shape, as produced by the code below:
 * - every node except the XML declaration is emitted as "\n" + indent + node,
 *   so a document without a declaration starts with "\n";
 * - the XML declaration is emitted as node + "\n", which leaves one blank
 *   line between it and the first tag;
 * - an opening tag immediately followed by a closing tag is rewritten as a
 *   self-closing tag (`<c></c>` becomes `<c />`);
 * - an element that contains only text is put on one line (`<b>text</b>`).
 *
 * @param {string} xmlString - XML markup to format.
 * @param {string} [indent="\t"] - String used for one indentation level.
 * @returns {string} The formatted markup.
 */
function XMLTree(xmlString, indent)
{
  indent = indent || "\t"; // can be overridden by the second argument
  var tabs = ""; // current indentation prefix
  var result = xmlString.replace(
    /\s*<.+?>|\s*[^<]+/g, // one node per match: a tag, or a run of text
    function (m) {
      m = m.replace(/^\s+|\s+$/g, ""); // trim the match
      if (/^<[?]xml/.test(m)) {
        return m + "\n"; // XML declaration: emit unindented
      }
      if (/^<\//.test(m))
      {
        // Closing tag: drop one indent unit, then print.
        tabs = tabs.replace(indent, "");
        m = tabs + m;
      }
      else if (/<.*[^>]\/>/.test(m))
      {
        // Self-closing tag: print, depth unchanged.
        m = tabs + m;
      }
      else if (/<.*>/.test(m))
      {
        // Opening tag: print, then go one level deeper.
        m = tabs + m;
        tabs += indent;
      }
      else
      {
        // Text node: print at the current depth.
        m = tabs + m;
      }
      return "\n" + m;
    }
  );

  // Post-processing.
  // Opening tag directly followed by a closing tag -> one self-closing tag.
  result = result.replace(/(<[^\/>]*)>\n\s*(<[\/][^>]*>)/g, "$1 />");
  // Opening tag, text, matching closing tag -> all on one line. \2 is the tag
  // name captured from the opening tag.
  result = result.replace(/(<([a-zA-Z:]+\b)[^>]*>)\n\s*([^<]+)\n\s*(<\/\2>)/g, "$1$3$4");
  return result;
}

Version II - also matches the opening and closing tags of the same node as a single node (so no additional fixes are needed at the end):

/**
 * Pretty-prints an XML string by walking it node by node. Unlike a plain
 * tag-by-tag walk, the matching pattern first tries to take a whole simple
 * element (opening tag, optional text, closing tag) as ONE node, so no
 * post-processing pass is needed.
 *
 * Output shape, as produced by the code below:
 * - every node except the XML declaration is emitted as "\n" + indent + node,
 *   so a document without a declaration starts with "\n";
 * - the XML declaration is emitted as node + "\n", which leaves one blank
 *   line between it and the first tag;
 * - an empty element is rewritten as a self-closing tag (`<c></c>` becomes
 *   `<c />`); an element with text stays on one line (`<b>text</b>`).
 *
 * @param {string} xmlString - XML markup to format.
 * @param {string} [indent="\t"] - String used for one indentation level.
 * @returns {string} The formatted markup.
 */
function XMLTree(xmlString, indent)
{
  indent = indent || "\t"; // can be overridden by the second argument
  var tabs = ""; // current indentation prefix
  var result = xmlString.replace(
    // One node per match, tried in this order: a whole simple element,
    // a single tag, a run of text.
    /\s*<[^>\/]*>[^<>]*<\/[^>]*>|\s*<.+?>|\s*[^<]+/g,
    function (m, i)
    {
      m = m.replace(/^\s+|\s+$/g, ""); // trim the match
      // Only a match starting within the first 38 characters is accepted as
      // the XML declaration. NOTE(review): 38 is the length of
      // '<?xml version="1.0" encoding="UTF-8"?>'; presumably that is where the
      // number comes from - confirm before changing it.
      if (i < 38 && /^<[?]xml/.test(m)) {
        return m + "\n"; // XML declaration: emit unindented
      }
      if (/^<\//.test(m))
      {
        // Closing tag: drop one indent unit, then print.
        tabs = tabs.replace(indent, "");
        m = tabs + m;
      }
      else if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(m))
      {
        // Complete node: depth unchanged. An empty open/close pair is
        // collapsed into a self-closing tag first.
        m = m.replace(/(<[^\/>]*)><[\/][^>]*>/g, "$1 />");
        m = tabs + m;
      }
      else if (/<.*>/.test(m))
      {
        // Opening tag: print, then go one level deeper.
        m = tabs + m;
        tabs += indent;
      }
      else
      {
        // Text node: print at the current depth.
        m = tabs + m;
      }
      return "\n" + m;
    }
  );
  return result;
}
answered Aug 16, 2017 at 9:27
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.