I've created a vanilla JS function to indent XML strings so they can be more easily read. It uses some pretty nasty regex...yes, I know it's a cardinal sin for XML/HTML, but it works. For instance, this string...
<?xml version='1.0' encoding='UTF-8'?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" xmlns:ax212="http://course.ws.blackboard/xsd" xmlns:ax211="http://ws.platform.blackboard/xsd" /></soapenv:Body></soapenv:Envelope>
...would look like this after being passed through the function:
<?xml version='1.0' encoding='UTF-8'?>
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
	<soapenv:Body>
		<ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" xmlns:ax212="http://course.ws.blackboard/xsd" xmlns:ax211="http://ws.platform.blackboard/xsd" />
	</soapenv:Body>
</soapenv:Envelope>
Here's the function itself. What can I do to simplify it?
/**
 * Pretty-prints an XML string: one tag (or one complete leaf node) per line,
 * indented with one tab per nesting level.
 *
 * This is a regex-based formatter, not an XML parser. Text nodes that sit next
 * to child elements are not split onto their own lines.
 *
 * @param {string} input - XML source, typically all on one line.
 * @returns {string} The same XML with line breaks and tab indentation added.
 */
function formatXML(input) {
    // PART 1: add \n where necessary.
    // Declared with var: assigning to undeclared names creates globals and
    // throws a ReferenceError in strict mode / ES modules.
    var xmlString = input.trim()
        // A) put every pair of adjacent tags on separate lines
        .replace(/>\s*</g, '>\n<')
        // B) re-join a line with a closing tag that directly follows it, so an
        //    empty node such as <b></b> stays on one line
        .replace(/(<[^\/>].*>)\n(<[\/])/g, '$1$2')
        // C) step B also glues a finished node to its parent's closing tag;
        //    split after any closing or self-closing tag that is directly
        //    followed by another tag
        .replace(/(<\/[^>]+>|<[^>]+\/>)(<[^>]+>)/g, '$1\n$2');
    // D) one array entry per output line
    var xmlArr = xmlString.split('\n');

    // PART 2: indent each line appropriately.
    var tabs = ''; // current indentation
    // An XML declaration on the first line is left untouched.
    var start = /^<[?]xml/.test(xmlArr[0]) ? 1 : 0;
    for (var i = start; i < xmlArr.length; i++) {
        var line = xmlArr[i].trim();
        if (/^<\//.test(line)) {
            // Closing tag: step out one level before printing it.
            tabs = tabs.replace(/.$/, '');
            xmlArr[i] = tabs + line;
        } else {
            xmlArr[i] = tabs + line;
            // A complete node (<a>..</a> or <a ... />) keeps the depth;
            // anything else is a bare opening tag and deepens it.
            if (!/<.*>.*<\/.*>|<.*[^>]\/>/.test(line)) {
                tabs += '\t';
            }
        }
    }

    return xmlArr.join('\n');
}
2 Answers 2
I've been looking for something like this, nice idea. But your function can't handle text nodes. Try inserting some text, for example:
<?xml version="1.0" encoding="UTF-8"?><soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body>textNode<soapenv:temp>innerText</soapenv:temp><ns:temp><ns:getCourseResponse xmlns:ns="http://course.ws.blackboard" /></ns:temp></soapenv:Body></soapenv:Envelope>
My proposed fix for this problem is:
1) Change the splitting code like this:
.replace( /(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, "$1\n" ) //add \n after a tag unless it is followed by its own closing tag or by a text node
.replace( /(<\/[a-zA-Z]+[^>]*>)/g, "$1\n") //add \n after closing tag
.replace( />\s+(.+?)\s+<(?!\/)/g, ">\n$1\n<") //add \n between sets of angled brackets and a whitespace-padded text node between them
.replace( />(.+?)<([a-zA-Z])/g, ">\n$1\n<$2") //add \n between angled brackets and a text node between them
.replace(/\?></, "?>\n<") //put the XML header on its own line
2) Slightly modify the third branch so that it becomes an else if,
and add one more branch after it:
else if (/<.*>/.test(line)) //the line contains a tag; presumably reached only after the closing-tag and entire-node checks fail, so it is an opening tag
{
xmlArr[i] = tabs + line; //prefix the line with the current indentation
tabs += indent; //then go one indent deeper for the lines that follow
}
else //no tag on the line at all: it is a text node
{
xmlArr[i] = tabs + line; //prefix the line with the current indentation; the depth is unchanged
}
So finally, our function will be:
/**
 * Pretty-prints an XML string, including text nodes that sit next to child
 * elements: every tag, complete leaf node and text node ends up on its own
 * line, indented according to its nesting depth.
 *
 * This is a regex-based formatter, not an XML parser.
 *
 * @param {string} input - XML source.
 * @param {string} [indent='\t'] - String used for one indentation level.
 * @returns {string} The indented XML, without blank lines or a trailing
 *   newline.
 */
function formatXML(input, indent)
{
    indent = indent || '\t'; //any non-empty string can be used instead of a tab

    //PART 1: add \n where necessary
    //The trim is the first link of the chain so its result is actually used;
    //leading whitespace would otherwise hide the header from the ^<?xml test.
    //(Regex trim instead of String.prototype.trim, which IE8 lacks.)
    var xmlString = input
        .replace(/^\s+|\s+$/g, '')
        .replace(/(<([a-zA-Z]+\b)[^>]*>)(?!<\/\2>|[\w\s])/g, '$1\n') //after a tag, unless its own closing tag or a text node follows
        .replace(/(<\/[a-zA-Z]+[^>]*>)/g, '$1\n') //after every closing tag
        .replace(/>\s+(.+?)\s+<(?!\/)/g, '>\n$1\n<') //around a whitespace-padded text node between tags
        .replace(/>(.+?)<([a-zA-Z])/g, '>\n$1\n<$2') //around a text node that precedes an opening tag
        .replace(/\?></, '?>\n<'); //after the XML header
    var xmlArr = xmlString.split('\n'); //one entry per candidate output line

    //PART 2: indent each line appropriately
    var out = []; //finished lines
    var tabs = ''; //current indentation
    for (var i = 0; i < xmlArr.length; i++)
    {
        var line = xmlArr[i].replace(/^\s+|\s+$/g, '');
        if (line === '')
        {
            continue; //the \n added after each closing tag leaves empty entries; drop them
        }
        if (i === 0 && /^<[?]xml/.test(line))
        {
            out.push(line); //the header is never indented and does not change the depth
            continue;
        }
        if (/^<\//.test(line)) //closing tag: step out one level first
        {
            tabs = tabs.replace(indent, '');
            out.push(tabs + line);
        }
        else
        {
            out.push(tabs + line);
            //Only a bare opening tag deepens the indentation. An entire node
            //(<a>..</a> or <a ... />) and a text node leave it as is.
            if (/<.*>/.test(line) && !/<.*>.*<\/.*>|<.*[^>]\/>/.test(line))
            {
                tabs += indent;
            }
        }
    }

    //PART 3: return formatted string
    return out.join('\n');
}
Another approach is to read the string node by node and split it on the fly:
/**
 * Pretty-prints an XML string by walking it token by token (tag or text run)
 * and putting each token on its own indented line. Two clean-up passes then
 * collapse <x></x> into <x /> and pull <x>, its text and </x> back onto one
 * line.
 *
 * This is a regex-based formatter, not an XML parser.
 *
 * @param {string} xmlString - XML source.
 * @param {string} [indent="\t"] - String used for one indentation level.
 * @returns {string} The indented XML, with no leading newline and no blank
 *   line after the header.
 */
function XMLTree( xmlString , indent )
{
    indent = indent || "\t"; //can be specified by the second argument
    var tabs = ""; //current indentation
    var isFirst = true; //the first emitted token gets no leading line break

    var result = xmlString.replace(
        /\s*<.+?>|\s*[^<]+/g , //a tag, or a run of text, with any leading whitespace
        function(m)
        {
            m = m.replace(/^\s+|\s+$/g, ""); //trim the match
            if (m === "") return ""; //whitespace-only run (e.g. trailing newline): emit nothing

            //Tokens are separated by exactly one \n; nothing precedes the first.
            var prefix = isFirst ? "" : "\n";
            isFirst = false;

            if (/^<[?]xml/.test(m)) return prefix + m; //header: never indented, depth unchanged

            if (/^<\//.test(m)) //closing tag: step out one level first
            {
                tabs = tabs.replace(indent, "");
                m = tabs + m;
            }
            else if (/<.*[^>]\/>/.test(m)) //self-closing tag: depth unchanged
            {
                m = tabs + m;
            }
            else if (/<.*>/.test(m)) //opening tag: print, then go one level deeper
            {
                m = tabs + m;
                tabs += indent;
            }
            else //text node
            {
                m = tabs + m;
            }
            return prefix + m;
        }
    );

    //Additional fixes
    //join an opening tag with a directly following closing tag into one self-closing node
    result = result.replace(/(<[^\/>]*)>\n\s*(<[\/][^>]*>)/g, "$1 />");
    //put opening tag, text content and matching closing tag back on one line
    result = result.replace(/(<([a-zA-Z:]+\b)[^>]*>)\n\s*([^<]+)\n\s*(<\/\2>)/g, "$1$3$4");
    return result;
}
Version II - also catches the opening and closing tags of the same node as a single node (no additional fixes needed at the end):
/**
 * Pretty-prints an XML string by walking it token by token and putting each
 * token on its own indented line. A leaf node (<x>text</x> or <x></x>) is
 * matched as a single token, so no clean-up pass is needed afterwards; an
 * empty pair <x></x> is rewritten as <x />.
 *
 * This is a regex-based formatter, not an XML parser.
 * NOTE(review): a leaf node whose text spans several lines does not appear to
 * be recognised as an entire node by the single-line test below - confirm
 * before feeding already-formatted input.
 *
 * @param {string} xmlString - XML source.
 * @param {string} [indent="\t"] - String used for one indentation level.
 * @returns {string} The indented XML, with no leading newline and no blank
 *   line after the header.
 */
function XMLTree( xmlString , indent )
{
    indent = indent || "\t"; //can be specified by the second argument
    var tabs = ""; //current indentation
    var isFirst = true; //the first emitted token gets no leading line break

    var result = xmlString.replace(
        //a leaf node (open tag, optional text, close tag), or a single tag, or a run of text
        /\s*<[^>\/]*>[^<>]*<\/[^>]*>|\s*<.+?>|\s*[^<]+/g ,
        function(m)
        {
            m = m.replace(/^\s+|\s+$/g, ""); //trim the match
            if (m === "") return ""; //whitespace-only run (e.g. trailing newline): emit nothing

            //Tokens are separated by exactly one \n; nothing precedes the first.
            var prefix = isFirst ? "" : "\n";
            isFirst = false;

            //Header: recognised by its content rather than by a hard-coded
            //character offset; never indented, depth unchanged.
            if (/^<[?]xml/.test(m)) return prefix + m;

            if (/^<\//.test(m)) //closing tag: step out one level first
            {
                tabs = tabs.replace(indent, "");
                m = tabs + m;
            }
            else if (/<.*>.*<\/.*>|<.*[^>]\/>/.test(m)) //entire node: depth unchanged
            {
                //an opening tag directly followed by a closing tag becomes one self-closing node
                m = m.replace(/(<[^\/>]*)><[\/][^>]*>/g, "$1 />");
                m = tabs + m;
            }
            else if (/<.*>/.test(m)) //opening tag: print, then go one level deeper
            {
                m = tabs + m;
                tabs += indent;
            }
            else //text node
            {
                m = tabs + m;
            }
            return prefix + m;
        }
    );
    return result;
}