2
\$\begingroup\$

I made a basic XML parser supposed to be work in many browsers by using most older functions. What it must do is return an AST (Abstract Syntax Tree) of a XML text in a string.

(it's not cool for HTML because it doesn't recognize names of elements like input, div, etc...)

The following string:

"<!DOCTYPE html><html><Item1 Attribute1="Value"/></html>"

Turns into the following representation array when used in first parameter of call XMLAST:

[
 {
 type:"List",
 name:"!DOCTYPE",
 attributes=[
 {
 name:"html",
 value:true
 }
 ]
 },
 {
 type:"List",
 name:"html"
 },
 {
 type:"Item",
 name:"Item1",
 attributes:[
 {
 name:"Attribute1",
 value:"Value"
 }
 ]
 },
 {
 type:"End"
 }
]

Both interpreting styles. I removed regular expressions because I care about the support, then just made a loop to remove the unallowed characters from tags or attributes name.

XMLAST=function(text){
 var tree,
 expect,
 textend,
 reserve;
 tree=[];
 expect={};
 reserve={};
 reserve.path=-1;
 reserve.textf="";
 reserve.allowed=[
 "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z",
 "1","2","3","4","5","6","7","8","9","0","-","@","!","#",":",
 "A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"
 ];
 textend=function(){
 if(expect.textend){
 if(expect.tag){
 if(expect.tagname){
 reserve.length=reserve.textf.length;
 reserve.tagname="";
 for(reserve.i=0;reserve.length>reserve.i;reserve.i++){
 reserve.cchar=reserve.textf.charAt(reserve.i);
 for(reserve.b=0;67>reserve.b;reserve.b++){
 if(reserve.cchar===reserve.allowed[reserve.b]){
 reserve.tagname+=reserve.cchar;
 break
 }
 }
 }
 expect.tagname=false;
 if(reserve.curchar===">"){
 tree.push({
 type:"List",
 name:reserve.tagname,
 attributes:[]
 });
 expect.tag=false;
 reserve.path++
 }else if(reserve.curchar==="/"){
 tree.push({
 type:"Item",
 name:reserve.tagname,
 attributes:[]
 });
 expect.tag=false;
 reserve.path++;
 reserve.closureindex=text.indexOf(">");
 if(reserve.closureindex===-1){
 text=""
 }else{
 text=text.substring(reserve.closureindex)
 }
 expect.tag=false
 }
 }else if(expect.closuretagname){
 expect.closuretagname=false;
 expect.tag=false;
 reserve.closureindex=text.indexOf(">");
 if(reserve.closureindex!==-1){
 text=text.substring(reserve.closureindex);
 reserve.parentitem={
 type:null
 };
 reserve.expectend=0;
 //Parent item (list) of current item search.
 for(reserve.i=reserve.path;reserve.i>-1;reserve.i--){
 if(tree[reserve.i].type==="List"){
 if(reserve.expectend===0){
 reserve.parentitem=tree[reserve.i];
 break
 }else{
 reserve.expectend--
 }
 }else if(tree[reserve.i].type==="End"){
 reserve.expectend++
 }
 }
 reserve.expectend=null;
 if(reserve.textf===reserve.parentitem.name){
 tree.push({
 type:"End"
 });
 reserve.path++;
 expect.tag=false
 }
 reserve.parentitem=null
 }
 }else{
 /*
 ** ATTRIBUTE DETECTED **
 ** NAME FORWARD **
 */
 reserve.length=reserve.textf.length;
 reserve.attrname="";
 for(reserve.i=0;reserve.length>reserve.i;reserve.i++){
 reserve.cchar=reserve.textf.charAt(reserve.i);
 for(reserve.b=0;67>reserve.b;reserve.b++){
 if(reserve.cchar===reserve.allowed[reserve.b]){
 reserve.attrname+=reserve.cchar;
 break
 }
 }
 }
 reserve.attributes.push({
 name:reserve.attrname,
 value:true
 });
 if(reserve.curchar==="="){
 /*
 * CHECK IF ASSIGN VALUE IN QUOTES *
 */
 reserve.valueindex=text.charAt(1);
 if(reserve.valueindex==='"'||reserve.valueindex==="'"){
 reserve.valuestart=text.substring(2);
 if((reserve.valuendindex=reserve.valuestart.indexOf(reserve.valueindex))===-1){
 text="";
 expect.tag=false
 }else{
 reserve.attributes[reserve.attributes.length-1].value=reserve.valuestart.substring(0,reserve.valuendindex);
 text=text.substring(2);
 text=text.substring(reserve.valuendindex)
 }
 }
 }else if(reserve.curchar===">"||reserve.curchar==="/"){
 tree.push({
 type:(reserve.curchar==="/"?"Item":"List"),
 name:reserve.tagname,
 attributes:reserve.attributes
 });
 reserve.path++;
 expect.tag=false;
 if(reserve.curchar==="/"){
 reserve.closureindex=text.indexOf(">");
 if(reserve.closureindex!==-1){
 text=text.substring(reserve.closureindex);
 reserve.closureindex=null
 }
 }
 }
 }
 }else{//Add text as a tree part.
 tree.push({
 type:"Text",
 value:reserve.textf
 });
 reserve.path++
 }
 }else if(expect.tag){
 if(expect.tagname){
 expect.tag=false
 }else if(reserve.curchar==="/"){
 tree.push({
 type:"Item",
 name:reserve.tagname,
 attributes:reserve.attributes
 });
 reserve.path++;
 expect.tag=false;
 text=text.substring(1);
 reserve.closureindex=text.indexOf(">");
 if(reserve.closureindex!==-1){
 text=text.substring(reserve.closureindex)
 }
 }else if(reserve.curchar===">"){
 tree.push({
 type:"List",
 name:reserve.tagname,
 attributes:reserve.attributes
 });
 expect.tag=false;
 reserve.path++
 }
 }
 expect.textend=false
 };
 for(;;){
 reserve.curchar=text.charAt(0);
 if(reserve.curchar===""){//End of all. Ignore unfinished rest.
 textend();
 break
 }else if(reserve.curchar==="<"){//Start element.
 textend();
 expect.closuretagname=false;
 if(text.substring(1,4)==="!--"){//comment
 /*
 <!--* COMMENT *-->
 */
 expect.tag=false;
 reserve.closureindex=text.indexOf("-->");
 tree.push({
 type:"Comment",
 value:(
 reserve.closureindex===-1?//Has closure?
 text.substring(4,text.length)//Takes to the end.
 ://Has closure!
 text.substring(4,reserve.closureindex)//Takes to the index of -->.
 )
 });
 reserve.path++;
 if(reserve.closureindex!==-1){//Is not it infinite?
 text=text.substring(reserve.closureindex+2)
 }else{
 break//Stop because comment is infinite.
 }
 }else{
 /*
 <* TAG START *...
 */
 expect.tag=true;
 expect.tagname=true;
 //Expect for item end and name.
 reserve.attributes=[];//Reset attributes.
 reserve.tagname="";//Reset tag name.
 }
 }else if(expect.tag&&(reserve.curchar==="/"||reserve.curchar===">"||reserve.curchar==="="||reserve.curchar===" "||reserve.curchar===" ")){
 if(reserve.curchar==="/"){
 if(expect.tagname&&!expect.textend){
 expect.closuretagname=true;
 expect.tagname=false;
 expect.textend=false
 }else if(expect.tag){//closure <.../*>
 textend()
 }
 }else if(reserve.curchar===">"||reserve.curchar==="="||reserve.curchar===" "||reserve.curchar===" "){
 textend()
 }
 }else{
 if(expect.textend){
 reserve.textf+=reserve.curchar
 }else{
 reserve.textf=reserve.curchar;
 expect.textend=true
 }
 }
 text=text.substring(1)
 };
 expect=null;
 textend=null;
 text=null;
 reserve=null;
 return tree
}

What can be improved?

asked Mar 14, 2016 at 2:45
\$\endgroup\$

1 Answer 1

2
+50
\$\begingroup\$
 expect.tag=false;
 reserve.path++;
 reserve.closureindex=text.indexOf(">");
 if(reserve.closureindex===-1){
 text=""
 }else{
 text=text.substring(reserve.closureindex)
 }
 expect.tag=false

Here you've got a duplicate setting of expect.tag.

 expect.tag=false;
 reserve.closureindex=text.indexOf(">");
 if(reserve.closureindex!==-1){
 text=text.substring(reserve.closureindex);
 reserve.parentitem={
 type:null
 };
 reserve.expectend=0;
 //Parent item (list) of current item search.
 for(reserve.i=reserve.path;reserve.i>-1;reserve.i--){
 if(tree[reserve.i].type==="List"){
 if(reserve.expectend===0){
 reserve.parentitem=tree[reserve.i];
 break
 }else{
 reserve.expectend--
 }
 }else if(tree[reserve.i].type==="End"){
 reserve.expectend++
 }
 }
 reserve.expectend=null;
 if(reserve.textf===reserve.parentitem.name){
 tree.push({
 type:"End"
 });
 reserve.path++;
 expect.tag=false
 }
 reserve.parentitem=null
 }

Same in here.


expect=null;
textend=null;
text=null;
reserve=null;
return tree

There is no need to set everything to null; When you return the tree the other variables are no longer in scope and will be deleted.

 reserve.tagname="";
 for(reserve.i=0;reserve.length>reserve.i;reserve.i++){
 reserve.cchar=reserve.textf.charAt(reserve.i);
 for(reserve.b=0;67>reserve.b;reserve.b++){
 if(reserve.cchar===reserve.allowed[reserve.b]){
 reserve.tagname+=reserve.cchar;
 break
 }
 }
 }

This code, at the start of the textend function, and this code,

 reserve.attrname="";
 for(reserve.i=0;reserve.length>reserve.i;reserve.i++){
 reserve.cchar=reserve.textf.charAt(reserve.i);
 for(reserve.b=0;67>reserve.b;reserve.b++){
 if(reserve.cchar===reserve.allowed[reserve.b]){
 reserve.attrname+=reserve.cchar;
 break
 }
 }
 }

somewhere half way of that function, do pretty much the same. You should wrap it in a function.

answered Mar 18, 2016 at 14:11
\$\endgroup\$
1
  • 1
    \$\begingroup\$ I made this loop to remove unallowed characters into a function called removeunallowed, better, right? \$\endgroup\$ Commented Mar 18, 2016 at 14:29

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.