[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.38

/*								       SGML.c
**	GENERAL SGML PARSER CODE
**
**	(c) COPYRIGHT MIT 1995.
**	Please first read the full copyright statement in the file COPYRIGH.
**
**	This module implements an HTStream object. To parse an
**	SGML file, create this object, which is a parser. The object
**	is (currently) created by being passed a DTD structure
**	and a target HTStructured object at which to throw the parsed stuff.
**
**	 6 Feb 93	Binary searches used. Interface modified.
**	 8 Jul 94  FM	Insulate free() from _free structure element.
*/
 15: 
1.25 frystyk 16: /* Library include files */
1.38 ! frystyk 17: #include "sysdep.h"
1.1 timbl 18: #include "HTUtils.h"
1.25 frystyk 19: #include "HTString.h"
1.1 timbl 20: #include "HTChunk.h"
1.20 frystyk 21: #include "SGML.h"
1.1 timbl 22: 
/* Sentinel stored in current_attribute_number when no known attribute
** is currently selected (unknown attribute name, or value already taken).
*/
#define INVALID (-1)
 24: 
1.1 timbl 25: /*   The State (context) of the parser
 26: **
1.2 timbl 27: **   This is passed with each call to make the parser reentrant
1.1 timbl 28: **
 29: */
 30: 
1.16 frystyk 31: 
1.2 timbl 32: 
 33:    
 34: /*       Element Stack
 35: **       -------------
 36: **   This allows us to return down the stack reselcting styles.
 37: **   As we return, attribute values will be garbage in general.
 38: */
 39: typedef struct _HTElement HTElement;
 40: struct _HTElement {
 41:    HTElement *   next;  /* Previously nested element or 0 */
 42:    HTTag*     tag;  /* The tag at this level */
 43: };
 44: 
 45: 
/*
**	States of the character-driven parser in SGML_character().
**	The order of the enumerators is kept exactly as before.
*/
typedef enum _sgml_state {
    S_text,		/* ordinary character data */
    S_literal,		/* inside a literal element, waiting for its end tag */
    S_tag,		/* reading a tag name after '<' */
    S_tag_gap,		/* inside a tag: expecting attribute or '>' */
    S_attr,		/* reading an attribute name */
    S_attr_gap,		/* after attribute name: expecting attribute, '=' or '>' */
    S_equals,		/* after '=': expecting a value */
    S_value,		/* reading an unquoted attribute value */
    S_after_open,	/* just after a start tag was completed */
    S_nl,		/* newline seen in text, not yet emitted */
    S_nl_tago,		/* newline followed by '<' */
    S_ero,		/* '&' seen: entity reference open */
    S_cro,		/* "&#" seen: character reference open */
#ifdef ISO_2022_JP
    S_esc,		/* ESC seen */
    S_dollar,		/* ESC '$' seen */
    S_paren,		/* ESC '(' seen */
    S_nonascii_text,	/* text after ESC '$' '@' or ESC '$' 'B' */
#endif
    S_squoted,		/* inside a '...' attribute value */
    S_dquoted,		/* inside a "..." attribute value */
    S_end,		/* reading an end tag name after "</" */
    S_entity,		/* reading an entity name */
    S_junk_tag		/* skipping everything up to the next '>' */
} sgml_state;
 56: 
 57: 
1.2 timbl 58: /*   Internal Context Data Structure
 59: **   -------------------------------
 60: */
 61: struct _HTStream {
 62: 
1.38 ! frystyk 63:   const HTStreamClass *   isa;      /* inherited from HTStream */
1.2 timbl 64:   
1.38 ! frystyk 65:   const SGML_dtd       *dtd;
1.2 timbl 66:   HTStructuredClass *actions;    /* target class */
 67:   HTStructured    *target;    /* target object */
 68: 
1.1 timbl 69:   HTTag       *current_tag;
1.2 timbl 70:   int        current_attribute_number;
1.1 timbl 71:   HTChunk      *string;
 72:   HTElement     *element_stack;
1.21 frystyk 73:   sgml_state     state;
1.2 timbl 74:   BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
 75:   char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
 76: } ;
 77: 
 78: 
 79: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
 80: 
1.1 timbl 81: 
1.17 timbl 82: /*   Find Attribute Number
 83: **   ---------------------
 84: */
 85: 
1.38 ! frystyk 86: PUBLIC int SGMLFindAttribute (HTTag* tag, const char * s)
1.17 timbl 87: {
 88:   attr* attributes = tag->attributes;
 89: 
 90:   int high, low, i, diff;      /* Binary search for attribute name */
 91:   for(low=0, high=tag->number_of_attributes;
 92:        high > low ;
 93:        diff < 0 ? (low = i+1) : (high = i) ) {
 94:    i = (low + (high-low)/2);
 95:    diff = strcasecomp(attributes[i].name, s);
 96:    if (diff==0) return i;         /* success: found it */
 97:   } /* for */
 98:   
 99:   return -1;
 100: }
 101: 
1.1 timbl 102: 
 103: /*   Handle Attribute
 104: **   ----------------
 105: */
1.38 ! frystyk 106: /* PUBLIC const char * SGML_default = "";  ?? */
1.1 timbl 107: 
1.38 ! frystyk 108: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.1 timbl 109: {
1.2 timbl 110: 
 111:   HTTag * tag = context->current_tag;
 112: 
1.17 timbl 113:   int i = SGMLFindAttribute(tag, s);
 114:   if (i>=0) {
 115:    context->current_attribute_number = i;
 116:    context->present[i] = YES;
 117:    if (context->value[i]) {
1.36 frystyk 118:      HT_FREE(context->value[i]);
1.17 timbl 119:      context->value[i] = NULL;
 120:    }
 121:    return;
 122:   } /* if */
1.2 timbl 123:    
1.20 frystyk 124:   if (SGML_TRACE)
1.37 eric 125:    HTTrace("SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 126:      s, context->current_tag->name);
 127:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 128: }
 129: 
 130: 
 131: /*   Handle attribute value
 132: **   ----------------------
 133: */
1.38 ! frystyk 134: PRIVATE void handle_attribute_value (HTStream * context, const char * s)
1.1 timbl 135: {
1.2 timbl 136:   if (context->current_attribute_number != INVALID) {
 137:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 138:   } else {
1.37 eric 139:     if (SGML_TRACE) HTTrace("SGML: Attribute value %s ignored\n", s);
1.1 timbl 140:   }
1.2 timbl 141:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 142: }
 143: 
1.2 timbl 144: 
1.1 timbl 145: /*   Handle entity
 146: **   -------------
 147: **
 148: ** On entry,
 149: **   s    contains the entity name zero terminated
 150: ** Bugs:
 151: **   If the entity name is unknown, the terminator is treated as
 152: **   a printable non-special character in all cases, even if it is '<'
 153: */
1.31 frystyk 154: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 155: {
1.2 timbl 156: 
1.38 ! frystyk 157:   const char ** entities = context->dtd->entity_names;
 ! 158:   const char *s = context->string->data;
1.2 timbl 159:   
 160:   int high, low, i, diff;
 161:   for(low=0, high = context->dtd->number_of_entities;
 162:        high > low ;
 163:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 164:    i = (low + (high-low)/2);
 165:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 166:    if (diff==0) {         /* success: found it */
 167:      (*context->actions->put_entity)(context->target, i);
 168:      return;
1.1 timbl 169:    }
 170:   }
 171:   /* If entity string not found, display as text */
1.20 frystyk 172:   if (SGML_TRACE)
1.37 eric 173:    HTTrace("SGML: Unknown entity %s\n", s); 
1.2 timbl 174:   PUTC('&');
1.1 timbl 175:   {
1.38 ! frystyk 176:    const char *p;
1.1 timbl 177:    for (p=s; *p; p++) {
1.2 timbl 178:      PUTC(*p);
1.1 timbl 179:    }
 180:   }
1.2 timbl 181:   PUTC(term);
1.1 timbl 182: }
 183: 
1.35 frystyk 184: /*
 185: **   Helper function to check if the tag is on the stack
 186: */
 187: PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag)
 188: {
 189:   HTElement* elem;
 190:   for (elem = stack; elem != NULL; elem = elem->next)
 191:   {
 192:     if (elem->tag == tag) return YES;
 193:   }
 194:   return NO;
 195: }
1.2 timbl 196: 
1.1 timbl 197: /*   End element
1.2 timbl 198: **   -----------
1.1 timbl 199: */
1.31 frystyk 200: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 201: {
1.37 eric 202:   if (SGML_TRACE) HTTrace("SGML: End  </%s>\n", old_tag->name);
1.2 timbl 203:   if (old_tag->contents == SGML_EMPTY) {
1.37 eric 204:     if (SGML_TRACE) HTTrace("SGML: Illegal end tag </%s> found.\n",
1.1 timbl 205:        old_tag->name);
 206:    return;
 207:   }
 208:   while (context->element_stack)   {/* Loop is error path only */
 209:    HTElement * N = context->element_stack;
 210:    HTTag * t = N->tag;
 211:    
 212:    if (old_tag != t) {       /* Mismatch: syntax error */
1.35 frystyk 213:      /*
 214:      ** Patch from Maciej Puzio, puzio@laser.mimuw.edu.pl
 215:      ** See explanation in ../User/Patch/lib_4.0_1.fix
 216:      */
 217:       if (context->element_stack->next  /* This is not the last level */
 218:        && lookup_element_stack(context->element_stack, old_tag)) {
1.37 eric 219:        if (SGML_TRACE) HTTrace(
1.1 timbl 220:        "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
 221:          old_tag->name, t->name, t->name);
 222:      } else {          /* last level */
1.37 eric 223:        if (SGML_TRACE) HTTrace(
1.1 timbl 224:          "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
 225:          old_tag->name, t->name, old_tag->name);
 226:        return;         /* Ignore */
 227:      }
 228:    }
 229:    
 230:    context->element_stack = N->next;        /* Remove from stack */
1.36 frystyk 231:    HT_FREE(N);
1.2 timbl 232:    (*context->actions->end_element)(context->target,
 233:         t - context->dtd->tags);
1.1 timbl 234:    if (old_tag == t) return; /* Correct sequence */
 235:    
 236:    /* Syntax error path only */
 237:    
 238:   }
1.37 eric 239:   if (SGML_TRACE) HTTrace(
1.1 timbl 240:    "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
 241: }
 242: 
 243: 
1.17 timbl 244: /*   Start an element
 245: **   ----------------
1.1 timbl 246: */
1.31 frystyk 247: PRIVATE void start_element (HTStream * context)
1.1 timbl 248: {
 249:   HTTag * new_tag = context->current_tag;
 250:   
1.37 eric 251:   if (SGML_TRACE) HTTrace("SGML: Start <%s>\n", new_tag->name);
1.2 timbl 252:   (*context->actions->start_element)(
 253:    context->target,
 254:    new_tag - context->dtd->tags,
 255:    context->present,
1.38 ! frystyk 256:    (const char**) context->value); /* coerce type for think c */
1.2 timbl 257:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.36 frystyk 258:    HTElement * N;
 259:    if ((N = (HTElement *) HT_MALLOC(sizeof(HTElement))) == NULL)
 260:      HT_OUTOFMEM("start_element");
1.1 timbl 261:    N->next = context->element_stack;
 262:    N->tag = new_tag;
 263:    context->element_stack = N;
 264:   }
 265: }
 266: 
 267: 
1.2 timbl 268: /*       Find Tag in DTD tag list
 269: **       ------------------------
1.1 timbl 270: **
 271: ** On entry,
1.2 timbl 272: **   dtd   points to dtd structire including valid tag list
 273: **   string points to name of tag in question
1.1 timbl 274: **
1.2 timbl 275: ** On exit,
 276: **   returns:
1.7 timbl 277: **       NULL      tag not found
 278: **       else      address of tag structure in dtd
1.2 timbl 279: */
1.38 ! frystyk 280: PUBLIC HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.2 timbl 281: {
 282:   int high, low, i, diff;
 283:   for(low=0, high=dtd->number_of_tags;
 284:        high > low ;
 285:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 286:    i = (low + (high-low)/2);
1.3 timbl 287:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 288:    if (diff==0) {         /* success: found it */
1.7 timbl 289:      return &dtd->tags[i];
1.2 timbl 290:    }
 291:   }
1.7 timbl 292:   return NULL;
1.2 timbl 293: }
 294: 
 295: /*________________________________________________________________________
 296: **           Public Methods
1.1 timbl 297: */
 298: 
1.2 timbl 299: 
 300: /*   Could check that we are back to bottom of stack! @@ */
1.31 frystyk 301: PUBLIC int SGML_flush (HTStream * context)
1.26 frystyk 302: {
 303:   while (context->element_stack) {
 304:    HTElement *ptr = context->element_stack;
 305:    if (SGML_TRACE)
1.37 eric 306:      HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 307:          context->element_stack->tag->name);
 308:    context->element_stack = ptr->next;
1.36 frystyk 309:    HT_FREE(ptr);
1.26 frystyk 310:   }
 311:   return (*context->actions->flush)(context->target);
 312: }
1.1 timbl 313: 
1.31 frystyk 314: PUBLIC int SGML_free (HTStream * context)
1.8 timbl 315: {
1.26 frystyk 316:   int status;
1.14 frystyk 317:   int cnt;
1.15 frystyk 318:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 319:    HTElement *ptr = context->element_stack;
 320: 
1.26 frystyk 321:    if (SGML_TRACE)
1.37 eric 322:      HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 323:          context->element_stack->tag->name);
1.15 frystyk 324:    context->element_stack = ptr->next;
1.36 frystyk 325:    HT_FREE(ptr);
1.15 frystyk 326:   }
1.26 frystyk 327:   if ((status = (*context->actions->_free)(context->target)) != HT_OK)
 328:    return status;
1.33 frystyk 329:   HTChunk_delete(context->string);
1.15 frystyk 330:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 331:    if(context->value[cnt])
1.36 frystyk 332:      HT_FREE(context->value[cnt]);
 333:   HT_FREE(context);
1.26 frystyk 334:   return HT_OK;
1.1 timbl 335: }
 336: 
1.31 frystyk 337: PUBLIC int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 338: {
1.14 frystyk 339:   int cnt;
1.15 frystyk 340:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 341:    HTElement *ptr = context->element_stack;
1.26 frystyk 342:    if (SGML_TRACE)
1.37 eric 343:      HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 344:          context->element_stack->tag->name);
1.15 frystyk 345:    context->element_stack = ptr->next;
1.36 frystyk 346:    HT_FREE(ptr);
1.15 frystyk 347:   }
1.8 timbl 348:   (*context->actions->abort)(context->target, e);
1.33 frystyk 349:   HTChunk_delete(context->string);
1.14 frystyk 350:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 351:    if(context->value[cnt])
1.36 frystyk 352:      HT_FREE(context->value[cnt]);
 353:   HT_FREE(context);
1.26 frystyk 354:   return HT_ERROR;
1.1 timbl 355: }
 356: 
1.31 frystyk 357: PUBLIC int SGML_character (HTStream * context, char c)
1.1 timbl 358: 
 359: {
1.38 ! frystyk 360:   const SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 361:   HTChunk  *string =    context->string;
 362: 
 363:   switch(context->state) {
1.18 timbl 364:   
 365:   case S_after_open: /* Strip one trainling newline
 366:            only after opening nonempty element. - SGML:Ugh! */
 367:     if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
 368:      break;
 369:    }
 370:    context->state = S_text;
 371:    goto normal_text;
 372:    /* (***falls through***) */
 373:    
1.1 timbl 374:   case S_text:
1.18 timbl 375: normal_text:
 376: 
1.13 timbl 377: #ifdef ISO_2022_JP
 378:    if (c=='033円') {
 379:      context->state = S_esc;
 380:      PUTC(c);
 381:      break;
 382:    }
 383: #endif /* ISO_2022_JP */
1.6 timbl 384:    if (c=='&' && (!context->element_stack || (
 385:             context->element_stack->tag &&
 386:             ( context->element_stack->tag->contents == SGML_MIXED
 387:              || context->element_stack->tag->contents ==
 388:                             SGML_RCDATA)
 389:            ))) {
1.1 timbl 390:      string->size = 0;
 391:      context->state = S_ero;
 392:      
 393:    } else if (c=='<') {
 394:      string->size = 0;
 395:      context->state = (context->element_stack &&
1.13 timbl 396:        context->element_stack->tag &&
 397:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 398:                S_literal : S_tag;
1.18 timbl 399:    } else if (c=='\n') {  /* Newline - ignore if before tag end! */
 400:      context->state = S_nl;
1.2 timbl 401:    } else PUTC(c);
1.1 timbl 402:    break;
1.13 timbl 403: 
1.18 timbl 404:   case S_nl:
 405:     if (c=='<') {
 406:      string->size = 0;
 407:      context->state = (context->element_stack &&
 408:        context->element_stack->tag &&
 409:        context->element_stack->tag->contents == SGML_LITERAL) ?
 410:                S_literal : S_nl_tago;
 411:    } else {
 412:      PUTC('\n');
 413:      context->state = S_text;
 414:      goto normal_text;
 415:    }
 416:    break;
 417: 
 418:   case S_nl_tago:      /* Had newline and tag opener */
 419:     if (c != '/') {
 420:      PUTC('\n');     /* Only ignore newline before </ */
 421:    }
 422:    context->state = S_tag;
 423:    goto handle_S_tag;
 424: 
1.13 timbl 425: #ifdef ISO_2022_JP
 426:   case S_esc:
 427:    if (c=='$') {
 428:      context->state = S_dollar;
 429:    } else if (c=='(') {
 430:      context->state = S_paren;
 431:    } else {
 432:      context->state = S_text;
 433:    }
 434:    PUTC(c);
 435:    break;
 436:   case S_dollar:
 437:    if (c=='@' || c=='B') {
 438:      context->state = S_nonascii_text;
 439:    } else {
 440:      context->state = S_text;
 441:    }
 442:    PUTC(c);
 443:    break;
 444:   case S_paren:
 445:    if (c=='B' || c=='J') {
 446:      context->state = S_text;
 447:    } else {
 448:      context->state = S_text;
 449:    }
 450:    PUTC(c);
 451:    break;
 452:   case S_nonascii_text:
 453:    if (c=='033円') {
 454:      context->state = S_esc;
 455:      PUTC(c);
 456:    } else {
 457:      PUTC(c);
 458:    }
 459:    break;
 460: #endif /* ISO_2022_JP */
1.1 timbl 461: 
1.12 timbl 462: /*   In literal mode, waits only for specific end tag!
1.2 timbl 463: **   Only foir compatibility with old servers.
1.1 timbl 464: */
1.12 timbl 465:   case S_literal :
1.33 frystyk 466:    HTChunk_putc(string, c);
1.1 timbl 467:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 468:        : context->element_stack->tag->name[string->size-2])) {
 469:      int i;
 470:      
1.12 timbl 471:      /* If complete match, end literal */
1.1 timbl 472:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 473:        end_element(context, context->element_stack->tag);
 474:        string->size = 0;
1.2 timbl 475:        context->current_attribute_number = INVALID;
1.1 timbl 476:        context->state = S_text;
 477:        break;
 478:      }      /* If Mismatch: recover string. */
1.2 timbl 479:      PUTC( '<');
1.1 timbl 480:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 481:        PUTC(
1.1 timbl 482:                       string->data[i]);
 483:      context->state = S_text;  
 484:    }
 485:    
 486:     break;
 487: 
 488: /*   Character reference or Entity
 489: */
 490:  case S_ero:
 491:    if (c=='#') {
 492:      context->state = S_cro; /*  &# is Char Ref Open */ 
 493:      break;
 494:    }
 495:    context->state = S_entity;  /* Fall through! */
 496:    
 497: /*   Handle Entities
 498: */
 499:   case S_entity:
 500:    if (isalnum(c))
1.33 frystyk 501:      HTChunk_putc(string, c);
1.1 timbl 502:    else {
1.33 frystyk 503:      HTChunk_terminate(string);
1.1 timbl 504:      handle_entity(context, c);
 505:      context->state = S_text;
 506:    }
 507:    break;
 508: 
 509: /*   Character reference
 510: */
 511:   case S_cro:
 512:    if (isalnum(c))
1.33 frystyk 513:      HTChunk_putc(string, c);  /* accumulate a character NUMBER */
1.1 timbl 514:    else {
 515:      int value;
1.33 frystyk 516:      HTChunk_terminate(string);
1.1 timbl 517:      if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 518:        PUTC((char) value);
1.1 timbl 519:      context->state = S_text;
 520:    }
 521:    break;
 522: 
 523: /*       Tag
 524: */     
 525:   case S_tag:                /* new tag */
1.18 timbl 526: handle_S_tag:
 527: 
1.1 timbl 528:    if (isalnum(c))
1.33 frystyk 529:      HTChunk_putc(string, c);
1.1 timbl 530:    else {             /* End of tag name */
1.7 timbl 531:      HTTag * t;
1.1 timbl 532:      if (c=='/') {
1.20 frystyk 533:        if (SGML_TRACE) if (string->size!=0)
1.37 eric 534:          HTTrace("SGML: `<%s/' found!\n", string->data);
1.1 timbl 535:        context->state = S_end;
 536:        break;
 537:      }
1.33 frystyk 538:      HTChunk_terminate(string) ;
1.2 timbl 539: 
1.10 timbl 540:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 541:      if (!t) {
1.37 eric 542:        if(SGML_TRACE) HTTrace("SGML: *** Unknown element %s\n",
1.1 timbl 543:            string->data);
 544:        context->state = (c=='>') ? S_text : S_junk_tag;
 545:        break;
 546:      }
1.7 timbl 547:      context->current_tag = t;
1.2 timbl 548:      
 549:      /* Clear out attributes
 550:      */
1.1 timbl 551:      
1.2 timbl 552:      {
 553:        int i;
 554:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 555:          context->present[i] = NO;
1.1 timbl 556:      }
 557:      string->size = 0;
1.2 timbl 558:      context->current_attribute_number = INVALID;
1.1 timbl 559:      
 560:      if (c=='>') {
 561:        if (context->current_tag->name) start_element(context);
1.18 timbl 562:        context->state = S_after_open;
1.1 timbl 563:      } else {
 564:        context->state = S_tag_gap;
 565:      }
 566:    }
 567:    break;
 568: 
 569:        
 570:   case S_tag_gap:      /* Expecting attribute or > */
 571:    if (WHITE(c)) break;  /* Gap between attributes */
 572:    if (c=='>') {      /* End of tag */
 573:      if (context->current_tag->name) start_element(context);
1.18 timbl 574:      context->state = S_after_open;
1.1 timbl 575:      break;
 576:    }
1.33 frystyk 577:    HTChunk_putc(string, c);
1.1 timbl 578:    context->state = S_attr;        /* Get attribute */
 579:    break;
 580:    
 581:                /* accumulating value */
 582:   case S_attr:
 583:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
1.33 frystyk 584:      HTChunk_terminate(string) ;
1.1 timbl 585:      handle_attribute_name(context, string->data);
 586:      string->size = 0;
 587:      if (c=='>') {        /* End of tag */
 588:        if (context->current_tag->name) start_element(context);
1.18 timbl 589:        context->state = S_after_open;
1.1 timbl 590:        break;
 591:      }
 592:      context->state = (c=='=' ? S_equals: S_attr_gap);
 593:    } else {
1.33 frystyk 594:      HTChunk_putc(string, c);
1.1 timbl 595:    }
 596:    break;
 597:        
 598:   case S_attr_gap:      /* Expecting attribute or = or > */
 599:    if (WHITE(c)) break;  /* Gap after attribute */
 600:    if (c=='>') {      /* End of tag */
 601:      if (context->current_tag->name) start_element(context);
1.18 timbl 602:      context->state = S_after_open;
1.1 timbl 603:      break;
 604:    } else if (c=='=') {
 605:      context->state = S_equals;
 606:      break;
 607:    }
1.33 frystyk 608:    HTChunk_putc(string, c);
1.1 timbl 609:    context->state = S_attr;        /* Get next attribute */
 610:    break;
 611:    
 612:   case S_equals:           /* After attr = */ 
 613:    if (WHITE(c)) break;  /* Before attribute value */
 614:    if (c=='>') {      /* End of tag */
1.37 eric 615:      if (SGML_TRACE) HTTrace("SGML: found = but no value\n");
1.1 timbl 616:      if (context->current_tag->name) start_element(context);
1.18 timbl 617:      context->state = S_after_open;
1.1 timbl 618:      break;
 619:      
 620:    } else if (c=='\'') {
 621:      context->state = S_squoted;
 622:      break;
 623: 
 624:    } else if (c=='"') {
 625:      context->state = S_dquoted;
 626:      break;
 627:    }
1.33 frystyk 628:    HTChunk_putc(string, c);
1.1 timbl 629:    context->state = S_value;
 630:    break;
 631:    
 632:   case S_value:
 633:    if (WHITE(c) || (c=='>')) {       /* End of word */
1.33 frystyk 634:      HTChunk_terminate(string) ;
1.1 timbl 635:      handle_attribute_value(context, string->data);
 636:      string->size = 0;
 637:      if (c=='>') {        /* End of tag */
 638:        if (context->current_tag->name) start_element(context);
1.18 timbl 639:        context->state = S_after_open;
1.1 timbl 640:        break;
 641:      }
 642:      else context->state = S_tag_gap;
 643:    } else {
1.33 frystyk 644:      HTChunk_putc(string, c);
1.1 timbl 645:    }
 646:    break;
 647:        
 648:   case S_squoted:      /* Quoted attribute value */
 649:    if (c=='\'') {     /* End of attribute value */
1.33 frystyk 650:      HTChunk_terminate(string) ;
1.1 timbl 651:      handle_attribute_value(context, string->data);
 652:      string->size = 0;
 653:      context->state = S_tag_gap;
 654:    } else {
1.33 frystyk 655:      HTChunk_putc(string, c);
1.1 timbl 656:    }
 657:    break;
 658:    
 659:   case S_dquoted:      /* Quoted attribute value */
 660:    if (c=='"') {      /* End of attribute value */
1.33 frystyk 661:      HTChunk_terminate(string) ;
1.1 timbl 662:      handle_attribute_value(context, string->data);
 663:      string->size = 0;
 664:      context->state = S_tag_gap;
 665:    } else {
1.33 frystyk 666:      HTChunk_putc(string, c);
1.1 timbl 667:    }
 668:    break;
 669:    
 670:   case S_end:                    /* </ */
 671:    if (isalnum(c))
1.33 frystyk 672:      HTChunk_putc(string, c);
1.1 timbl 673:    else {             /* End of end tag name */
1.7 timbl 674:      HTTag * t;
1.33 frystyk 675:      HTChunk_terminate(string) ;
1.7 timbl 676:      if (!*string->data) {    /* Empty end tag */
 677:        t = context->element_stack->tag;
 678:      } else {
1.10 timbl 679:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 680:      }
1.7 timbl 681:      if (!t) {
1.37 eric 682:        if(SGML_TRACE) HTTrace(
1.1 timbl 683:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 684:      } else {
1.7 timbl 685:        context->current_tag = t;
1.2 timbl 686:        end_element( context, context->current_tag);
1.1 timbl 687:      }
1.2 timbl 688: 
1.1 timbl 689:      string->size = 0;
1.2 timbl 690:      context->current_attribute_number = INVALID;
1.7 timbl 691:      if (c!='>') {
1.20 frystyk 692:        if (SGML_TRACE && !WHITE(c))
1.37 eric 693:          HTTrace("SGML: `</%s%c' found!\n",
1.7 timbl 694:            string->data, c);
 695:        context->state = S_junk_tag;
 696:      } else {
 697:        context->state = S_text;
 698:      }
1.1 timbl 699:    }
 700:    break;
 701: 
 702:        
 703:   case S_junk_tag:
 704:    if (c=='>') {
 705:      context->state = S_text;
 706:    }
 707:   } /* switch on context->state */
1.26 frystyk 708:   return HT_OK;
 709: }
1.2 timbl 710: 
 711: 
1.38 ! frystyk 712: PUBLIC int SGML_string (HTStream * context, const char* s)
1.2 timbl 713: {
1.26 frystyk 714:   while (*s)
 715:     SGML_character(context, *s++);
 716:   return HT_OK;
1.2 timbl 717: }
 718: 
 719: 
1.38 ! frystyk 720: PUBLIC int SGML_write (HTStream * context, const char* b, int l)
1.2 timbl 721: {
1.26 frystyk 722:   while (l-- > 0)
 723:     SGML_character(context, *b++);
 724:   return HT_OK;
1.2 timbl 725: }
 726: 
 727: /*_______________________________________________________________________
 728: */
 729: 
 730: /*   Structured Object Class
 731: **   -----------------------
 732: */
1.38 ! frystyk 733: PRIVATE const HTStreamClass SGMLParser = 
1.2 timbl 734: {       
1.32 frystyk 735:   "SGMLParser",
 736:   SGML_flush,
 737:   SGML_free,
 738:   SGML_abort,
 739:   SGML_character, 
 740:   SGML_string,
 741:   SGML_write,
1.2 timbl 742: }; 
 743: 
 744: /*   Create SGML Engine
 745: **   ------------------
 746: **
 747: ** On entry,
 748: **   dtd       represents the DTD, along with
 749: **   actions     is the sink for the data as a set of routines.
 750: **
 751: */
1.38 ! frystyk 752: PUBLIC HTStream * SGML_new (const SGML_dtd * dtd, HTStructured * target)
1.2 timbl 753: {
 754:   int i;
1.36 frystyk 755:   HTStream* context;
 756:   if ((context = (HTStream *) HT_MALLOC(sizeof(*context))) == NULL)
 757:     HT_OUTOFMEM("SGML_begin");
1.2 timbl 758: 
 759:   context->isa = &SGMLParser;
1.33 frystyk 760:   context->string = HTChunk_new(128);    /* Grow by this much */
1.2 timbl 761:   context->dtd = dtd;
 762:   context->target = target;
 763:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 764:                    /* Ugh: no OO */
 765:   context->state = S_text;
 766:   context->element_stack = 0;            /* empty */
 767:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 768: 
 769:   return context;
 770: }

Webmaster

AltStyle によって変換されたページ (->オリジナル) /