[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.39

1.23 frystyk 1: /*                                   SGML.c
 2: **   GENERAL SGML PARSER CODE
 3: **
1.27 frystyk 4: **   (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.39 ! frystyk 6: **   @(#) $Id: Date Author State $
1.1 timbl 7: **
1.2 timbl 8: **   This module implements an HTStream object. To parse an
1.1 timbl 9: **   SGML file, create this object which is a parser. The object
1.2 timbl 10: **   is (currently) created by being passed a DTD structure,
 11: **   and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **   
1.19 duns 13: **   6 Feb 93    Binary searches used. Interface modified.
 14: **   8 Jul 94 FM  Insulate free() from _free structure element.
1.1 timbl 15: */
 16: 
1.25 frystyk 17: /* Library include files */
1.38 frystyk 18: #include "sysdep.h"
1.1 timbl 19: #include "HTUtils.h"
1.25 frystyk 20: #include "HTString.h"
1.1 timbl 21: #include "HTChunk.h"
1.20 frystyk 22: #include "SGML.h"
1.1 timbl 23: 
1.2 timbl 24: #define INVALID (-1)
 25: 
1.1 timbl 26: /*   The State (context) of the parser
 27: **
1.2 timbl 28: **   This is passed with each call to make the parser reentrant
1.1 timbl 29: **
 30: */
 31: 
1.16 frystyk 32: 
1.2 timbl 33: 
 34:    
 35: /*       Element Stack
 36: **       -------------
 37: **   This allows us to return down the stack reselcting styles.
 38: **   As we return, attribute values will be garbage in general.
 39: */
 40: typedef struct _HTElement HTElement;
 41: struct _HTElement {
 42:    HTElement *   next;  /* Previously nested element or 0 */
 43:    HTTag*     tag;  /* The tag at this level */
 44: };
 45: 
 46: 
1.21 frystyk 47: typedef enum _sgml_state {
 48:   S_text, S_literal, S_tag, S_tag_gap, 
 49:   S_attr, S_attr_gap, S_equals, S_value, S_after_open,
 50:   S_nl, S_nl_tago,
 51:   S_ero, S_cro,
 52: #ifdef ISO_2022_JP
 53:   S_esc, S_dollar, S_paren, S_nonascii_text,
 54: #endif
 55:   S_squoted, S_dquoted, S_end, S_entity, S_junk_tag
 56: } sgml_state;
 57: 
 58: 
1.2 timbl 59: /*   Internal Context Data Structure
 60: **   -------------------------------
 61: */
 62: struct _HTStream {
 63: 
1.38 frystyk 64:   const HTStreamClass *   isa;      /* inherited from HTStream */
1.2 timbl 65:   
1.38 frystyk 66:   const SGML_dtd       *dtd;
1.2 timbl 67:   HTStructuredClass *actions;    /* target class */
 68:   HTStructured    *target;    /* target object */
 69: 
1.1 timbl 70:   HTTag       *current_tag;
1.2 timbl 71:   int        current_attribute_number;
1.1 timbl 72:   HTChunk      *string;
 73:   HTElement     *element_stack;
1.21 frystyk 74:   sgml_state     state;
1.2 timbl 75:   BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
 76:   char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
 77: } ;
 78: 
 79: 
 80: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
 81: 
1.1 timbl 82: 
1.17 timbl 83: /*   Find Attribute Number
 84: **   ---------------------
 85: */
 86: 
1.38 frystyk 87: PUBLIC int SGMLFindAttribute (HTTag* tag, const char * s)
1.17 timbl 88: {
 89:   attr* attributes = tag->attributes;
 90: 
 91:   int high, low, i, diff;      /* Binary search for attribute name */
 92:   for(low=0, high=tag->number_of_attributes;
 93:        high > low ;
 94:        diff < 0 ? (low = i+1) : (high = i) ) {
 95:    i = (low + (high-low)/2);
 96:    diff = strcasecomp(attributes[i].name, s);
 97:    if (diff==0) return i;         /* success: found it */
 98:   } /* for */
 99:   
 100:   return -1;
 101: }
 102: 
1.1 timbl 103: 
 104: /*   Handle Attribute
 105: **   ----------------
 106: */
1.38 frystyk 107: /* PUBLIC const char * SGML_default = "";  ?? */
1.1 timbl 108: 
1.38 frystyk 109: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.1 timbl 110: {
1.2 timbl 111: 
 112:   HTTag * tag = context->current_tag;
 113: 
1.17 timbl 114:   int i = SGMLFindAttribute(tag, s);
 115:   if (i>=0) {
 116:    context->current_attribute_number = i;
 117:    context->present[i] = YES;
 118:    if (context->value[i]) {
1.36 frystyk 119:      HT_FREE(context->value[i]);
1.17 timbl 120:      context->value[i] = NULL;
 121:    }
 122:    return;
 123:   } /* if */
1.2 timbl 124:    
1.20 frystyk 125:   if (SGML_TRACE)
1.37 eric 126:    HTTrace("SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 127:      s, context->current_tag->name);
 128:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 129: }
 130: 
 131: 
 132: /*   Handle attribute value
 133: **   ----------------------
 134: */
1.38 frystyk 135: PRIVATE void handle_attribute_value (HTStream * context, const char * s)
1.1 timbl 136: {
1.2 timbl 137:   if (context->current_attribute_number != INVALID) {
 138:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 139:   } else {
1.37 eric 140:     if (SGML_TRACE) HTTrace("SGML: Attribute value %s ignored\n", s);
1.1 timbl 141:   }
1.2 timbl 142:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143: }
 144: 
1.2 timbl 145: 
1.1 timbl 146: /*   Handle entity
 147: **   -------------
 148: **
 149: ** On entry,
 150: **   s    contains the entity name zero terminated
 151: ** Bugs:
 152: **   If the entity name is unknown, the terminator is treated as
 153: **   a printable non-special character in all cases, even if it is '<'
 154: */
1.31 frystyk 155: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 156: {
1.2 timbl 157: 
1.38 frystyk 158:   const char ** entities = context->dtd->entity_names;
 159:   const char *s = context->string->data;
1.2 timbl 160:   
 161:   int high, low, i, diff;
 162:   for(low=0, high = context->dtd->number_of_entities;
 163:        high > low ;
 164:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 165:    i = (low + (high-low)/2);
 166:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 167:    if (diff==0) {         /* success: found it */
 168:      (*context->actions->put_entity)(context->target, i);
 169:      return;
1.1 timbl 170:    }
 171:   }
 172:   /* If entity string not found, display as text */
1.20 frystyk 173:   if (SGML_TRACE)
1.37 eric 174:    HTTrace("SGML: Unknown entity %s\n", s); 
1.2 timbl 175:   PUTC('&');
1.1 timbl 176:   {
1.38 frystyk 177:    const char *p;
1.1 timbl 178:    for (p=s; *p; p++) {
1.2 timbl 179:      PUTC(*p);
1.1 timbl 180:    }
 181:   }
1.2 timbl 182:   PUTC(term);
1.1 timbl 183: }
 184: 
1.35 frystyk 185: /*
 186: **   Helper function to check if the tag is on the stack
 187: */
 188: PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag)
 189: {
 190:   HTElement* elem;
 191:   for (elem = stack; elem != NULL; elem = elem->next)
 192:   {
 193:     if (elem->tag == tag) return YES;
 194:   }
 195:   return NO;
 196: }
1.2 timbl 197: 
1.1 timbl 198: /*   End element
1.2 timbl 199: **   -----------
1.1 timbl 200: */
1.31 frystyk 201: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 202: {
1.37 eric 203:   if (SGML_TRACE) HTTrace("SGML: End  </%s>\n", old_tag->name);
1.2 timbl 204:   if (old_tag->contents == SGML_EMPTY) {
1.37 eric 205:     if (SGML_TRACE) HTTrace("SGML: Illegal end tag </%s> found.\n",
1.1 timbl 206:        old_tag->name);
 207:    return;
 208:   }
 209:   while (context->element_stack)   {/* Loop is error path only */
 210:    HTElement * N = context->element_stack;
 211:    HTTag * t = N->tag;
 212:    
 213:    if (old_tag != t) {       /* Mismatch: syntax error */
1.35 frystyk 214:      /*
 215:      ** Patch from Maciej Puzio, puzio@laser.mimuw.edu.pl
 216:      ** See explanation in ../User/Patch/lib_4.0_1.fix
 217:      */
 218:       if (context->element_stack->next  /* This is not the last level */
 219:        && lookup_element_stack(context->element_stack, old_tag)) {
1.37 eric 220:        if (SGML_TRACE) HTTrace(
1.1 timbl 221:        "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
 222:          old_tag->name, t->name, t->name);
 223:      } else {          /* last level */
1.37 eric 224:        if (SGML_TRACE) HTTrace(
1.1 timbl 225:          "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
 226:          old_tag->name, t->name, old_tag->name);
 227:        return;         /* Ignore */
 228:      }
 229:    }
 230:    
 231:    context->element_stack = N->next;        /* Remove from stack */
1.36 frystyk 232:    HT_FREE(N);
1.2 timbl 233:    (*context->actions->end_element)(context->target,
 234:         t - context->dtd->tags);
1.1 timbl 235:    if (old_tag == t) return; /* Correct sequence */
 236:    
 237:    /* Syntax error path only */
 238:    
 239:   }
1.37 eric 240:   if (SGML_TRACE) HTTrace(
1.1 timbl 241:    "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
 242: }
 243: 
 244: 
1.17 timbl 245: /*   Start an element
 246: **   ----------------
1.1 timbl 247: */
1.31 frystyk 248: PRIVATE void start_element (HTStream * context)
1.1 timbl 249: {
 250:   HTTag * new_tag = context->current_tag;
 251:   
1.37 eric 252:   if (SGML_TRACE) HTTrace("SGML: Start <%s>\n", new_tag->name);
1.2 timbl 253:   (*context->actions->start_element)(
 254:    context->target,
 255:    new_tag - context->dtd->tags,
 256:    context->present,
1.38 frystyk 257:    (const char**) context->value); /* coerce type for think c */
1.2 timbl 258:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.36 frystyk 259:    HTElement * N;
 260:    if ((N = (HTElement *) HT_MALLOC(sizeof(HTElement))) == NULL)
 261:      HT_OUTOFMEM("start_element");
1.1 timbl 262:    N->next = context->element_stack;
 263:    N->tag = new_tag;
 264:    context->element_stack = N;
 265:   }
 266: }
 267: 
 268: 
1.2 timbl 269: /*       Find Tag in DTD tag list
 270: **       ------------------------
1.1 timbl 271: **
 272: ** On entry,
1.2 timbl 273: **   dtd   points to dtd structire including valid tag list
 274: **   string points to name of tag in question
1.1 timbl 275: **
1.2 timbl 276: ** On exit,
 277: **   returns:
1.7 timbl 278: **       NULL      tag not found
 279: **       else      address of tag structure in dtd
1.2 timbl 280: */
1.38 frystyk 281: PUBLIC HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.2 timbl 282: {
 283:   int high, low, i, diff;
 284:   for(low=0, high=dtd->number_of_tags;
 285:        high > low ;
 286:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 287:    i = (low + (high-low)/2);
1.3 timbl 288:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 289:    if (diff==0) {         /* success: found it */
1.7 timbl 290:      return &dtd->tags[i];
1.2 timbl 291:    }
 292:   }
1.7 timbl 293:   return NULL;
1.2 timbl 294: }
 295: 
 296: /*________________________________________________________________________
 297: **           Public Methods
1.1 timbl 298: */
 299: 
1.2 timbl 300: 
 301: /*   Could check that we are back to bottom of stack! @@ */
1.31 frystyk 302: PUBLIC int SGML_flush (HTStream * context)
1.26 frystyk 303: {
 304:   while (context->element_stack) {
 305:    HTElement *ptr = context->element_stack;
 306:    if (SGML_TRACE)
1.37 eric 307:      HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 308:          context->element_stack->tag->name);
 309:    context->element_stack = ptr->next;
1.36 frystyk 310:    HT_FREE(ptr);
1.26 frystyk 311:   }
 312:   return (*context->actions->flush)(context->target);
 313: }
1.1 timbl 314: 
1.31 frystyk 315: PUBLIC int SGML_free (HTStream * context)
1.8 timbl 316: {
1.26 frystyk 317:   int status;
1.14 frystyk 318:   int cnt;
1.15 frystyk 319:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 320:    HTElement *ptr = context->element_stack;
 321: 
1.26 frystyk 322:    if (SGML_TRACE)
1.37 eric 323:      HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 324:          context->element_stack->tag->name);
1.15 frystyk 325:    context->element_stack = ptr->next;
1.36 frystyk 326:    HT_FREE(ptr);
1.15 frystyk 327:   }
1.26 frystyk 328:   if ((status = (*context->actions->_free)(context->target)) != HT_OK)
 329:    return status;
1.33 frystyk 330:   HTChunk_delete(context->string);
1.15 frystyk 331:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 332:    if(context->value[cnt])
1.36 frystyk 333:      HT_FREE(context->value[cnt]);
 334:   HT_FREE(context);
1.26 frystyk 335:   return HT_OK;
1.1 timbl 336: }
 337: 
1.31 frystyk 338: PUBLIC int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 339: {
1.14 frystyk 340:   int cnt;
1.15 frystyk 341:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 342:    HTElement *ptr = context->element_stack;
1.26 frystyk 343:    if (SGML_TRACE)
1.37 eric 344:      HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 345:          context->element_stack->tag->name);
1.15 frystyk 346:    context->element_stack = ptr->next;
1.36 frystyk 347:    HT_FREE(ptr);
1.15 frystyk 348:   }
1.8 timbl 349:   (*context->actions->abort)(context->target, e);
1.33 frystyk 350:   HTChunk_delete(context->string);
1.14 frystyk 351:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 352:    if(context->value[cnt])
1.36 frystyk 353:      HT_FREE(context->value[cnt]);
 354:   HT_FREE(context);
1.26 frystyk 355:   return HT_ERROR;
1.1 timbl 356: }
 357: 
1.31 frystyk 358: PUBLIC int SGML_character (HTStream * context, char c)
1.1 timbl 359: 
 360: {
1.38 frystyk 361:   const SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 362:   HTChunk  *string =    context->string;
 363: 
 364:   switch(context->state) {
1.18 timbl 365:   
 366:   case S_after_open: /* Strip one trainling newline
 367:            only after opening nonempty element. - SGML:Ugh! */
 368:     if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
 369:      break;
 370:    }
 371:    context->state = S_text;
 372:    goto normal_text;
 373:    /* (***falls through***) */
 374:    
1.1 timbl 375:   case S_text:
1.18 timbl 376: normal_text:
 377: 
1.13 timbl 378: #ifdef ISO_2022_JP
 379:    if (c=='033円') {
 380:      context->state = S_esc;
 381:      PUTC(c);
 382:      break;
 383:    }
 384: #endif /* ISO_2022_JP */
1.6 timbl 385:    if (c=='&' && (!context->element_stack || (
 386:             context->element_stack->tag &&
 387:             ( context->element_stack->tag->contents == SGML_MIXED
 388:              || context->element_stack->tag->contents ==
 389:                             SGML_RCDATA)
 390:            ))) {
1.1 timbl 391:      string->size = 0;
 392:      context->state = S_ero;
 393:      
 394:    } else if (c=='<') {
 395:      string->size = 0;
 396:      context->state = (context->element_stack &&
1.13 timbl 397:        context->element_stack->tag &&
 398:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 399:                S_literal : S_tag;
1.18 timbl 400:    } else if (c=='\n') {  /* Newline - ignore if before tag end! */
 401:      context->state = S_nl;
1.2 timbl 402:    } else PUTC(c);
1.1 timbl 403:    break;
1.13 timbl 404: 
1.18 timbl 405:   case S_nl:
 406:     if (c=='<') {
 407:      string->size = 0;
 408:      context->state = (context->element_stack &&
 409:        context->element_stack->tag &&
 410:        context->element_stack->tag->contents == SGML_LITERAL) ?
 411:                S_literal : S_nl_tago;
 412:    } else {
 413:      PUTC('\n');
 414:      context->state = S_text;
 415:      goto normal_text;
 416:    }
 417:    break;
 418: 
 419:   case S_nl_tago:      /* Had newline and tag opener */
 420:     if (c != '/') {
 421:      PUTC('\n');     /* Only ignore newline before </ */
 422:    }
 423:    context->state = S_tag;
 424:    goto handle_S_tag;
 425: 
1.13 timbl 426: #ifdef ISO_2022_JP
 427:   case S_esc:
 428:    if (c=='$') {
 429:      context->state = S_dollar;
 430:    } else if (c=='(') {
 431:      context->state = S_paren;
 432:    } else {
 433:      context->state = S_text;
 434:    }
 435:    PUTC(c);
 436:    break;
 437:   case S_dollar:
 438:    if (c=='@' || c=='B') {
 439:      context->state = S_nonascii_text;
 440:    } else {
 441:      context->state = S_text;
 442:    }
 443:    PUTC(c);
 444:    break;
 445:   case S_paren:
 446:    if (c=='B' || c=='J') {
 447:      context->state = S_text;
 448:    } else {
 449:      context->state = S_text;
 450:    }
 451:    PUTC(c);
 452:    break;
 453:   case S_nonascii_text:
 454:    if (c=='033円') {
 455:      context->state = S_esc;
 456:      PUTC(c);
 457:    } else {
 458:      PUTC(c);
 459:    }
 460:    break;
 461: #endif /* ISO_2022_JP */
1.1 timbl 462: 
1.12 timbl 463: /*   In literal mode, waits only for specific end tag!
1.2 timbl 464: **   Only foir compatibility with old servers.
1.1 timbl 465: */
1.12 timbl 466:   case S_literal :
1.33 frystyk 467:    HTChunk_putc(string, c);
1.1 timbl 468:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 469:        : context->element_stack->tag->name[string->size-2])) {
 470:      int i;
 471:      
1.12 timbl 472:      /* If complete match, end literal */
1.1 timbl 473:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 474:        end_element(context, context->element_stack->tag);
 475:        string->size = 0;
1.2 timbl 476:        context->current_attribute_number = INVALID;
1.1 timbl 477:        context->state = S_text;
 478:        break;
 479:      }      /* If Mismatch: recover string. */
1.2 timbl 480:      PUTC( '<');
1.1 timbl 481:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 482:        PUTC(
1.1 timbl 483:                       string->data[i]);
 484:      context->state = S_text;  
 485:    }
 486:    
 487:     break;
 488: 
 489: /*   Character reference or Entity
 490: */
 491:  case S_ero:
 492:    if (c=='#') {
 493:      context->state = S_cro; /*  &# is Char Ref Open */ 
 494:      break;
 495:    }
 496:    context->state = S_entity;  /* Fall through! */
 497:    
 498: /*   Handle Entities
 499: */
 500:   case S_entity:
 501:    if (isalnum(c))
1.33 frystyk 502:      HTChunk_putc(string, c);
1.1 timbl 503:    else {
1.33 frystyk 504:      HTChunk_terminate(string);
1.1 timbl 505:      handle_entity(context, c);
 506:      context->state = S_text;
 507:    }
 508:    break;
 509: 
 510: /*   Character reference
 511: */
 512:   case S_cro:
 513:    if (isalnum(c))
1.33 frystyk 514:      HTChunk_putc(string, c);  /* accumulate a character NUMBER */
1.1 timbl 515:    else {
 516:      int value;
1.33 frystyk 517:      HTChunk_terminate(string);
1.1 timbl 518:      if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 519:        PUTC((char) value);
1.1 timbl 520:      context->state = S_text;
 521:    }
 522:    break;
 523: 
 524: /*       Tag
 525: */     
 526:   case S_tag:                /* new tag */
1.18 timbl 527: handle_S_tag:
 528: 
1.1 timbl 529:    if (isalnum(c))
1.33 frystyk 530:      HTChunk_putc(string, c);
1.1 timbl 531:    else {             /* End of tag name */
1.7 timbl 532:      HTTag * t;
1.1 timbl 533:      if (c=='/') {
1.20 frystyk 534:        if (SGML_TRACE) if (string->size!=0)
1.37 eric 535:          HTTrace("SGML: `<%s/' found!\n", string->data);
1.1 timbl 536:        context->state = S_end;
 537:        break;
 538:      }
1.33 frystyk 539:      HTChunk_terminate(string) ;
1.2 timbl 540: 
1.10 timbl 541:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 542:      if (!t) {
1.37 eric 543:        if(SGML_TRACE) HTTrace("SGML: *** Unknown element %s\n",
1.1 timbl 544:            string->data);
 545:        context->state = (c=='>') ? S_text : S_junk_tag;
 546:        break;
 547:      }
1.7 timbl 548:      context->current_tag = t;
1.2 timbl 549:      
 550:      /* Clear out attributes
 551:      */
1.1 timbl 552:      
1.2 timbl 553:      {
 554:        int i;
 555:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 556:          context->present[i] = NO;
1.1 timbl 557:      }
 558:      string->size = 0;
1.2 timbl 559:      context->current_attribute_number = INVALID;
1.1 timbl 560:      
 561:      if (c=='>') {
 562:        if (context->current_tag->name) start_element(context);
1.18 timbl 563:        context->state = S_after_open;
1.1 timbl 564:      } else {
 565:        context->state = S_tag_gap;
 566:      }
 567:    }
 568:    break;
 569: 
 570:        
 571:   case S_tag_gap:      /* Expecting attribute or > */
 572:    if (WHITE(c)) break;  /* Gap between attributes */
 573:    if (c=='>') {      /* End of tag */
 574:      if (context->current_tag->name) start_element(context);
1.18 timbl 575:      context->state = S_after_open;
1.1 timbl 576:      break;
 577:    }
1.33 frystyk 578:    HTChunk_putc(string, c);
1.1 timbl 579:    context->state = S_attr;        /* Get attribute */
 580:    break;
 581:    
 582:                /* accumulating value */
 583:   case S_attr:
 584:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
1.33 frystyk 585:      HTChunk_terminate(string) ;
1.1 timbl 586:      handle_attribute_name(context, string->data);
 587:      string->size = 0;
 588:      if (c=='>') {        /* End of tag */
 589:        if (context->current_tag->name) start_element(context);
1.18 timbl 590:        context->state = S_after_open;
1.1 timbl 591:        break;
 592:      }
 593:      context->state = (c=='=' ? S_equals: S_attr_gap);
 594:    } else {
1.33 frystyk 595:      HTChunk_putc(string, c);
1.1 timbl 596:    }
 597:    break;
 598:        
 599:   case S_attr_gap:      /* Expecting attribute or = or > */
 600:    if (WHITE(c)) break;  /* Gap after attribute */
 601:    if (c=='>') {      /* End of tag */
 602:      if (context->current_tag->name) start_element(context);
1.18 timbl 603:      context->state = S_after_open;
1.1 timbl 604:      break;
 605:    } else if (c=='=') {
 606:      context->state = S_equals;
 607:      break;
 608:    }
1.33 frystyk 609:    HTChunk_putc(string, c);
1.1 timbl 610:    context->state = S_attr;        /* Get next attribute */
 611:    break;
 612:    
 613:   case S_equals:           /* After attr = */ 
 614:    if (WHITE(c)) break;  /* Before attribute value */
 615:    if (c=='>') {      /* End of tag */
1.37 eric 616:      if (SGML_TRACE) HTTrace("SGML: found = but no value\n");
1.1 timbl 617:      if (context->current_tag->name) start_element(context);
1.18 timbl 618:      context->state = S_after_open;
1.1 timbl 619:      break;
 620:      
 621:    } else if (c=='\'') {
 622:      context->state = S_squoted;
 623:      break;
 624: 
 625:    } else if (c=='"') {
 626:      context->state = S_dquoted;
 627:      break;
 628:    }
1.33 frystyk 629:    HTChunk_putc(string, c);
1.1 timbl 630:    context->state = S_value;
 631:    break;
 632:    
 633:   case S_value:
 634:    if (WHITE(c) || (c=='>')) {       /* End of word */
1.33 frystyk 635:      HTChunk_terminate(string) ;
1.1 timbl 636:      handle_attribute_value(context, string->data);
 637:      string->size = 0;
 638:      if (c=='>') {        /* End of tag */
 639:        if (context->current_tag->name) start_element(context);
1.18 timbl 640:        context->state = S_after_open;
1.1 timbl 641:        break;
 642:      }
 643:      else context->state = S_tag_gap;
 644:    } else {
1.33 frystyk 645:      HTChunk_putc(string, c);
1.1 timbl 646:    }
 647:    break;
 648:        
 649:   case S_squoted:      /* Quoted attribute value */
 650:    if (c=='\'') {     /* End of attribute value */
1.33 frystyk 651:      HTChunk_terminate(string) ;
1.1 timbl 652:      handle_attribute_value(context, string->data);
 653:      string->size = 0;
 654:      context->state = S_tag_gap;
 655:    } else {
1.33 frystyk 656:      HTChunk_putc(string, c);
1.1 timbl 657:    }
 658:    break;
 659:    
 660:   case S_dquoted:      /* Quoted attribute value */
 661:    if (c=='"') {      /* End of attribute value */
1.33 frystyk 662:      HTChunk_terminate(string) ;
1.1 timbl 663:      handle_attribute_value(context, string->data);
 664:      string->size = 0;
 665:      context->state = S_tag_gap;
 666:    } else {
1.33 frystyk 667:      HTChunk_putc(string, c);
1.1 timbl 668:    }
 669:    break;
 670:    
 671:   case S_end:                    /* </ */
 672:    if (isalnum(c))
1.33 frystyk 673:      HTChunk_putc(string, c);
1.1 timbl 674:    else {             /* End of end tag name */
1.7 timbl 675:      HTTag * t;
1.33 frystyk 676:      HTChunk_terminate(string) ;
1.7 timbl 677:      if (!*string->data) {    /* Empty end tag */
 678:        t = context->element_stack->tag;
 679:      } else {
1.10 timbl 680:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 681:      }
1.7 timbl 682:      if (!t) {
1.37 eric 683:        if(SGML_TRACE) HTTrace(
1.1 timbl 684:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 685:      } else {
1.7 timbl 686:        context->current_tag = t;
1.2 timbl 687:        end_element( context, context->current_tag);
1.1 timbl 688:      }
1.2 timbl 689: 
1.1 timbl 690:      string->size = 0;
1.2 timbl 691:      context->current_attribute_number = INVALID;
1.7 timbl 692:      if (c!='>') {
1.20 frystyk 693:        if (SGML_TRACE && !WHITE(c))
1.37 eric 694:          HTTrace("SGML: `</%s%c' found!\n",
1.7 timbl 695:            string->data, c);
 696:        context->state = S_junk_tag;
 697:      } else {
 698:        context->state = S_text;
 699:      }
1.1 timbl 700:    }
 701:    break;
 702: 
 703:        
 704:   case S_junk_tag:
 705:    if (c=='>') {
 706:      context->state = S_text;
 707:    }
 708:   } /* switch on context->state */
1.26 frystyk 709:   return HT_OK;
 710: }
1.2 timbl 711: 
 712: 
1.38 frystyk 713: PUBLIC int SGML_string (HTStream * context, const char* s)
1.2 timbl 714: {
1.26 frystyk 715:   while (*s)
 716:     SGML_character(context, *s++);
 717:   return HT_OK;
1.2 timbl 718: }
 719: 
 720: 
1.38 frystyk 721: PUBLIC int SGML_write (HTStream * context, const char* b, int l)
1.2 timbl 722: {
1.26 frystyk 723:   while (l-- > 0)
 724:     SGML_character(context, *b++);
 725:   return HT_OK;
1.2 timbl 726: }
 727: 
 728: /*_______________________________________________________________________
 729: */
 730: 
 731: /*   Structured Object Class
 732: **   -----------------------
 733: */
1.38 frystyk 734: PRIVATE const HTStreamClass SGMLParser = 
1.2 timbl 735: {       
1.32 frystyk 736:   "SGMLParser",
 737:   SGML_flush,
 738:   SGML_free,
 739:   SGML_abort,
 740:   SGML_character, 
 741:   SGML_string,
 742:   SGML_write,
1.2 timbl 743: }; 
 744: 
 745: /*   Create SGML Engine
 746: **   ------------------
 747: **
 748: ** On entry,
 749: **   dtd       represents the DTD, along with
 750: **   actions     is the sink for the data as a set of routines.
 751: **
 752: */
1.38 frystyk 753: PUBLIC HTStream * SGML_new (const SGML_dtd * dtd, HTStructured * target)
1.2 timbl 754: {
 755:   int i;
1.36 frystyk 756:   HTStream* context;
 757:   if ((context = (HTStream *) HT_MALLOC(sizeof(*context))) == NULL)
 758:     HT_OUTOFMEM("SGML_begin");
1.2 timbl 759: 
 760:   context->isa = &SGMLParser;
1.33 frystyk 761:   context->string = HTChunk_new(128);    /* Grow by this much */
1.2 timbl 762:   context->dtd = dtd;
 763:   context->target = target;
 764:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 765:                    /* Ugh: no OO */
 766:   context->state = S_text;
 767:   context->element_stack = 0;            /* empty */
 768:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 769: 
 770:   return context;
 771: }

Webmaster

AltStyle によって変換されたページ (->オリジナル) /