[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.22

1.1 timbl 1: /*           General SGML Parser code        SGML.c
 2: **           ========================
 3: **
1.2 timbl 4: **   This module implements an HTStream object. To parse an
1.1 timbl 5: **   SGML file, create this object which is a parser. The object
1.2 timbl 6: **   is (currently) created by being passed a DTD structure,
 7: **   and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 8: **   
1.19 duns 9: **   6 Feb 93    Binary searches used. Interface modified.
 10: **   8 Jul 94 FM  Insulate free() from _free structure element.
1.1 timbl 11: */
 12: 
1.20 frystyk 13: /* System dependent stuff */
 14: #include "tcp.h"        /* For FROMASCII */
 15: 
 16: /* Library includes */
1.1 timbl 17: #include "HTUtils.h"
 18: #include "HTChunk.h"
1.20 frystyk 19: #include "SGML.h"
1.1 timbl 20: 
1.2 timbl 21: #define INVALID (-1)
 22: 
1.1 timbl 23: /*   The State (context) of the parser
 24: **
1.2 timbl 25: **   This is passed with each call to make the parser reentrant
1.1 timbl 26: **
 27: */
 28: 
1.16 frystyk 29: 
1.2 timbl 30: 
 31:    
 32: /*       Element Stack
 33: **       -------------
 34: **   This allows us to return down the stack reselcting styles.
 35: **   As we return, attribute values will be garbage in general.
 36: */
 37: typedef struct _HTElement HTElement;
 38: struct _HTElement {
 39:    HTElement *   next;  /* Previously nested element or 0 */
 40:    HTTag*     tag;  /* The tag at this level */
 41: };
 42: 
 43: 
/*	Parser states
**	-------------
**	The state says what the parser has just seen and therefore how
**	SGML_character() must interpret the next input character.
*/
typedef enum _sgml_state {
    S_text,		/* ordinary character data */
    S_literal,		/* after '<' inside a LITERAL element */
    S_tag,		/* after '<': collecting a tag name */
    S_tag_gap,		/* inside a start tag: attribute or '>' expected */
    S_attr,		/* collecting an attribute name */
    S_attr_gap,		/* after an attribute name: '=', name or '>' */
    S_equals,		/* after '=': a value is expected */
    S_value,		/* collecting an unquoted attribute value */
    S_after_open,	/* directly after the '>' of a start tag */
    S_nl,		/* newline seen in text, not yet passed on */
    S_nl_tago,		/* newline followed by '<' */
    S_ero,		/* after '&' */
    S_cro,		/* after "&#" */
#ifdef ISO_2022_JP
    S_esc,		/* after ESC */
    S_dollar,		/* after ESC '$' */
    S_paren,		/* after ESC '(' */
    S_nonascii_text,	/* text passed through until the next ESC */
#endif
    S_squoted,		/* inside a '...' attribute value */
    S_dquoted,		/* inside a "..." attribute value */
    S_end,		/* after "</": collecting an end tag name */
    S_entity,		/* collecting an entity name */
    S_junk_tag		/* discarding input up to the next '>' */
} sgml_state;
 54: 
 55: 
1.2 timbl 56: /*   Internal Context Data Structure
 57: **   -------------------------------
 58: */
 59: struct _HTStream {
 60: 
 61:   CONST HTStreamClass *   isa;      /* inherited from HTStream */
 62:   
 63:   CONST SGML_dtd       *dtd;
 64:   HTStructuredClass *actions;    /* target class */
 65:   HTStructured    *target;    /* target object */
 66: 
1.1 timbl 67:   HTTag       *current_tag;
1.2 timbl 68:   int        current_attribute_number;
1.1 timbl 69:   HTChunk      *string;
 70:   HTElement     *element_stack;
1.21 frystyk 71:   sgml_state     state;
1.2 timbl 72: #ifdef CALLERDATA        
1.1 timbl 73:   void *       callerData;
1.2 timbl 74: #endif
 75:   BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
 76:   char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
 77: } ;
 78: 
 79: 
 80: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
 81: 
1.1 timbl 82: 
1.17 timbl 83: /*   Find Attribute Number
 84: **   ---------------------
 85: */
 86: 
 87: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
 88: {
 89:   attr* attributes = tag->attributes;
 90: 
 91:   int high, low, i, diff;      /* Binary search for attribute name */
 92:   for(low=0, high=tag->number_of_attributes;
 93:        high > low ;
 94:        diff < 0 ? (low = i+1) : (high = i) ) {
 95:    i = (low + (high-low)/2);
 96:    diff = strcasecomp(attributes[i].name, s);
 97:    if (diff==0) return i;         /* success: found it */
 98:   } /* for */
 99:   
 100:   return -1;
 101: }
 102: 
1.1 timbl 103: 
 104: /*   Handle Attribute
 105: **   ----------------
 106: */
 107: /* PUBLIC CONST char * SGML_default = "";  ?? */
 108: 
1.21 frystyk 109: PRIVATE void handle_attribute_name ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 110: {
1.2 timbl 111: 
 112:   HTTag * tag = context->current_tag;
 113: 
1.17 timbl 114:   int i = SGMLFindAttribute(tag, s);
 115:   if (i>=0) {
 116:    context->current_attribute_number = i;
 117:    context->present[i] = YES;
 118:    if (context->value[i]) {
 119:      free(context->value[i]);
 120:      context->value[i] = NULL;
 121:    }
 122:    return;
 123:   } /* if */
1.2 timbl 124:    
1.20 frystyk 125:   if (SGML_TRACE)
1.2 timbl 126:    fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
 127:      s, context->current_tag->name);
 128:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 129: }
 130: 
 131: 
 132: /*   Handle attribute value
 133: **   ----------------------
 134: */
1.21 frystyk 135: PRIVATE void handle_attribute_value ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 136: {
1.2 timbl 137:   if (context->current_attribute_number != INVALID) {
 138:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 139:   } else {
1.20 frystyk 140:     if (SGML_TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 141:   }
1.2 timbl 142:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143: }
 144: 
1.2 timbl 145: 
1.1 timbl 146: /*   Handle entity
 147: **   -------------
 148: **
 149: ** On entry,
 150: **   s    contains the entity name zero terminated
 151: ** Bugs:
 152: **   If the entity name is unknown, the terminator is treated as
 153: **   a printable non-special character in all cases, even if it is '<'
 154: */
1.21 frystyk 155: PRIVATE void handle_entity ARGS2(HTStream *, context, char, term)
1.1 timbl 156: {
1.2 timbl 157: 
1.3 timbl 158:   CONST char ** entities = context->dtd->entity_names;
1.1 timbl 159:   CONST char *s = context->string->data;
1.2 timbl 160:   
 161:   int high, low, i, diff;
 162:   for(low=0, high = context->dtd->number_of_entities;
 163:        high > low ;
 164:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 165:    i = (low + (high-low)/2);
 166:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 167:    if (diff==0) {         /* success: found it */
 168:      (*context->actions->put_entity)(context->target, i);
 169:      return;
1.1 timbl 170:    }
 171:   }
 172:   /* If entity string not found, display as text */
1.20 frystyk 173:   if (SGML_TRACE)
1.1 timbl 174:    fprintf(stderr, "SGML: Unknown entity %s\n", s); 
1.2 timbl 175:   PUTC('&');
1.1 timbl 176:   {
 177:    CONST char *p;
 178:    for (p=s; *p; p++) {
1.2 timbl 179:      PUTC(*p);
1.1 timbl 180:    }
 181:   }
1.2 timbl 182:   PUTC(term);
1.1 timbl 183: }
 184: 
1.2 timbl 185: 
1.1 timbl 186: /*   End element
1.2 timbl 187: **   -----------
1.1 timbl 188: */
1.21 frystyk 189: PRIVATE void end_element ARGS2(HTStream *, context, HTTag *, old_tag)
1.1 timbl 190: {
1.20 frystyk 191:   if (SGML_TRACE) fprintf(stderr, "SGML: End  </%s>\n", old_tag->name);
1.2 timbl 192:   if (old_tag->contents == SGML_EMPTY) {
1.20 frystyk 193:     if (SGML_TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 194:        old_tag->name);
 195:    return;
 196:   }
 197:   while (context->element_stack)   {/* Loop is error path only */
 198:    HTElement * N = context->element_stack;
 199:    HTTag * t = N->tag;
 200:    
 201:    if (old_tag != t) {       /* Mismatch: syntax error */
 202:      if (context->element_stack->next) { /* This is not the last level */
1.20 frystyk 203:        if (SGML_TRACE) fprintf(stderr,
1.1 timbl 204:        "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
 205:          old_tag->name, t->name, t->name);
 206:      } else {          /* last level */
1.20 frystyk 207:        if (SGML_TRACE) fprintf(stderr,
1.1 timbl 208:          "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
 209:          old_tag->name, t->name, old_tag->name);
 210:        return;         /* Ignore */
 211:      }
 212:    }
 213:    
 214:    context->element_stack = N->next;        /* Remove from stack */
 215:    free(N);
1.2 timbl 216:    (*context->actions->end_element)(context->target,
 217:         t - context->dtd->tags);
1.1 timbl 218:    if (old_tag == t) return; /* Correct sequence */
 219:    
 220:    /* Syntax error path only */
 221:    
 222:   }
1.20 frystyk 223:   if (SGML_TRACE) fprintf(stderr,
1.1 timbl 224:    "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
 225: }
 226: 
 227: 
1.17 timbl 228: /*   Start an element
 229: **   ----------------
1.1 timbl 230: */
1.21 frystyk 231: PRIVATE void start_element ARGS1(HTStream *, context)
1.1 timbl 232: {
 233:   HTTag * new_tag = context->current_tag;
 234:   
1.20 frystyk 235:   if (SGML_TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 236:   (*context->actions->start_element)(
 237:    context->target,
 238:    new_tag - context->dtd->tags,
 239:    context->present,
1.3 timbl 240:    (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 241:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.1 timbl 242:    HTElement * N = (HTElement *)malloc(sizeof(HTElement));
 243:     if (N == NULL) outofmem(__FILE__, "start_element");
 244:    N->next = context->element_stack;
 245:    N->tag = new_tag;
 246:    context->element_stack = N;
 247:   }
 248: }
 249: 
 250: 
1.2 timbl 251: /*       Find Tag in DTD tag list
 252: **       ------------------------
1.1 timbl 253: **
 254: ** On entry,
1.2 timbl 255: **   dtd   points to dtd structire including valid tag list
 256: **   string points to name of tag in question
1.1 timbl 257: **
1.2 timbl 258: ** On exit,
 259: **   returns:
1.7 timbl 260: **       NULL      tag not found
 261: **       else      address of tag structure in dtd
1.2 timbl 262: */
1.11 timbl 263: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 264: {
 265:   int high, low, i, diff;
 266:   for(low=0, high=dtd->number_of_tags;
 267:        high > low ;
 268:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 269:    i = (low + (high-low)/2);
1.3 timbl 270:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 271:    if (diff==0) {         /* success: found it */
1.7 timbl 272:      return &dtd->tags[i];
1.2 timbl 273:    }
 274:   }
1.7 timbl 275:   return NULL;
1.2 timbl 276: }
 277: 
 278: /*________________________________________________________________________
 279: **           Public Methods
1.1 timbl 280: */
 281: 
1.2 timbl 282: 
 283: /*   Could check that we are back to bottom of stack! @@ */
1.1 timbl 284: 
1.22 ! frystyk 285: PUBLIC int SGML_free ARGS1(HTStream *, context)
1.8 timbl 286: {
1.14 frystyk 287:   int cnt;
 288: 
1.15 frystyk 289:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 290:    HTElement *ptr = context->element_stack;
 291: 
1.20 frystyk 292:    if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 293:             context->element_stack->tag->name);
 294:    context->element_stack = ptr->next;
 295:    free(ptr);
 296:   }
1.19 duns 297:   (*context->actions->_free)(context->target);
1.8 timbl 298:   HTChunkFree(context->string);
1.15 frystyk 299:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 300:    if(context->value[cnt])
 301:      free(context->value[cnt]);
1.8 timbl 302:   free(context);
1.22 ! frystyk 303:   return 0;
1.1 timbl 304: }
 305: 
1.22 ! frystyk 306: PUBLIC int SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 307: {
1.14 frystyk 308:   int cnt;
 309: 
1.15 frystyk 310:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 311:    HTElement *ptr = context->element_stack;
 312: 
1.20 frystyk 313:    if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 314:             context->element_stack->tag->name);
 315:    context->element_stack = ptr->next;
 316:    free(ptr);
 317:   }
1.8 timbl 318:   (*context->actions->abort)(context->target, e);
1.1 timbl 319:   HTChunkFree(context->string);
1.14 frystyk 320:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 321:    if(context->value[cnt])
 322:      free(context->value[cnt]);
1.1 timbl 323:   free(context);
1.22 ! frystyk 324:   return EOF;
1.1 timbl 325: }
 326: 
1.2 timbl 327: 
/*	Read and write user callback handle
**	-----------------------------------
**
**   The callbacks from the SGML parser have an SGML context parameter.
**   These calls allow the caller to associate his own context with a
**   particular SGML context.  They are compiled only when CALLERDATA
**   is defined.
*/

#ifdef CALLERDATA
/*	Return the handle last stored in this context */
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void * handle = context->callerData;

    return handle;
}

/*	Store the caller's handle in this context */
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
}
#endif
1.1 timbl 347: 
1.2 timbl 348: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 349: 
 350: {
1.2 timbl 351:   CONST SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 352:   HTChunk  *string =    context->string;
 353: 
 354:   switch(context->state) {
1.18 timbl 355:   
 356:   case S_after_open: /* Strip one trainling newline
 357:            only after opening nonempty element. - SGML:Ugh! */
 358:     if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
 359:      break;
 360:    }
 361:    context->state = S_text;
 362:    goto normal_text;
 363:    /* (***falls through***) */
 364:    
1.1 timbl 365:   case S_text:
1.18 timbl 366: normal_text:
 367: 
1.13 timbl 368: #ifdef ISO_2022_JP
 369:    if (c=='033円') {
 370:      context->state = S_esc;
 371:      PUTC(c);
 372:      break;
 373:    }
 374: #endif /* ISO_2022_JP */
1.6 timbl 375:    if (c=='&' && (!context->element_stack || (
 376:             context->element_stack->tag &&
 377:             ( context->element_stack->tag->contents == SGML_MIXED
 378:              || context->element_stack->tag->contents ==
 379:                             SGML_RCDATA)
 380:            ))) {
1.1 timbl 381:      string->size = 0;
 382:      context->state = S_ero;
 383:      
 384:    } else if (c=='<') {
 385:      string->size = 0;
 386:      context->state = (context->element_stack &&
1.13 timbl 387:        context->element_stack->tag &&
 388:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 389:                S_literal : S_tag;
1.18 timbl 390:    } else if (c=='\n') {  /* Newline - ignore if before tag end! */
 391:      context->state = S_nl;
1.2 timbl 392:    } else PUTC(c);
1.1 timbl 393:    break;
1.13 timbl 394: 
1.18 timbl 395:   case S_nl:
 396:     if (c=='<') {
 397:      string->size = 0;
 398:      context->state = (context->element_stack &&
 399:        context->element_stack->tag &&
 400:        context->element_stack->tag->contents == SGML_LITERAL) ?
 401:                S_literal : S_nl_tago;
 402:    } else {
 403:      PUTC('\n');
 404:      context->state = S_text;
 405:      goto normal_text;
 406:    }
 407:    break;
 408: 
 409:   case S_nl_tago:      /* Had newline and tag opener */
 410:     if (c != '/') {
 411:      PUTC('\n');     /* Only ignore newline before </ */
 412:    }
 413:    context->state = S_tag;
 414:    goto handle_S_tag;
 415: 
1.13 timbl 416: #ifdef ISO_2022_JP
 417:   case S_esc:
 418:    if (c=='$') {
 419:      context->state = S_dollar;
 420:    } else if (c=='(') {
 421:      context->state = S_paren;
 422:    } else {
 423:      context->state = S_text;
 424:    }
 425:    PUTC(c);
 426:    break;
 427:   case S_dollar:
 428:    if (c=='@' || c=='B') {
 429:      context->state = S_nonascii_text;
 430:    } else {
 431:      context->state = S_text;
 432:    }
 433:    PUTC(c);
 434:    break;
 435:   case S_paren:
 436:    if (c=='B' || c=='J') {
 437:      context->state = S_text;
 438:    } else {
 439:      context->state = S_text;
 440:    }
 441:    PUTC(c);
 442:    break;
 443:   case S_nonascii_text:
 444:    if (c=='033円') {
 445:      context->state = S_esc;
 446:      PUTC(c);
 447:    } else {
 448:      PUTC(c);
 449:    }
 450:    break;
 451: #endif /* ISO_2022_JP */
1.1 timbl 452: 
1.12 timbl 453: /*   In literal mode, waits only for specific end tag!
1.2 timbl 454: **   Only foir compatibility with old servers.
1.1 timbl 455: */
1.12 timbl 456:   case S_literal :
1.1 timbl 457:    HTChunkPutc(string, c);
 458:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 459:        : context->element_stack->tag->name[string->size-2])) {
 460:      int i;
 461:      
1.12 timbl 462:      /* If complete match, end literal */
1.1 timbl 463:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 464:        end_element(context, context->element_stack->tag);
 465:        string->size = 0;
1.2 timbl 466:        context->current_attribute_number = INVALID;
1.1 timbl 467:        context->state = S_text;
 468:        break;
 469:      }      /* If Mismatch: recover string. */
1.2 timbl 470:      PUTC( '<');
1.1 timbl 471:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 472:        PUTC(
1.1 timbl 473:                       string->data[i]);
 474:      context->state = S_text;  
 475:    }
 476:    
 477:     break;
 478: 
 479: /*   Character reference or Entity
 480: */
 481:  case S_ero:
 482:    if (c=='#') {
 483:      context->state = S_cro; /*  &# is Char Ref Open */ 
 484:      break;
 485:    }
 486:    context->state = S_entity;  /* Fall through! */
 487:    
 488: /*   Handle Entities
 489: */
 490:   case S_entity:
 491:    if (isalnum(c))
 492:      HTChunkPutc(string, c);
 493:    else {
 494:      HTChunkTerminate(string);
 495:      handle_entity(context, c);
 496:      context->state = S_text;
 497:    }
 498:    break;
 499: 
 500: /*   Character reference
 501: */
 502:   case S_cro:
 503:    if (isalnum(c))
 504:      HTChunkPutc(string, c);   /* accumulate a character NUMBER */
 505:    else {
 506:      int value;
 507:      HTChunkTerminate(string);
 508:      if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 509:        PUTC(FROMASCII((char)value));
1.1 timbl 510:      context->state = S_text;
 511:    }
 512:    break;
 513: 
 514: /*       Tag
 515: */     
 516:   case S_tag:                /* new tag */
1.18 timbl 517: handle_S_tag:
 518: 
1.1 timbl 519:    if (isalnum(c))
 520:      HTChunkPutc(string, c);
 521:    else {             /* End of tag name */
1.7 timbl 522:      HTTag * t;
1.1 timbl 523:      if (c=='/') {
1.20 frystyk 524:        if (SGML_TRACE) if (string->size!=0)
1.1 timbl 525:          fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
 526:        context->state = S_end;
 527:        break;
 528:      }
 529:      HTChunkTerminate(string) ;
1.2 timbl 530: 
1.10 timbl 531:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 532:      if (!t) {
1.20 frystyk 533:        if(SGML_TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 534:            string->data);
 535:        context->state = (c=='>') ? S_text : S_junk_tag;
 536:        break;
 537:      }
1.7 timbl 538:      context->current_tag = t;
1.2 timbl 539:      
 540:      /* Clear out attributes
 541:      */
1.1 timbl 542:      
1.2 timbl 543:      {
 544:        int i;
 545:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 546:          context->present[i] = NO;
1.1 timbl 547:      }
 548:      string->size = 0;
1.2 timbl 549:      context->current_attribute_number = INVALID;
1.1 timbl 550:      
 551:      if (c=='>') {
 552:        if (context->current_tag->name) start_element(context);
1.18 timbl 553:        context->state = S_after_open;
1.1 timbl 554:      } else {
 555:        context->state = S_tag_gap;
 556:      }
 557:    }
 558:    break;
 559: 
 560:        
 561:   case S_tag_gap:      /* Expecting attribute or > */
 562:    if (WHITE(c)) break;  /* Gap between attributes */
 563:    if (c=='>') {      /* End of tag */
 564:      if (context->current_tag->name) start_element(context);
1.18 timbl 565:      context->state = S_after_open;
1.1 timbl 566:      break;
 567:    }
 568:    HTChunkPutc(string, c);
 569:    context->state = S_attr;        /* Get attribute */
 570:    break;
 571:    
 572:                /* accumulating value */
 573:   case S_attr:
 574:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
 575:      HTChunkTerminate(string) ;
 576:      handle_attribute_name(context, string->data);
 577:      string->size = 0;
 578:      if (c=='>') {        /* End of tag */
 579:        if (context->current_tag->name) start_element(context);
1.18 timbl 580:        context->state = S_after_open;
1.1 timbl 581:        break;
 582:      }
 583:      context->state = (c=='=' ? S_equals: S_attr_gap);
 584:    } else {
 585:      HTChunkPutc(string, c);
 586:    }
 587:    break;
 588:        
 589:   case S_attr_gap:      /* Expecting attribute or = or > */
 590:    if (WHITE(c)) break;  /* Gap after attribute */
 591:    if (c=='>') {      /* End of tag */
 592:      if (context->current_tag->name) start_element(context);
1.18 timbl 593:      context->state = S_after_open;
1.1 timbl 594:      break;
 595:    } else if (c=='=') {
 596:      context->state = S_equals;
 597:      break;
 598:    }
 599:    HTChunkPutc(string, c);
 600:    context->state = S_attr;        /* Get next attribute */
 601:    break;
 602:    
 603:   case S_equals:           /* After attr = */ 
 604:    if (WHITE(c)) break;  /* Before attribute value */
 605:    if (c=='>') {      /* End of tag */
1.20 frystyk 606:      if (SGML_TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 607:      if (context->current_tag->name) start_element(context);
1.18 timbl 608:      context->state = S_after_open;
1.1 timbl 609:      break;
 610:      
 611:    } else if (c=='\'') {
 612:      context->state = S_squoted;
 613:      break;
 614: 
 615:    } else if (c=='"') {
 616:      context->state = S_dquoted;
 617:      break;
 618:    }
 619:    HTChunkPutc(string, c);
 620:    context->state = S_value;
 621:    break;
 622:    
 623:   case S_value:
 624:    if (WHITE(c) || (c=='>')) {       /* End of word */
 625:      HTChunkTerminate(string) ;
 626:      handle_attribute_value(context, string->data);
 627:      string->size = 0;
 628:      if (c=='>') {        /* End of tag */
 629:        if (context->current_tag->name) start_element(context);
1.18 timbl 630:        context->state = S_after_open;
1.1 timbl 631:        break;
 632:      }
 633:      else context->state = S_tag_gap;
 634:    } else {
 635:      HTChunkPutc(string, c);
 636:    }
 637:    break;
 638:        
 639:   case S_squoted:      /* Quoted attribute value */
 640:    if (c=='\'') {     /* End of attribute value */
 641:      HTChunkTerminate(string) ;
 642:      handle_attribute_value(context, string->data);
 643:      string->size = 0;
 644:      context->state = S_tag_gap;
 645:    } else {
 646:      HTChunkPutc(string, c);
 647:    }
 648:    break;
 649:    
 650:   case S_dquoted:      /* Quoted attribute value */
 651:    if (c=='"') {      /* End of attribute value */
 652:      HTChunkTerminate(string) ;
 653:      handle_attribute_value(context, string->data);
 654:      string->size = 0;
 655:      context->state = S_tag_gap;
 656:    } else {
 657:      HTChunkPutc(string, c);
 658:    }
 659:    break;
 660:    
 661:   case S_end:                    /* </ */
 662:    if (isalnum(c))
 663:      HTChunkPutc(string, c);
 664:    else {             /* End of end tag name */
1.7 timbl 665:      HTTag * t;
1.1 timbl 666:      HTChunkTerminate(string) ;
1.7 timbl 667:      if (!*string->data) {    /* Empty end tag */
 668:        t = context->element_stack->tag;
 669:      } else {
1.10 timbl 670:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 671:      }
1.7 timbl 672:      if (!t) {
1.20 frystyk 673:        if(SGML_TRACE) fprintf(stderr,
1.1 timbl 674:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 675:      } else {
1.7 timbl 676:        context->current_tag = t;
1.2 timbl 677:        end_element( context, context->current_tag);
1.1 timbl 678:      }
1.2 timbl 679: 
1.1 timbl 680:      string->size = 0;
1.2 timbl 681:      context->current_attribute_number = INVALID;
1.7 timbl 682:      if (c!='>') {
1.20 frystyk 683:        if (SGML_TRACE && !WHITE(c))
1.7 timbl 684:          fprintf(stderr,"SGML: `</%s%c' found!\n",
 685:            string->data, c);
 686:        context->state = S_junk_tag;
 687:      } else {
 688:        context->state = S_text;
 689:      }
1.1 timbl 690:    }
 691:    break;
 692: 
 693:        
 694:   case S_junk_tag:
 695:    if (c=='>') {
 696:      context->state = S_text;
 697:    }
 698:    
 699:   } /* switch on context->state */
 700: 
 701: } /* SGML_character */
1.2 timbl 702: 
 703: 
 704: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
 705: {
 706:   CONST char *p;
 707:   for(p=str; *p; p++)
 708:     SGML_character(context, *p);
 709: }
 710: 
 711: 
 712: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
 713: {
 714:   CONST char *p;
 715:   CONST char *e = str+l;
 716:   for(p=str; p<e; p++)
 717:     SGML_character(context, *p);
 718: }
 719: 
 720: /*_______________________________________________________________________
 721: */
 722: 
 723: /*   Structured Object Class
 724: **   -----------------------
 725: */
 726: PUBLIC CONST HTStreamClass SGMLParser = 
 727: {       
 728:    "SGMLParser",
 729:    SGML_free,
1.8 timbl 730:    SGML_abort,
1.9 timbl 731:    SGML_character, 
 732:    SGML_string,
 733:    SGML_write,
1.2 timbl 734: }; 
 735: 
 736: /*   Create SGML Engine
 737: **   ------------------
 738: **
 739: ** On entry,
 740: **   dtd       represents the DTD, along with
 741: **   actions     is the sink for the data as a set of routines.
 742: **
 743: */
 744: 
 745: PUBLIC HTStream* SGML_new ARGS2(
 746:    CONST SGML_dtd *,    dtd,
 747:    HTStructured *,     target)
 748: {
 749:   int i;
 750:   HTStream* context = (HTStream *) malloc(sizeof(*context));
 751:   if (!context) outofmem(__FILE__, "SGML_begin");
 752: 
 753:   context->isa = &SGMLParser;
 754:   context->string = HTChunkCreate(128);   /* Grow by this much */
 755:   context->dtd = dtd;
 756:   context->target = target;
 757:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 758:                    /* Ugh: no OO */
 759:   context->state = S_text;
 760:   context->element_stack = 0;            /* empty */
 761: #ifdef CALLERDATA        
 762:   context->callerData = (void*) callerData;
 763: #endif  
 764:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 765: 
 766:   return context;
 767: }
1.14 frystyk 768: 
 769: 
 770: 
 771: 
 772: 
 773: 
 774: 
 775: 
 776: 
 777: 
 778: 
1.2 timbl 779: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /