[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.16

/*           General SGML Parser code        SGML.c
**           ========================
**
**   This module implements an HTStream object. To parse an
**   SGML file, create this object which is a parser. The object
**   is (currently) created by being passed a DTD structure,
**   and a target HTStructured object at which to throw the parsed stuff.
**
**   6 Feb 93 Binary searches used. Interface modified.
*/
 11: #include "SGML.h"
 12: 
 13: #include <ctype.h>
 14: #include <stdio.h>
 15: #include "HTUtils.h"
 16: #include "HTChunk.h"
 17: #include "tcp.h"        /* For FROMASCII */
 18: 
1.2 timbl 19: #define INVALID (-1)
 20: 
1.1 timbl 21: /*   The State (context) of the parser
 22: **
1.2 timbl 23: **   This is passed with each call to make the parser reentrant
1.1 timbl 24: **
 25: */
 26: 
1.16 ! frystyk 27: 
1.2 timbl 28: 
 29:    
 30: /*       Element Stack
 31: **       -------------
 32: **   This allows us to return down the stack reselcting styles.
 33: **   As we return, attribute values will be garbage in general.
 34: */
 35: typedef struct _HTElement HTElement;
 36: struct _HTElement {
 37:    HTElement *   next;  /* Previously nested element or 0 */
 38:    HTTag*     tag;  /* The tag at this level */
 39: };
 40: 
 41: 
 42: /*   Internal Context Data Structure
 43: **   -------------------------------
 44: */
 45: struct _HTStream {
 46: 
 47:   CONST HTStreamClass *   isa;      /* inherited from HTStream */
 48:   
 49:   CONST SGML_dtd       *dtd;
 50:   HTStructuredClass *actions;    /* target class */
 51:   HTStructured    *target;    /* target object */
 52: 
1.1 timbl 53:   HTTag       *current_tag;
1.2 timbl 54:   int        current_attribute_number;
1.1 timbl 55:   HTChunk      *string;
 56:   HTElement     *element_stack;
1.12 timbl 57:   enum sgml_state { S_text, S_literal, S_tag, S_tag_gap, 
1.1 timbl 58:        S_attr, S_attr_gap, S_equals, S_value,
 59:        S_ero, S_cro,
1.13 timbl 60: #ifdef ISO_2022_JP
 61:        S_esc, S_dollar, S_paren, S_nonascii_text,
 62: #endif
1.1 timbl 63:         S_squoted, S_dquoted, S_end, S_entity, S_junk_tag} state;
1.2 timbl 64: #ifdef CALLERDATA        
1.1 timbl 65:   void *       callerData;
1.2 timbl 66: #endif
 67:   BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
 68:   char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
 69: } ;
 70: 
 71: 
 72: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
 73: 
1.1 timbl 74: 
 75: 
 76: /*   Handle Attribute
 77: **   ----------------
 78: */
 79: /* PUBLIC CONST char * SGML_default = "";  ?? */
 80: 
 81: #ifdef __STDC__
1.2 timbl 82: PRIVATE void handle_attribute_name(HTStream * context, const char * s)
1.1 timbl 83: #else
 84: PRIVATE void handle_attribute_name(context, s)
1.2 timbl 85:   HTStream * context;
1.1 timbl 86:   char *s;
 87: #endif
 88: {
1.2 timbl 89: 
 90:   HTTag * tag = context->current_tag;
 91:   attr* attributes = tag->attributes;
 92: 
 93:   int high, low, i, diff;      /* Binary search for attribute name */
 94:   for(low=0, high=tag->number_of_attributes;
 95:        high > low ;
 96:        diff < 0 ? (low = i+1) : (high = i) ) {
 97:    i = (low + (high-low)/2);
 98:    diff = strcasecomp(attributes[i].name, s);
 99:    if (diff==0) {         /* success: found it */
 100:      context->current_attribute_number = i;
 101:      context->present[i] = YES;
 102:      if (context->value[i]) {
 103:        free(context->value[i]);
 104:        context->value[i] = NULL;
 105:      }
 106:      return;
 107:    } /* if */
 108:    
 109:   } /* for */
 110:   
 111:   if (TRACE)
 112:    fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
 113:      s, context->current_tag->name);
 114:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 115: }
 116: 
 117: 
 118: /*   Handle attribute value
 119: **   ----------------------
 120: */
 121: #ifdef __STDC__
1.2 timbl 122: PRIVATE void handle_attribute_value(HTStream * context, const char * s)
1.1 timbl 123: #else
 124: PRIVATE void handle_attribute_value(context, s)
1.2 timbl 125:   HTStream * context;
1.1 timbl 126:   char *s;
 127: #endif
 128: {
1.2 timbl 129:   if (context->current_attribute_number != INVALID) {
 130:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 131:   } else {
 132:     if (TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
 133:   }
1.2 timbl 134:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 135: }
 136: 
1.2 timbl 137: 
1.1 timbl 138: /*   Handle entity
 139: **   -------------
 140: **
 141: ** On entry,
 142: **   s    contains the entity name zero terminated
 143: ** Bugs:
 144: **   If the entity name is unknown, the terminator is treated as
 145: **   a printable non-special character in all cases, even if it is '<'
 146: */
 147: #ifdef __STDC__
1.2 timbl 148: PRIVATE void handle_entity(HTStream * context, char term)
1.1 timbl 149: #else
 150: PRIVATE void handle_entity(context, term)
1.2 timbl 151:   HTStream * context;
1.1 timbl 152:   char term;
 153: #endif
 154: {
1.2 timbl 155: 
1.3 timbl 156:   CONST char ** entities = context->dtd->entity_names;
1.1 timbl 157:   CONST char *s = context->string->data;
1.2 timbl 158:   
 159:   int high, low, i, diff;
 160:   for(low=0, high = context->dtd->number_of_entities;
 161:        high > low ;
 162:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 163:    i = (low + (high-low)/2);
 164:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 165:    if (diff==0) {         /* success: found it */
 166:      (*context->actions->put_entity)(context->target, i);
 167:      return;
1.1 timbl 168:    }
 169:   }
 170:   /* If entity string not found, display as text */
 171:   if (TRACE)
 172:    fprintf(stderr, "SGML: Unknown entity %s\n", s); 
1.2 timbl 173:   PUTC('&');
1.1 timbl 174:   {
 175:    CONST char *p;
 176:    for (p=s; *p; p++) {
1.2 timbl 177:      PUTC(*p);
1.1 timbl 178:    }
 179:   }
1.2 timbl 180:   PUTC(term);
1.1 timbl 181: }
 182: 
1.2 timbl 183: 
1.1 timbl 184: /*   End element
1.2 timbl 185: **   -----------
1.1 timbl 186: */
 187: #ifdef __STDC__
1.2 timbl 188: PRIVATE void end_element(HTStream * context, HTTag * old_tag)
1.1 timbl 189: #else
 190: PRIVATE void end_element(context, old_tag)
 191:   HTTag * old_tag;
1.2 timbl 192:   HTStream * context;
1.1 timbl 193: #endif
 194: {
 195:   if (TRACE) fprintf(stderr, "SGML: End  </%s>\n", old_tag->name);
1.2 timbl 196:   if (old_tag->contents == SGML_EMPTY) {
1.1 timbl 197:     if (TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
 198:        old_tag->name);
 199:    return;
 200:   }
 201:   while (context->element_stack)   {/* Loop is error path only */
 202:    HTElement * N = context->element_stack;
 203:    HTTag * t = N->tag;
 204:    
 205:    if (old_tag != t) {       /* Mismatch: syntax error */
 206:      if (context->element_stack->next) { /* This is not the last level */
 207:        if (TRACE) fprintf(stderr,
 208:        "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
 209:          old_tag->name, t->name, t->name);
 210:      } else {          /* last level */
 211:        if (TRACE) fprintf(stderr,
 212:          "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
 213:          old_tag->name, t->name, old_tag->name);
 214:        return;         /* Ignore */
 215:      }
 216:    }
 217:    
 218:    context->element_stack = N->next;        /* Remove from stack */
 219:    free(N);
1.2 timbl 220:    (*context->actions->end_element)(context->target,
 221:         t - context->dtd->tags);
1.1 timbl 222:    if (old_tag == t) return; /* Correct sequence */
 223:    
 224:    /* Syntax error path only */
 225:    
 226:   }
1.5 timbl 227:   if (TRACE) fprintf(stderr,
1.1 timbl 228:    "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
 229: }
 230: 
 231: 
 232: /*   Start a element
 233: */
 234: #ifdef __STDC__
1.2 timbl 235: PRIVATE void start_element(HTStream * context)
1.1 timbl 236: #else
 237: PRIVATE void start_element(context)
1.2 timbl 238:   HTStream * context;
1.1 timbl 239: #endif
 240: {
 241:   HTTag * new_tag = context->current_tag;
 242:   
 243:   if (TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 244:   (*context->actions->start_element)(
 245:    context->target,
 246:    new_tag - context->dtd->tags,
 247:    context->present,
1.3 timbl 248:    (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 249:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.1 timbl 250:    HTElement * N = (HTElement *)malloc(sizeof(HTElement));
 251:     if (N == NULL) outofmem(__FILE__, "start_element");
 252:    N->next = context->element_stack;
 253:    N->tag = new_tag;
 254:    context->element_stack = N;
 255:   }
 256: }
 257: 
 258: 
1.2 timbl 259: /*       Find Tag in DTD tag list
 260: **       ------------------------
1.1 timbl 261: **
 262: ** On entry,
1.2 timbl 263: **   dtd   points to dtd structire including valid tag list
 264: **   string points to name of tag in question
1.1 timbl 265: **
1.2 timbl 266: ** On exit,
 267: **   returns:
1.7 timbl 268: **       NULL      tag not found
 269: **       else      address of tag structure in dtd
1.2 timbl 270: */
1.11 timbl 271: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 272: {
 273:   int high, low, i, diff;
 274:   for(low=0, high=dtd->number_of_tags;
 275:        high > low ;
 276:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 277:    i = (low + (high-low)/2);
1.3 timbl 278:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 279:    if (diff==0) {         /* success: found it */
1.7 timbl 280:      return &dtd->tags[i];
1.2 timbl 281:    }
 282:   }
1.7 timbl 283:   return NULL;
1.2 timbl 284: }
 285: 
 286: /*________________________________________________________________________
 287: **           Public Methods
1.1 timbl 288: */
 289: 
1.2 timbl 290: 
 291: /*   Could check that we are back to bottom of stack! @@ */
1.1 timbl 292: 
1.8 timbl 293: PUBLIC void SGML_free ARGS1(HTStream *, context)
 294: {
1.14 frystyk 295:   int cnt;
 296: 
1.15 frystyk 297:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 298:    HTElement *ptr = context->element_stack;
 299: 
 300:    if(TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
 301:             context->element_stack->tag->name);
 302:    context->element_stack = ptr->next;
 303:    free(ptr);
 304:   }
1.8 timbl 305:   (*context->actions->free)(context->target);
 306:   HTChunkFree(context->string);
1.15 frystyk 307:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 308:    if(context->value[cnt])
 309:      free(context->value[cnt]);
1.8 timbl 310:   free(context);
1.1 timbl 311: }
 312: 
1.8 timbl 313: PUBLIC void SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 314: {
1.14 frystyk 315:   int cnt;
 316: 
1.15 frystyk 317:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 318:    HTElement *ptr = context->element_stack;
 319: 
 320:    if(TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
 321:             context->element_stack->tag->name);
 322:    context->element_stack = ptr->next;
 323:    free(ptr);
 324:   }
1.8 timbl 325:   (*context->actions->abort)(context->target, e);
1.1 timbl 326:   HTChunkFree(context->string);
1.14 frystyk 327:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 328:    if(context->value[cnt])
 329:      free(context->value[cnt]);
1.1 timbl 330:   free(context);
 331: }
 332: 
1.2 timbl 333: 
/*   Read and write user callback handle
**   -----------------------------------
**
**  The callbacks from the SGML parser have an SGML context parameter.
**  These calls allow the caller to associate his own context with a
**  particular SGML context.  The parser only stores the pointer.
**  Both routines exist only when CALLERDATA is defined.
*/

#ifdef CALLERDATA
/* Return the pointer last stored for this context */
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void * data = context->callerData;
    return data;
}

/* Store `data' in this context, replacing any previous pointer */
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
}
#endif
1.1 timbl 353: 
1.2 timbl 354: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 355: 
 356: {
1.2 timbl 357:   CONST SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 358:   HTChunk  *string =    context->string;
 359: 
 360:   switch(context->state) {
 361:   case S_text:
1.13 timbl 362: #ifdef ISO_2022_JP
 363:    if (c=='033円') {
 364:      context->state = S_esc;
 365:      PUTC(c);
 366:      break;
 367:    }
 368: #endif /* ISO_2022_JP */
1.6 timbl 369:    if (c=='&' && (!context->element_stack || (
 370:             context->element_stack->tag &&
 371:             ( context->element_stack->tag->contents == SGML_MIXED
 372:              || context->element_stack->tag->contents ==
 373:                             SGML_RCDATA)
 374:            ))) {
1.1 timbl 375:      string->size = 0;
 376:      context->state = S_ero;
 377:      
 378:    } else if (c=='<') {
 379:      string->size = 0;
 380:      context->state = (context->element_stack &&
1.13 timbl 381:        context->element_stack->tag &&
 382:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 383:                S_literal : S_tag;
1.2 timbl 384:    } else PUTC(c);
1.1 timbl 385:    break;
1.13 timbl 386: 
 387: #ifdef ISO_2022_JP
 388:   case S_esc:
 389:    if (c=='$') {
 390:      context->state = S_dollar;
 391:    } else if (c=='(') {
 392:      context->state = S_paren;
 393:    } else {
 394:      context->state = S_text;
 395:    }
 396:    PUTC(c);
 397:    break;
 398:   case S_dollar:
 399:    if (c=='@' || c=='B') {
 400:      context->state = S_nonascii_text;
 401:    } else {
 402:      context->state = S_text;
 403:    }
 404:    PUTC(c);
 405:    break;
 406:   case S_paren:
 407:    if (c=='B' || c=='J') {
 408:      context->state = S_text;
 409:    } else {
 410:      context->state = S_text;
 411:    }
 412:    PUTC(c);
 413:    break;
 414:   case S_nonascii_text:
 415:    if (c=='033円') {
 416:      context->state = S_esc;
 417:      PUTC(c);
 418:    } else {
 419:      PUTC(c);
 420:    }
 421:    break;
 422: #endif /* ISO_2022_JP */
1.1 timbl 423: 
1.12 timbl 424: /*   In literal mode, waits only for specific end tag!
1.2 timbl 425: **   Only foir compatibility with old servers.
1.1 timbl 426: */
1.12 timbl 427:   case S_literal :
1.1 timbl 428:    HTChunkPutc(string, c);
 429:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 430:        : context->element_stack->tag->name[string->size-2])) {
 431:      int i;
 432:      
1.12 timbl 433:      /* If complete match, end literal */
1.1 timbl 434:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 435:        end_element(context, context->element_stack->tag);
 436:        string->size = 0;
1.2 timbl 437:        context->current_attribute_number = INVALID;
1.1 timbl 438:        context->state = S_text;
 439:        break;
 440:      }      /* If Mismatch: recover string. */
1.2 timbl 441:      PUTC( '<');
1.1 timbl 442:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 443:        PUTC(
1.1 timbl 444:                       string->data[i]);
 445:      context->state = S_text;  
 446:    }
 447:    
 448:     break;
 449: 
 450: /*   Character reference or Entity
 451: */
 452:  case S_ero:
 453:    if (c=='#') {
 454:      context->state = S_cro; /*  &# is Char Ref Open */ 
 455:      break;
 456:    }
 457:    context->state = S_entity;  /* Fall through! */
 458:    
 459: /*   Handle Entities
 460: */
 461:   case S_entity:
 462:    if (isalnum(c))
 463:      HTChunkPutc(string, c);
 464:    else {
 465:      HTChunkTerminate(string);
 466:      handle_entity(context, c);
 467:      context->state = S_text;
 468:    }
 469:    break;
 470: 
 471: /*   Character reference
 472: */
 473:   case S_cro:
 474:    if (isalnum(c))
 475:      HTChunkPutc(string, c);   /* accumulate a character NUMBER */
 476:    else {
 477:      int value;
 478:      HTChunkTerminate(string);
 479:      if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 480:        PUTC(FROMASCII((char)value));
1.1 timbl 481:      context->state = S_text;
 482:    }
 483:    break;
 484: 
 485: /*       Tag
 486: */     
 487:   case S_tag:                /* new tag */
 488:    if (isalnum(c))
 489:      HTChunkPutc(string, c);
 490:    else {             /* End of tag name */
1.7 timbl 491:      HTTag * t;
1.1 timbl 492:      if (c=='/') {
 493:        if (TRACE) if (string->size!=0)
 494:          fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
 495:        context->state = S_end;
 496:        break;
 497:      }
 498:      HTChunkTerminate(string) ;
1.2 timbl 499: 
1.10 timbl 500:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 501:      if (!t) {
1.2 timbl 502:        if(TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 503:            string->data);
 504:        context->state = (c=='>') ? S_text : S_junk_tag;
 505:        break;
 506:      }
1.7 timbl 507:      context->current_tag = t;
1.2 timbl 508:      
 509:      /* Clear out attributes
 510:      */
1.1 timbl 511:      
1.2 timbl 512:      {
 513:        int i;
 514:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 515:          context->present[i] = NO;
1.1 timbl 516:      }
 517:      string->size = 0;
1.2 timbl 518:      context->current_attribute_number = INVALID;
1.1 timbl 519:      
 520:      if (c=='>') {
 521:        if (context->current_tag->name) start_element(context);
 522:        context->state = S_text;
 523:      } else {
 524:        context->state = S_tag_gap;
 525:      }
 526:    }
 527:    break;
 528: 
 529:        
 530:   case S_tag_gap:      /* Expecting attribute or > */
 531:    if (WHITE(c)) break;  /* Gap between attributes */
 532:    if (c=='>') {      /* End of tag */
 533:      if (context->current_tag->name) start_element(context);
 534:      context->state = S_text;
 535:      break;
 536:    }
 537:    HTChunkPutc(string, c);
 538:    context->state = S_attr;        /* Get attribute */
 539:    break;
 540:    
 541:                /* accumulating value */
 542:   case S_attr:
 543:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
 544:      HTChunkTerminate(string) ;
 545:      handle_attribute_name(context, string->data);
 546:      string->size = 0;
 547:      if (c=='>') {        /* End of tag */
 548:        if (context->current_tag->name) start_element(context);
 549:        context->state = S_text;
 550:        break;
 551:      }
 552:      context->state = (c=='=' ? S_equals: S_attr_gap);
 553:    } else {
 554:      HTChunkPutc(string, c);
 555:    }
 556:    break;
 557:        
 558:   case S_attr_gap:      /* Expecting attribute or = or > */
 559:    if (WHITE(c)) break;  /* Gap after attribute */
 560:    if (c=='>') {      /* End of tag */
 561:      if (context->current_tag->name) start_element(context);
 562:      context->state = S_text;
 563:      break;
 564:    } else if (c=='=') {
 565:      context->state = S_equals;
 566:      break;
 567:    }
 568:    HTChunkPutc(string, c);
 569:    context->state = S_attr;        /* Get next attribute */
 570:    break;
 571:    
 572:   case S_equals:           /* After attr = */ 
 573:    if (WHITE(c)) break;  /* Before attribute value */
 574:    if (c=='>') {      /* End of tag */
1.5 timbl 575:      if (TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 576:      if (context->current_tag->name) start_element(context);
 577:      context->state = S_text;
 578:      break;
 579:      
 580:    } else if (c=='\'') {
 581:      context->state = S_squoted;
 582:      break;
 583: 
 584:    } else if (c=='"') {
 585:      context->state = S_dquoted;
 586:      break;
 587:    }
 588:    HTChunkPutc(string, c);
 589:    context->state = S_value;
 590:    break;
 591:    
 592:   case S_value:
 593:    if (WHITE(c) || (c=='>')) {       /* End of word */
 594:      HTChunkTerminate(string) ;
 595:      handle_attribute_value(context, string->data);
 596:      string->size = 0;
 597:      if (c=='>') {        /* End of tag */
 598:        if (context->current_tag->name) start_element(context);
 599:        context->state = S_text;
 600:        break;
 601:      }
 602:      else context->state = S_tag_gap;
 603:    } else {
 604:      HTChunkPutc(string, c);
 605:    }
 606:    break;
 607:        
 608:   case S_squoted:      /* Quoted attribute value */
 609:    if (c=='\'') {     /* End of attribute value */
 610:      HTChunkTerminate(string) ;
 611:      handle_attribute_value(context, string->data);
 612:      string->size = 0;
 613:      context->state = S_tag_gap;
 614:    } else {
 615:      HTChunkPutc(string, c);
 616:    }
 617:    break;
 618:    
 619:   case S_dquoted:      /* Quoted attribute value */
 620:    if (c=='"') {      /* End of attribute value */
 621:      HTChunkTerminate(string) ;
 622:      handle_attribute_value(context, string->data);
 623:      string->size = 0;
 624:      context->state = S_tag_gap;
 625:    } else {
 626:      HTChunkPutc(string, c);
 627:    }
 628:    break;
 629:    
 630:   case S_end:                    /* </ */
 631:    if (isalnum(c))
 632:      HTChunkPutc(string, c);
 633:    else {             /* End of end tag name */
1.7 timbl 634:      HTTag * t;
1.1 timbl 635:      HTChunkTerminate(string) ;
1.7 timbl 636:      if (!*string->data) {    /* Empty end tag */
 637:        t = context->element_stack->tag;
 638:      } else {
1.10 timbl 639:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 640:      }
1.7 timbl 641:      if (!t) {
1.1 timbl 642:        if(TRACE) fprintf(stderr,
 643:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 644:      } else {
1.7 timbl 645:        context->current_tag = t;
1.2 timbl 646:        end_element( context, context->current_tag);
1.1 timbl 647:      }
1.2 timbl 648: 
1.1 timbl 649:      string->size = 0;
1.2 timbl 650:      context->current_attribute_number = INVALID;
1.7 timbl 651:      if (c!='>') {
 652:        if (TRACE && !WHITE(c))
 653:          fprintf(stderr,"SGML: `</%s%c' found!\n",
 654:            string->data, c);
 655:        context->state = S_junk_tag;
 656:      } else {
 657:        context->state = S_text;
 658:      }
1.1 timbl 659:    }
 660:    break;
 661: 
 662:        
 663:   case S_junk_tag:
 664:    if (c=='>') {
 665:      context->state = S_text;
 666:    }
 667:    
 668:   } /* switch on context->state */
 669: 
 670: } /* SGML_character */
1.2 timbl 671: 
 672: 
 673: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
 674: {
 675:   CONST char *p;
 676:   for(p=str; *p; p++)
 677:     SGML_character(context, *p);
 678: }
 679: 
 680: 
 681: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
 682: {
 683:   CONST char *p;
 684:   CONST char *e = str+l;
 685:   for(p=str; p<e; p++)
 686:     SGML_character(context, *p);
 687: }
 688: 
 689: /*_______________________________________________________________________
 690: */
 691: 
 692: /*   Structured Object Class
 693: **   -----------------------
 694: */
 695: PUBLIC CONST HTStreamClass SGMLParser = 
 696: {       
 697:    "SGMLParser",
 698:    SGML_free,
1.8 timbl 699:    SGML_abort,
1.9 timbl 700:    SGML_character, 
 701:    SGML_string,
 702:    SGML_write,
1.2 timbl 703: }; 
 704: 
 705: /*   Create SGML Engine
 706: **   ------------------
 707: **
 708: ** On entry,
 709: **   dtd       represents the DTD, along with
 710: **   actions     is the sink for the data as a set of routines.
 711: **
 712: */
 713: 
 714: PUBLIC HTStream* SGML_new ARGS2(
 715:    CONST SGML_dtd *,    dtd,
 716:    HTStructured *,     target)
 717: {
 718:   int i;
 719:   HTStream* context = (HTStream *) malloc(sizeof(*context));
 720:   if (!context) outofmem(__FILE__, "SGML_begin");
 721: 
 722:   context->isa = &SGMLParser;
 723:   context->string = HTChunkCreate(128);   /* Grow by this much */
 724:   context->dtd = dtd;
 725:   context->target = target;
 726:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 727:                    /* Ugh: no OO */
 728:   context->state = S_text;
 729:   context->element_stack = 0;            /* empty */
 730: #ifdef CALLERDATA        
 731:   context->callerData = (void*) callerData;
 732: #endif  
 733:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 734: 
 735:   return context;
 736: }
1.14 frystyk 737: 
 738: 
 739: 
 740: 
 741: 
 742: 
 743: 
 744: 
 745: 
 746: 
 747: 
1.2 timbl 748: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /