[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.18

1.1 timbl 1: /*           General SGML Parser code        SGML.c
 2: **           ========================
 3: **
1.2 timbl 4: **   This module implements an HTStream object. To parse an
1.1 timbl 5: **   SGML file, create this object which is a parser. The object
1.2 timbl 6: **   is (currently) created by being passed a DTD structure,
 7: **   and a target HTStructured oject at which to throw the parsed stuff.
1.1 timbl 8: **   
1.2 timbl 9: **   6 Feb 93 Binary seraches used. Intreface modified.
1.1 timbl 10: */
 11: #include "SGML.h"
 12: 
 13: #include <ctype.h>
 14: #include <stdio.h>
 15: #include "HTUtils.h"
 16: #include "HTChunk.h"
 17: #include "tcp.h"        /* For FROMASCII */
 18: 
/* Value stored in current_attribute_number when no known attribute
** is waiting for a value. */
#define INVALID (-1)
 20: 
1.1 timbl 21: /*   The State (context) of the parser
 22: **
1.2 timbl 23: **   This is passed with each call to make the parser reentrant
1.1 timbl 24: **
 25: */
 26: 
1.16 frystyk 27: 
1.2 timbl 28: 
 29:    
/*       Element Stack
**       -------------
**   This allows us to return down the stack reselecting styles.
**   As we return, attribute values will be garbage in general.
**
**   Each open (non-empty) element is one node of a singly linked list;
**   the head of the list is the most recently opened element.
*/
typedef struct _HTElement HTElement;
struct _HTElement {
    HTElement *   next;  /* Previously nested element or 0 */
    HTTag*     tag;  /* The tag at this level */
};
 40: 
 41: 
/*   Internal Context Data Structure
**   -------------------------------
**   One of these is allocated per parser by SGML_new() and released by
**   SGML_free() or SGML_abort(). It holds the complete state of the
**   character-at-a-time state machine in SGML_character().
*/
struct _HTStream {

    CONST HTStreamClass *   isa;      /* inherited from HTStream */

    CONST SGML_dtd       *dtd;       /* DTD used for tag and entity lookup */
    HTStructuredClass *actions;    /* target class */
    HTStructured    *target;    /* target object */

    HTTag       *current_tag;      /* tag most recently recognised */
    int        current_attribute_number;   /* index into present[]/value[],
                                              or INVALID */
    HTChunk      *string;           /* accumulates names and values */
    HTElement     *element_stack;   /* currently open elements, innermost first */
    enum sgml_state { S_text, S_literal, S_tag, S_tag_gap, 
        S_attr, S_attr_gap, S_equals, S_value, S_after_open,
        S_nl, S_nl_tago,
        S_ero, S_cro,
#ifdef ISO_2022_JP
        S_esc, S_dollar, S_paren, S_nonascii_text,
#endif
        S_squoted, S_dquoted, S_end, S_entity, S_junk_tag} state;
#ifdef CALLERDATA        
    void *       callerData;        /* opaque pointer owned by the caller */
#endif
    BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
    char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
} ;
 71: 
 72: 
/* Send one character to the target through its put_character method.
** Expands to a reference to a variable named `context', which must be
** in scope at the point of use. */
#define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
 74: 
1.1 timbl 75: 
1.17 timbl 76: /*   Find Attribute Number
 77: **   ---------------------
 78: */
 79: 
 80: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
 81: {
 82:   attr* attributes = tag->attributes;
 83: 
 84:   int high, low, i, diff;      /* Binary search for attribute name */
 85:   for(low=0, high=tag->number_of_attributes;
 86:        high > low ;
 87:        diff < 0 ? (low = i+1) : (high = i) ) {
 88:    i = (low + (high-low)/2);
 89:    diff = strcasecomp(attributes[i].name, s);
 90:    if (diff==0) return i;         /* success: found it */
 91:   } /* for */
 92:   
 93:   return -1;
 94: }
 95: 
1.1 timbl 96: 
 97: /*   Handle Attribute
 98: **   ----------------
 99: */
 100: /* PUBLIC CONST char * SGML_default = "";  ?? */
 101: 
 102: #ifdef __STDC__
1.17 timbl 103: PRIVATE void handle_attribute_name(HTStream * context, CONST char * s)
1.1 timbl 104: #else
 105: PRIVATE void handle_attribute_name(context, s)
1.2 timbl 106:   HTStream * context;
1.1 timbl 107:   char *s;
 108: #endif
 109: {
1.2 timbl 110: 
 111:   HTTag * tag = context->current_tag;
 112: 
1.17 timbl 113:   int i = SGMLFindAttribute(tag, s);
 114:   if (i>=0) {
 115:    context->current_attribute_number = i;
 116:    context->present[i] = YES;
 117:    if (context->value[i]) {
 118:      free(context->value[i]);
 119:      context->value[i] = NULL;
 120:    }
 121:    return;
 122:   } /* if */
1.2 timbl 123:    
 124:   if (TRACE)
 125:    fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
 126:      s, context->current_tag->name);
 127:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 128: }
 129: 
 130: 
 131: /*   Handle attribute value
 132: **   ----------------------
 133: */
 134: #ifdef __STDC__
1.2 timbl 135: PRIVATE void handle_attribute_value(HTStream * context, const char * s)
1.1 timbl 136: #else
 137: PRIVATE void handle_attribute_value(context, s)
1.2 timbl 138:   HTStream * context;
1.1 timbl 139:   char *s;
 140: #endif
 141: {
1.2 timbl 142:   if (context->current_attribute_number != INVALID) {
 143:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 144:   } else {
 145:     if (TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
 146:   }
1.2 timbl 147:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 148: }
 149: 
1.2 timbl 150: 
1.1 timbl 151: /*   Handle entity
 152: **   -------------
 153: **
 154: ** On entry,
 155: **   s    contains the entity name zero terminated
 156: ** Bugs:
 157: **   If the entity name is unknown, the terminator is treated as
 158: **   a printable non-special character in all cases, even if it is '<'
 159: */
 160: #ifdef __STDC__
1.2 timbl 161: PRIVATE void handle_entity(HTStream * context, char term)
1.1 timbl 162: #else
 163: PRIVATE void handle_entity(context, term)
1.2 timbl 164:   HTStream * context;
1.1 timbl 165:   char term;
 166: #endif
 167: {
1.2 timbl 168: 
1.3 timbl 169:   CONST char ** entities = context->dtd->entity_names;
1.1 timbl 170:   CONST char *s = context->string->data;
1.2 timbl 171:   
 172:   int high, low, i, diff;
 173:   for(low=0, high = context->dtd->number_of_entities;
 174:        high > low ;
 175:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 176:    i = (low + (high-low)/2);
 177:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 178:    if (diff==0) {         /* success: found it */
 179:      (*context->actions->put_entity)(context->target, i);
 180:      return;
1.1 timbl 181:    }
 182:   }
 183:   /* If entity string not found, display as text */
 184:   if (TRACE)
 185:    fprintf(stderr, "SGML: Unknown entity %s\n", s); 
1.2 timbl 186:   PUTC('&');
1.1 timbl 187:   {
 188:    CONST char *p;
 189:    for (p=s; *p; p++) {
1.2 timbl 190:      PUTC(*p);
1.1 timbl 191:    }
 192:   }
1.2 timbl 193:   PUTC(term);
1.1 timbl 194: }
 195: 
1.2 timbl 196: 
1.1 timbl 197: /*   End element
1.2 timbl 198: **   -----------
1.1 timbl 199: */
 200: #ifdef __STDC__
1.2 timbl 201: PRIVATE void end_element(HTStream * context, HTTag * old_tag)
1.1 timbl 202: #else
 203: PRIVATE void end_element(context, old_tag)
 204:   HTTag * old_tag;
1.2 timbl 205:   HTStream * context;
1.1 timbl 206: #endif
 207: {
 208:   if (TRACE) fprintf(stderr, "SGML: End  </%s>\n", old_tag->name);
1.2 timbl 209:   if (old_tag->contents == SGML_EMPTY) {
1.1 timbl 210:     if (TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
 211:        old_tag->name);
 212:    return;
 213:   }
 214:   while (context->element_stack)   {/* Loop is error path only */
 215:    HTElement * N = context->element_stack;
 216:    HTTag * t = N->tag;
 217:    
 218:    if (old_tag != t) {       /* Mismatch: syntax error */
 219:      if (context->element_stack->next) { /* This is not the last level */
 220:        if (TRACE) fprintf(stderr,
 221:        "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
 222:          old_tag->name, t->name, t->name);
 223:      } else {          /* last level */
 224:        if (TRACE) fprintf(stderr,
 225:          "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
 226:          old_tag->name, t->name, old_tag->name);
 227:        return;         /* Ignore */
 228:      }
 229:    }
 230:    
 231:    context->element_stack = N->next;        /* Remove from stack */
 232:    free(N);
1.2 timbl 233:    (*context->actions->end_element)(context->target,
 234:         t - context->dtd->tags);
1.1 timbl 235:    if (old_tag == t) return; /* Correct sequence */
 236:    
 237:    /* Syntax error path only */
 238:    
 239:   }
1.5 timbl 240:   if (TRACE) fprintf(stderr,
1.1 timbl 241:    "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
 242: }
 243: 
 244: 
1.17 timbl 245: /*   Start an element
 246: **   ----------------
1.1 timbl 247: */
 248: #ifdef __STDC__
1.2 timbl 249: PRIVATE void start_element(HTStream * context)
1.1 timbl 250: #else
 251: PRIVATE void start_element(context)
1.2 timbl 252:   HTStream * context;
1.1 timbl 253: #endif
 254: {
 255:   HTTag * new_tag = context->current_tag;
 256:   
 257:   if (TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 258:   (*context->actions->start_element)(
 259:    context->target,
 260:    new_tag - context->dtd->tags,
 261:    context->present,
1.3 timbl 262:    (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 263:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.1 timbl 264:    HTElement * N = (HTElement *)malloc(sizeof(HTElement));
 265:     if (N == NULL) outofmem(__FILE__, "start_element");
 266:    N->next = context->element_stack;
 267:    N->tag = new_tag;
 268:    context->element_stack = N;
 269:   }
 270: }
 271: 
 272: 
1.2 timbl 273: /*       Find Tag in DTD tag list
 274: **       ------------------------
1.1 timbl 275: **
 276: ** On entry,
1.2 timbl 277: **   dtd   points to dtd structire including valid tag list
 278: **   string points to name of tag in question
1.1 timbl 279: **
1.2 timbl 280: ** On exit,
 281: **   returns:
1.7 timbl 282: **       NULL      tag not found
 283: **       else      address of tag structure in dtd
1.2 timbl 284: */
1.11 timbl 285: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 286: {
 287:   int high, low, i, diff;
 288:   for(low=0, high=dtd->number_of_tags;
 289:        high > low ;
 290:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 291:    i = (low + (high-low)/2);
1.3 timbl 292:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 293:    if (diff==0) {         /* success: found it */
1.7 timbl 294:      return &dtd->tags[i];
1.2 timbl 295:    }
 296:   }
1.7 timbl 297:   return NULL;
1.2 timbl 298: }
 299: 
 300: /*________________________________________________________________________
 301: **           Public Methods
1.1 timbl 302: */
 303: 
1.2 timbl 304: 
 305: /*   Could check that we are back to bottom of stack! @@ */
1.1 timbl 306: 
1.8 timbl 307: PUBLIC void SGML_free ARGS1(HTStream *, context)
 308: {
1.14 frystyk 309:   int cnt;
 310: 
1.15 frystyk 311:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 312:    HTElement *ptr = context->element_stack;
 313: 
 314:    if(TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
 315:             context->element_stack->tag->name);
 316:    context->element_stack = ptr->next;
 317:    free(ptr);
 318:   }
1.8 timbl 319:   (*context->actions->free)(context->target);
 320:   HTChunkFree(context->string);
1.15 frystyk 321:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 322:    if(context->value[cnt])
 323:      free(context->value[cnt]);
1.8 timbl 324:   free(context);
1.1 timbl 325: }
 326: 
1.8 timbl 327: PUBLIC void SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 328: {
1.14 frystyk 329:   int cnt;
 330: 
1.15 frystyk 331:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 332:    HTElement *ptr = context->element_stack;
 333: 
 334:    if(TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
 335:             context->element_stack->tag->name);
 336:    context->element_stack = ptr->next;
 337:    free(ptr);
 338:   }
1.8 timbl 339:   (*context->actions->abort)(context->target, e);
1.1 timbl 340:   HTChunkFree(context->string);
1.14 frystyk 341:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 342:    if(context->value[cnt])
 343:      free(context->value[cnt]);
1.1 timbl 344:   free(context);
 345: }
 346: 
1.2 timbl 347: 
/*   Read and write user callback handle
**   -----------------------------------
**
**  The callbacks from the SGML parser have an SGML context parameter.
**  These calls allow the caller to associate his own context with a
**  particular SGML context.
**
**  Both functions exist only when CALLERDATA is defined, which is also
**  the condition under which the context has a callerData field.
*/

#ifdef CALLERDATA        
/* Return the pointer last stored with SGML_setCallerData(). */
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    return context->callerData;
}

/* Store an arbitrary caller-owned pointer in the context. */
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
}
#endif
1.1 timbl 367: 
1.2 timbl 368: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 369: 
 370: {
1.2 timbl 371:   CONST SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 372:   HTChunk  *string =    context->string;
 373: 
 374:   switch(context->state) {
1.18 ! timbl 375:   
 ! 376:   case S_after_open: /* Strip one trainling newline
 ! 377:            only after opening nonempty element. - SGML:Ugh! */
 ! 378:     if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
 ! 379:      break;
 ! 380:    }
 ! 381:    context->state = S_text;
 ! 382:    goto normal_text;
 ! 383:    /* (***falls through***) */
 ! 384:    
1.1 timbl 385:   case S_text:
1.18 ! timbl 386: normal_text:
 ! 387: 
1.13 timbl 388: #ifdef ISO_2022_JP
 389:    if (c=='033円') {
 390:      context->state = S_esc;
 391:      PUTC(c);
 392:      break;
 393:    }
 394: #endif /* ISO_2022_JP */
1.6 timbl 395:    if (c=='&' && (!context->element_stack || (
 396:             context->element_stack->tag &&
 397:             ( context->element_stack->tag->contents == SGML_MIXED
 398:              || context->element_stack->tag->contents ==
 399:                             SGML_RCDATA)
 400:            ))) {
1.1 timbl 401:      string->size = 0;
 402:      context->state = S_ero;
 403:      
 404:    } else if (c=='<') {
 405:      string->size = 0;
 406:      context->state = (context->element_stack &&
1.13 timbl 407:        context->element_stack->tag &&
 408:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 409:                S_literal : S_tag;
1.18 ! timbl 410:    } else if (c=='\n') {  /* Newline - ignore if before tag end! */
 ! 411:      context->state = S_nl;
1.2 timbl 412:    } else PUTC(c);
1.1 timbl 413:    break;
1.13 timbl 414: 
1.18 ! timbl 415:   case S_nl:
 ! 416:     if (c=='<') {
 ! 417:      string->size = 0;
 ! 418:      context->state = (context->element_stack &&
 ! 419:        context->element_stack->tag &&
 ! 420:        context->element_stack->tag->contents == SGML_LITERAL) ?
 ! 421:                S_literal : S_nl_tago;
 ! 422:    } else {
 ! 423:      PUTC('\n');
 ! 424:      context->state = S_text;
 ! 425:      goto normal_text;
 ! 426:    }
 ! 427:    break;
 ! 428: 
 ! 429:   case S_nl_tago:      /* Had newline and tag opener */
 ! 430:     if (c != '/') {
 ! 431:      PUTC('\n');     /* Only ignore newline before </ */
 ! 432:    }
 ! 433:    context->state = S_tag;
 ! 434:    goto handle_S_tag;
 ! 435: 
1.13 timbl 436: #ifdef ISO_2022_JP
 437:   case S_esc:
 438:    if (c=='$') {
 439:      context->state = S_dollar;
 440:    } else if (c=='(') {
 441:      context->state = S_paren;
 442:    } else {
 443:      context->state = S_text;
 444:    }
 445:    PUTC(c);
 446:    break;
 447:   case S_dollar:
 448:    if (c=='@' || c=='B') {
 449:      context->state = S_nonascii_text;
 450:    } else {
 451:      context->state = S_text;
 452:    }
 453:    PUTC(c);
 454:    break;
 455:   case S_paren:
 456:    if (c=='B' || c=='J') {
 457:      context->state = S_text;
 458:    } else {
 459:      context->state = S_text;
 460:    }
 461:    PUTC(c);
 462:    break;
 463:   case S_nonascii_text:
 464:    if (c=='033円') {
 465:      context->state = S_esc;
 466:      PUTC(c);
 467:    } else {
 468:      PUTC(c);
 469:    }
 470:    break;
 471: #endif /* ISO_2022_JP */
1.1 timbl 472: 
1.12 timbl 473: /*   In literal mode, waits only for specific end tag!
1.2 timbl 474: **   Only foir compatibility with old servers.
1.1 timbl 475: */
1.12 timbl 476:   case S_literal :
1.1 timbl 477:    HTChunkPutc(string, c);
 478:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 479:        : context->element_stack->tag->name[string->size-2])) {
 480:      int i;
 481:      
1.12 timbl 482:      /* If complete match, end literal */
1.1 timbl 483:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 484:        end_element(context, context->element_stack->tag);
 485:        string->size = 0;
1.2 timbl 486:        context->current_attribute_number = INVALID;
1.1 timbl 487:        context->state = S_text;
 488:        break;
 489:      }      /* If Mismatch: recover string. */
1.2 timbl 490:      PUTC( '<');
1.1 timbl 491:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 492:        PUTC(
1.1 timbl 493:                       string->data[i]);
 494:      context->state = S_text;  
 495:    }
 496:    
 497:     break;
 498: 
 499: /*   Character reference or Entity
 500: */
 501:  case S_ero:
 502:    if (c=='#') {
 503:      context->state = S_cro; /*  &# is Char Ref Open */ 
 504:      break;
 505:    }
 506:    context->state = S_entity;  /* Fall through! */
 507:    
 508: /*   Handle Entities
 509: */
 510:   case S_entity:
 511:    if (isalnum(c))
 512:      HTChunkPutc(string, c);
 513:    else {
 514:      HTChunkTerminate(string);
 515:      handle_entity(context, c);
 516:      context->state = S_text;
 517:    }
 518:    break;
 519: 
 520: /*   Character reference
 521: */
 522:   case S_cro:
 523:    if (isalnum(c))
 524:      HTChunkPutc(string, c);   /* accumulate a character NUMBER */
 525:    else {
 526:      int value;
 527:      HTChunkTerminate(string);
 528:      if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 529:        PUTC(FROMASCII((char)value));
1.1 timbl 530:      context->state = S_text;
 531:    }
 532:    break;
 533: 
 534: /*       Tag
 535: */     
 536:   case S_tag:                /* new tag */
1.18 ! timbl 537: handle_S_tag:
 ! 538: 
1.1 timbl 539:    if (isalnum(c))
 540:      HTChunkPutc(string, c);
 541:    else {             /* End of tag name */
1.7 timbl 542:      HTTag * t;
1.1 timbl 543:      if (c=='/') {
 544:        if (TRACE) if (string->size!=0)
 545:          fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
 546:        context->state = S_end;
 547:        break;
 548:      }
 549:      HTChunkTerminate(string) ;
1.2 timbl 550: 
1.10 timbl 551:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 552:      if (!t) {
1.2 timbl 553:        if(TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 554:            string->data);
 555:        context->state = (c=='>') ? S_text : S_junk_tag;
 556:        break;
 557:      }
1.7 timbl 558:      context->current_tag = t;
1.2 timbl 559:      
 560:      /* Clear out attributes
 561:      */
1.1 timbl 562:      
1.2 timbl 563:      {
 564:        int i;
 565:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 566:          context->present[i] = NO;
1.1 timbl 567:      }
 568:      string->size = 0;
1.2 timbl 569:      context->current_attribute_number = INVALID;
1.1 timbl 570:      
 571:      if (c=='>') {
 572:        if (context->current_tag->name) start_element(context);
1.18 ! timbl 573:        context->state = S_after_open;
1.1 timbl 574:      } else {
 575:        context->state = S_tag_gap;
 576:      }
 577:    }
 578:    break;
 579: 
 580:        
 581:   case S_tag_gap:      /* Expecting attribute or > */
 582:    if (WHITE(c)) break;  /* Gap between attributes */
 583:    if (c=='>') {      /* End of tag */
 584:      if (context->current_tag->name) start_element(context);
1.18 ! timbl 585:      context->state = S_after_open;
1.1 timbl 586:      break;
 587:    }
 588:    HTChunkPutc(string, c);
 589:    context->state = S_attr;        /* Get attribute */
 590:    break;
 591:    
 592:                /* accumulating value */
 593:   case S_attr:
 594:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
 595:      HTChunkTerminate(string) ;
 596:      handle_attribute_name(context, string->data);
 597:      string->size = 0;
 598:      if (c=='>') {        /* End of tag */
 599:        if (context->current_tag->name) start_element(context);
1.18 ! timbl 600:        context->state = S_after_open;
1.1 timbl 601:        break;
 602:      }
 603:      context->state = (c=='=' ? S_equals: S_attr_gap);
 604:    } else {
 605:      HTChunkPutc(string, c);
 606:    }
 607:    break;
 608:        
 609:   case S_attr_gap:      /* Expecting attribute or = or > */
 610:    if (WHITE(c)) break;  /* Gap after attribute */
 611:    if (c=='>') {      /* End of tag */
 612:      if (context->current_tag->name) start_element(context);
1.18 ! timbl 613:      context->state = S_after_open;
1.1 timbl 614:      break;
 615:    } else if (c=='=') {
 616:      context->state = S_equals;
 617:      break;
 618:    }
 619:    HTChunkPutc(string, c);
 620:    context->state = S_attr;        /* Get next attribute */
 621:    break;
 622:    
 623:   case S_equals:           /* After attr = */ 
 624:    if (WHITE(c)) break;  /* Before attribute value */
 625:    if (c=='>') {      /* End of tag */
1.5 timbl 626:      if (TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 627:      if (context->current_tag->name) start_element(context);
1.18 ! timbl 628:      context->state = S_after_open;
1.1 timbl 629:      break;
 630:      
 631:    } else if (c=='\'') {
 632:      context->state = S_squoted;
 633:      break;
 634: 
 635:    } else if (c=='"') {
 636:      context->state = S_dquoted;
 637:      break;
 638:    }
 639:    HTChunkPutc(string, c);
 640:    context->state = S_value;
 641:    break;
 642:    
 643:   case S_value:
 644:    if (WHITE(c) || (c=='>')) {       /* End of word */
 645:      HTChunkTerminate(string) ;
 646:      handle_attribute_value(context, string->data);
 647:      string->size = 0;
 648:      if (c=='>') {        /* End of tag */
 649:        if (context->current_tag->name) start_element(context);
1.18 ! timbl 650:        context->state = S_after_open;
1.1 timbl 651:        break;
 652:      }
 653:      else context->state = S_tag_gap;
 654:    } else {
 655:      HTChunkPutc(string, c);
 656:    }
 657:    break;
 658:        
 659:   case S_squoted:      /* Quoted attribute value */
 660:    if (c=='\'') {     /* End of attribute value */
 661:      HTChunkTerminate(string) ;
 662:      handle_attribute_value(context, string->data);
 663:      string->size = 0;
 664:      context->state = S_tag_gap;
 665:    } else {
 666:      HTChunkPutc(string, c);
 667:    }
 668:    break;
 669:    
 670:   case S_dquoted:      /* Quoted attribute value */
 671:    if (c=='"') {      /* End of attribute value */
 672:      HTChunkTerminate(string) ;
 673:      handle_attribute_value(context, string->data);
 674:      string->size = 0;
 675:      context->state = S_tag_gap;
 676:    } else {
 677:      HTChunkPutc(string, c);
 678:    }
 679:    break;
 680:    
 681:   case S_end:                    /* </ */
 682:    if (isalnum(c))
 683:      HTChunkPutc(string, c);
 684:    else {             /* End of end tag name */
1.7 timbl 685:      HTTag * t;
1.1 timbl 686:      HTChunkTerminate(string) ;
1.7 timbl 687:      if (!*string->data) {    /* Empty end tag */
 688:        t = context->element_stack->tag;
 689:      } else {
1.10 timbl 690:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 691:      }
1.7 timbl 692:      if (!t) {
1.1 timbl 693:        if(TRACE) fprintf(stderr,
 694:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 695:      } else {
1.7 timbl 696:        context->current_tag = t;
1.2 timbl 697:        end_element( context, context->current_tag);
1.1 timbl 698:      }
1.2 timbl 699: 
1.1 timbl 700:      string->size = 0;
1.2 timbl 701:      context->current_attribute_number = INVALID;
1.7 timbl 702:      if (c!='>') {
 703:        if (TRACE && !WHITE(c))
 704:          fprintf(stderr,"SGML: `</%s%c' found!\n",
 705:            string->data, c);
 706:        context->state = S_junk_tag;
 707:      } else {
 708:        context->state = S_text;
 709:      }
1.1 timbl 710:    }
 711:    break;
 712: 
 713:        
 714:   case S_junk_tag:
 715:    if (c=='>') {
 716:      context->state = S_text;
 717:    }
 718:    
 719:   } /* switch on context->state */
 720: 
 721: } /* SGML_character */
1.2 timbl 722: 
 723: 
 724: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
 725: {
 726:   CONST char *p;
 727:   for(p=str; *p; p++)
 728:     SGML_character(context, *p);
 729: }
 730: 
 731: 
 732: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
 733: {
 734:   CONST char *p;
 735:   CONST char *e = str+l;
 736:   for(p=str; p<e; p++)
 737:     SGML_character(context, *p);
 738: }
 739: 
 740: /*_______________________________________________________________________
 741: */
 742: 
/*   Structured Object Class
**   -----------------------
**   The stream class of the parser: its name followed by the functions
**   defined above. SGML_new() stores the address of this structure in
**   the isa field of every context it creates.
*/
PUBLIC CONST HTStreamClass SGMLParser = 
{       
    "SGMLParser",
    SGML_free,
    SGML_abort,
    SGML_character, 
    SGML_string,
    SGML_write,
}; 
 755: 
 756: /*   Create SGML Engine
 757: **   ------------------
 758: **
 759: ** On entry,
 760: **   dtd       represents the DTD, along with
 761: **   actions     is the sink for the data as a set of routines.
 762: **
 763: */
 764: 
 765: PUBLIC HTStream* SGML_new ARGS2(
 766:    CONST SGML_dtd *,    dtd,
 767:    HTStructured *,     target)
 768: {
 769:   int i;
 770:   HTStream* context = (HTStream *) malloc(sizeof(*context));
 771:   if (!context) outofmem(__FILE__, "SGML_begin");
 772: 
 773:   context->isa = &SGMLParser;
 774:   context->string = HTChunkCreate(128);   /* Grow by this much */
 775:   context->dtd = dtd;
 776:   context->target = target;
 777:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 778:                    /* Ugh: no OO */
 779:   context->state = S_text;
 780:   context->element_stack = 0;            /* empty */
 781: #ifdef CALLERDATA        
 782:   context->callerData = (void*) callerData;
 783: #endif  
 784:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 785: 
 786:   return context;
 787: }
1.14 frystyk 788: 
 789: 
 790: 
 791: 
 792: 
 793: 
 794: 
 795: 
 796: 
 797: 
 798: 
1.2 timbl 799: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /