[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.23

1.23 ! frystyk 1: /*                                   SGML.c
 ! 2: **   GENERAL SGML PARSER CODE
 ! 3: **
 ! 4: **   (c) COPYRIGHT CERN 1994.
 ! 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: **   This module implements an HTStream object. To parse an
1.1 timbl 8: **   SGML file, create this object which is a parser. The object
1.2 timbl 9: **   is (currently) created by being passed a DTD structure,
 10: **   and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 11: **   
1.19 duns 12: **   6 Feb 93    Binary searches used. Interface modified.
 13: **   8 Jul 94 FM  Insulate free() from _free structure element.
1.1 timbl 14: */
 15: 
1.20 frystyk 16: /* System dependent stuff */
 17: #include "tcp.h"        /* For FROMASCII */
 18: 
 19: /* Library includes */
1.1 timbl 20: #include "HTUtils.h"
 21: #include "HTChunk.h"
1.20 frystyk 22: #include "SGML.h"
1.1 timbl 23: 
1.2 timbl 24: #define INVALID (-1)
 25: 
1.1 timbl 26: /*   The State (context) of the parser
 27: **
1.2 timbl 28: **   This is passed with each call to make the parser reentrant
1.1 timbl 29: **
 30: */
 31: 
1.16 frystyk 32: 
1.2 timbl 33: 
 34:    
 35: /*       Element Stack
 36: **       -------------
 37: **   This allows us to return down the stack reselcting styles.
 38: **   As we return, attribute values will be garbage in general.
 39: */
 40: typedef struct _HTElement HTElement;
 41: struct _HTElement {
 42:    HTElement *   next;  /* Previously nested element or 0 */
 43:    HTTag*     tag;  /* The tag at this level */
 44: };
 45: 
 46: 
/*
**  Parser states
**  -------------
**  The state says what the character handed to SGML_character is
**  expected to belong to.  The order of the enumerators is unchanged.
*/
typedef enum _sgml_state {
    S_text,             /* Ordinary character data */
    S_literal,          /* Inside a literal element: only its end tag counts */
    S_tag,              /* After '<': accumulating a tag name */
    S_tag_gap,          /* Inside a start tag: expecting attribute or '>' */
    S_attr,             /* Accumulating an attribute name */
    S_attr_gap,         /* After attribute name: expecting name, '=' or '>' */
    S_equals,           /* After '=': expecting an attribute value */
    S_value,            /* Accumulating an unquoted attribute value */
    S_after_open,       /* Just after the '>' of a start tag */
    S_nl,               /* Newline seen in text, not yet passed on */
    S_nl_tago,          /* Newline followed by '<' */
    S_ero,              /* After '&' (entity reference open) */
    S_cro,              /* After "&#" (character reference open) */
#ifdef ISO_2022_JP
    S_esc,              /* After ESC */
    S_dollar,           /* After ESC '$' */
    S_paren,            /* After ESC '(' */
    S_nonascii_text,    /* Text passed through until the next ESC */
#endif
    S_squoted,          /* Inside a single-quoted attribute value */
    S_dquoted,          /* Inside a double-quoted attribute value */
    S_end,              /* After "</": accumulating an end tag name */
    S_entity,           /* Accumulating an entity name */
    S_junk_tag          /* Skipping the rest of a tag up to '>' */
} sgml_state;
 57: 
 58: 
1.2 timbl 59: /*   Internal Context Data Structure
 60: **   -------------------------------
 61: */
 62: struct _HTStream {
 63: 
 64:   CONST HTStreamClass *   isa;      /* inherited from HTStream */
 65:   
 66:   CONST SGML_dtd       *dtd;
 67:   HTStructuredClass *actions;    /* target class */
 68:   HTStructured    *target;    /* target object */
 69: 
1.1 timbl 70:   HTTag       *current_tag;
1.2 timbl 71:   int        current_attribute_number;
1.1 timbl 72:   HTChunk      *string;
 73:   HTElement     *element_stack;
1.21 frystyk 74:   sgml_state     state;
1.2 timbl 75: #ifdef CALLERDATA        
1.1 timbl 76:   void *       callerData;
1.2 timbl 77: #endif
 78:   BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
 79:   char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
 80: } ;
 81: 
 82: 
 /* Pass one character to the target through its put_character method.
** Expects a variable named `context` (the parser object) to be in scope. */
#define PUTC(ch) \
    ((*context->actions->put_character)(context->target, (ch)))
 84: 
1.1 timbl 85: 
1.17 timbl 86: /*   Find Attribute Number
 87: **   ---------------------
 88: */
 89: 
 90: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
 91: {
 92:   attr* attributes = tag->attributes;
 93: 
 94:   int high, low, i, diff;      /* Binary search for attribute name */
 95:   for(low=0, high=tag->number_of_attributes;
 96:        high > low ;
 97:        diff < 0 ? (low = i+1) : (high = i) ) {
 98:    i = (low + (high-low)/2);
 99:    diff = strcasecomp(attributes[i].name, s);
 100:    if (diff==0) return i;         /* success: found it */
 101:   } /* for */
 102:   
 103:   return -1;
 104: }
 105: 
1.1 timbl 106: 
 107: /*   Handle Attribute
 108: **   ----------------
 109: */
 110: /* PUBLIC CONST char * SGML_default = "";  ?? */
 111: 
1.21 frystyk 112: PRIVATE void handle_attribute_name ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 113: {
1.2 timbl 114: 
 115:   HTTag * tag = context->current_tag;
 116: 
1.17 timbl 117:   int i = SGMLFindAttribute(tag, s);
 118:   if (i>=0) {
 119:    context->current_attribute_number = i;
 120:    context->present[i] = YES;
 121:    if (context->value[i]) {
 122:      free(context->value[i]);
 123:      context->value[i] = NULL;
 124:    }
 125:    return;
 126:   } /* if */
1.2 timbl 127:    
1.20 frystyk 128:   if (SGML_TRACE)
1.2 timbl 129:    fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
 130:      s, context->current_tag->name);
 131:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 132: }
 133: 
 134: 
 135: /*   Handle attribute value
 136: **   ----------------------
 137: */
1.21 frystyk 138: PRIVATE void handle_attribute_value ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 139: {
1.2 timbl 140:   if (context->current_attribute_number != INVALID) {
 141:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 142:   } else {
1.20 frystyk 143:     if (SGML_TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 144:   }
1.2 timbl 145:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 146: }
 147: 
1.2 timbl 148: 
1.1 timbl 149: /*   Handle entity
 150: **   -------------
 151: **
 152: ** On entry,
 153: **   s    contains the entity name zero terminated
 154: ** Bugs:
 155: **   If the entity name is unknown, the terminator is treated as
 156: **   a printable non-special character in all cases, even if it is '<'
 157: */
1.21 frystyk 158: PRIVATE void handle_entity ARGS2(HTStream *, context, char, term)
1.1 timbl 159: {
1.2 timbl 160: 
1.3 timbl 161:   CONST char ** entities = context->dtd->entity_names;
1.1 timbl 162:   CONST char *s = context->string->data;
1.2 timbl 163:   
 164:   int high, low, i, diff;
 165:   for(low=0, high = context->dtd->number_of_entities;
 166:        high > low ;
 167:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 168:    i = (low + (high-low)/2);
 169:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 170:    if (diff==0) {         /* success: found it */
 171:      (*context->actions->put_entity)(context->target, i);
 172:      return;
1.1 timbl 173:    }
 174:   }
 175:   /* If entity string not found, display as text */
1.20 frystyk 176:   if (SGML_TRACE)
1.1 timbl 177:    fprintf(stderr, "SGML: Unknown entity %s\n", s); 
1.2 timbl 178:   PUTC('&');
1.1 timbl 179:   {
 180:    CONST char *p;
 181:    for (p=s; *p; p++) {
1.2 timbl 182:      PUTC(*p);
1.1 timbl 183:    }
 184:   }
1.2 timbl 185:   PUTC(term);
1.1 timbl 186: }
 187: 
1.2 timbl 188: 
1.1 timbl 189: /*   End element
1.2 timbl 190: **   -----------
1.1 timbl 191: */
1.21 frystyk 192: PRIVATE void end_element ARGS2(HTStream *, context, HTTag *, old_tag)
1.1 timbl 193: {
1.20 frystyk 194:   if (SGML_TRACE) fprintf(stderr, "SGML: End  </%s>\n", old_tag->name);
1.2 timbl 195:   if (old_tag->contents == SGML_EMPTY) {
1.20 frystyk 196:     if (SGML_TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 197:        old_tag->name);
 198:    return;
 199:   }
 200:   while (context->element_stack)   {/* Loop is error path only */
 201:    HTElement * N = context->element_stack;
 202:    HTTag * t = N->tag;
 203:    
 204:    if (old_tag != t) {       /* Mismatch: syntax error */
 205:      if (context->element_stack->next) { /* This is not the last level */
1.20 frystyk 206:        if (SGML_TRACE) fprintf(stderr,
1.1 timbl 207:        "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
 208:          old_tag->name, t->name, t->name);
 209:      } else {          /* last level */
1.20 frystyk 210:        if (SGML_TRACE) fprintf(stderr,
1.1 timbl 211:          "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
 212:          old_tag->name, t->name, old_tag->name);
 213:        return;         /* Ignore */
 214:      }
 215:    }
 216:    
 217:    context->element_stack = N->next;        /* Remove from stack */
 218:    free(N);
1.2 timbl 219:    (*context->actions->end_element)(context->target,
 220:         t - context->dtd->tags);
1.1 timbl 221:    if (old_tag == t) return; /* Correct sequence */
 222:    
 223:    /* Syntax error path only */
 224:    
 225:   }
1.20 frystyk 226:   if (SGML_TRACE) fprintf(stderr,
1.1 timbl 227:    "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
 228: }
 229: 
 230: 
1.17 timbl 231: /*   Start an element
 232: **   ----------------
1.1 timbl 233: */
1.21 frystyk 234: PRIVATE void start_element ARGS1(HTStream *, context)
1.1 timbl 235: {
 236:   HTTag * new_tag = context->current_tag;
 237:   
1.20 frystyk 238:   if (SGML_TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 239:   (*context->actions->start_element)(
 240:    context->target,
 241:    new_tag - context->dtd->tags,
 242:    context->present,
1.3 timbl 243:    (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 244:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.1 timbl 245:    HTElement * N = (HTElement *)malloc(sizeof(HTElement));
 246:     if (N == NULL) outofmem(__FILE__, "start_element");
 247:    N->next = context->element_stack;
 248:    N->tag = new_tag;
 249:    context->element_stack = N;
 250:   }
 251: }
 252: 
 253: 
1.2 timbl 254: /*       Find Tag in DTD tag list
 255: **       ------------------------
1.1 timbl 256: **
 257: ** On entry,
1.2 timbl 258: **   dtd   points to dtd structire including valid tag list
 259: **   string points to name of tag in question
1.1 timbl 260: **
1.2 timbl 261: ** On exit,
 262: **   returns:
1.7 timbl 263: **       NULL      tag not found
 264: **       else      address of tag structure in dtd
1.2 timbl 265: */
1.11 timbl 266: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 267: {
 268:   int high, low, i, diff;
 269:   for(low=0, high=dtd->number_of_tags;
 270:        high > low ;
 271:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 272:    i = (low + (high-low)/2);
1.3 timbl 273:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 274:    if (diff==0) {         /* success: found it */
1.7 timbl 275:      return &dtd->tags[i];
1.2 timbl 276:    }
 277:   }
1.7 timbl 278:   return NULL;
1.2 timbl 279: }
 280: 
 281: /*________________________________________________________________________
 282: **           Public Methods
1.1 timbl 283: */
 284: 
1.2 timbl 285: 
 286: /*   Could check that we are back to bottom of stack! @@ */
1.1 timbl 287: 
1.22 frystyk 288: PUBLIC int SGML_free ARGS1(HTStream *, context)
1.8 timbl 289: {
1.14 frystyk 290:   int cnt;
 291: 
1.15 frystyk 292:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 293:    HTElement *ptr = context->element_stack;
 294: 
1.20 frystyk 295:    if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 296:             context->element_stack->tag->name);
 297:    context->element_stack = ptr->next;
 298:    free(ptr);
 299:   }
1.19 duns 300:   (*context->actions->_free)(context->target);
1.8 timbl 301:   HTChunkFree(context->string);
1.15 frystyk 302:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 303:    if(context->value[cnt])
 304:      free(context->value[cnt]);
1.8 timbl 305:   free(context);
1.22 frystyk 306:   return 0;
1.1 timbl 307: }
 308: 
1.22 frystyk 309: PUBLIC int SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 310: {
1.14 frystyk 311:   int cnt;
 312: 
1.15 frystyk 313:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 314:    HTElement *ptr = context->element_stack;
 315: 
1.20 frystyk 316:    if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 317:             context->element_stack->tag->name);
 318:    context->element_stack = ptr->next;
 319:    free(ptr);
 320:   }
1.8 timbl 321:   (*context->actions->abort)(context->target, e);
1.1 timbl 322:   HTChunkFree(context->string);
1.14 frystyk 323:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 324:    if(context->value[cnt])
 325:      free(context->value[cnt]);
1.1 timbl 326:   free(context);
1.22 frystyk 327:   return EOF;
1.1 timbl 328: }
 329: 
1.2 timbl 330: 
/*
**  Read and Write User Callback Handle
**  -----------------------------------
**  Only compiled when CALLERDATA is defined.  The parser stores one
**  opaque pointer on behalf of its caller, so that the caller can
**  associate his own context with a particular SGML context.  The
**  parser itself never looks at what the pointer refers to.
*/
#ifdef CALLERDATA

/* Return the pointer last stored with SGML_setCallerData */
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void * handle = context->callerData;
    return handle;
}

/* Remember `data` for later retrieval with SGML_callerData */
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
}

#endif /* CALLERDATA */
1.1 timbl 350: 
1.2 timbl 351: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 352: 
 353: {
1.2 timbl 354:   CONST SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 355:   HTChunk  *string =    context->string;
 356: 
 357:   switch(context->state) {
1.18 timbl 358:   
 359:   case S_after_open: /* Strip one trainling newline
 360:            only after opening nonempty element. - SGML:Ugh! */
 361:     if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
 362:      break;
 363:    }
 364:    context->state = S_text;
 365:    goto normal_text;
 366:    /* (***falls through***) */
 367:    
1.1 timbl 368:   case S_text:
1.18 timbl 369: normal_text:
 370: 
1.13 timbl 371: #ifdef ISO_2022_JP
 372:    if (c=='033円') {
 373:      context->state = S_esc;
 374:      PUTC(c);
 375:      break;
 376:    }
 377: #endif /* ISO_2022_JP */
1.6 timbl 378:    if (c=='&' && (!context->element_stack || (
 379:             context->element_stack->tag &&
 380:             ( context->element_stack->tag->contents == SGML_MIXED
 381:              || context->element_stack->tag->contents ==
 382:                             SGML_RCDATA)
 383:            ))) {
1.1 timbl 384:      string->size = 0;
 385:      context->state = S_ero;
 386:      
 387:    } else if (c=='<') {
 388:      string->size = 0;
 389:      context->state = (context->element_stack &&
1.13 timbl 390:        context->element_stack->tag &&
 391:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 392:                S_literal : S_tag;
1.18 timbl 393:    } else if (c=='\n') {  /* Newline - ignore if before tag end! */
 394:      context->state = S_nl;
1.2 timbl 395:    } else PUTC(c);
1.1 timbl 396:    break;
1.13 timbl 397: 
1.18 timbl 398:   case S_nl:
 399:     if (c=='<') {
 400:      string->size = 0;
 401:      context->state = (context->element_stack &&
 402:        context->element_stack->tag &&
 403:        context->element_stack->tag->contents == SGML_LITERAL) ?
 404:                S_literal : S_nl_tago;
 405:    } else {
 406:      PUTC('\n');
 407:      context->state = S_text;
 408:      goto normal_text;
 409:    }
 410:    break;
 411: 
 412:   case S_nl_tago:      /* Had newline and tag opener */
 413:     if (c != '/') {
 414:      PUTC('\n');     /* Only ignore newline before </ */
 415:    }
 416:    context->state = S_tag;
 417:    goto handle_S_tag;
 418: 
1.13 timbl 419: #ifdef ISO_2022_JP
 420:   case S_esc:
 421:    if (c=='$') {
 422:      context->state = S_dollar;
 423:    } else if (c=='(') {
 424:      context->state = S_paren;
 425:    } else {
 426:      context->state = S_text;
 427:    }
 428:    PUTC(c);
 429:    break;
 430:   case S_dollar:
 431:    if (c=='@' || c=='B') {
 432:      context->state = S_nonascii_text;
 433:    } else {
 434:      context->state = S_text;
 435:    }
 436:    PUTC(c);
 437:    break;
 438:   case S_paren:
 439:    if (c=='B' || c=='J') {
 440:      context->state = S_text;
 441:    } else {
 442:      context->state = S_text;
 443:    }
 444:    PUTC(c);
 445:    break;
 446:   case S_nonascii_text:
 447:    if (c=='033円') {
 448:      context->state = S_esc;
 449:      PUTC(c);
 450:    } else {
 451:      PUTC(c);
 452:    }
 453:    break;
 454: #endif /* ISO_2022_JP */
1.1 timbl 455: 
1.12 timbl 456: /*   In literal mode, waits only for specific end tag!
1.2 timbl 457: **   Only foir compatibility with old servers.
1.1 timbl 458: */
1.12 timbl 459:   case S_literal :
1.1 timbl 460:    HTChunkPutc(string, c);
 461:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 462:        : context->element_stack->tag->name[string->size-2])) {
 463:      int i;
 464:      
1.12 timbl 465:      /* If complete match, end literal */
1.1 timbl 466:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 467:        end_element(context, context->element_stack->tag);
 468:        string->size = 0;
1.2 timbl 469:        context->current_attribute_number = INVALID;
1.1 timbl 470:        context->state = S_text;
 471:        break;
 472:      }      /* If Mismatch: recover string. */
1.2 timbl 473:      PUTC( '<');
1.1 timbl 474:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 475:        PUTC(
1.1 timbl 476:                       string->data[i]);
 477:      context->state = S_text;  
 478:    }
 479:    
 480:     break;
 481: 
 482: /*   Character reference or Entity
 483: */
 484:  case S_ero:
 485:    if (c=='#') {
 486:      context->state = S_cro; /*  &# is Char Ref Open */ 
 487:      break;
 488:    }
 489:    context->state = S_entity;  /* Fall through! */
 490:    
 491: /*   Handle Entities
 492: */
 493:   case S_entity:
 494:    if (isalnum(c))
 495:      HTChunkPutc(string, c);
 496:    else {
 497:      HTChunkTerminate(string);
 498:      handle_entity(context, c);
 499:      context->state = S_text;
 500:    }
 501:    break;
 502: 
 503: /*   Character reference
 504: */
 505:   case S_cro:
 506:    if (isalnum(c))
 507:      HTChunkPutc(string, c);   /* accumulate a character NUMBER */
 508:    else {
 509:      int value;
 510:      HTChunkTerminate(string);
 511:      if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 512:        PUTC(FROMASCII((char)value));
1.1 timbl 513:      context->state = S_text;
 514:    }
 515:    break;
 516: 
 517: /*       Tag
 518: */     
 519:   case S_tag:                /* new tag */
1.18 timbl 520: handle_S_tag:
 521: 
1.1 timbl 522:    if (isalnum(c))
 523:      HTChunkPutc(string, c);
 524:    else {             /* End of tag name */
1.7 timbl 525:      HTTag * t;
1.1 timbl 526:      if (c=='/') {
1.20 frystyk 527:        if (SGML_TRACE) if (string->size!=0)
1.1 timbl 528:          fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
 529:        context->state = S_end;
 530:        break;
 531:      }
 532:      HTChunkTerminate(string) ;
1.2 timbl 533: 
1.10 timbl 534:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 535:      if (!t) {
1.20 frystyk 536:        if(SGML_TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 537:            string->data);
 538:        context->state = (c=='>') ? S_text : S_junk_tag;
 539:        break;
 540:      }
1.7 timbl 541:      context->current_tag = t;
1.2 timbl 542:      
 543:      /* Clear out attributes
 544:      */
1.1 timbl 545:      
1.2 timbl 546:      {
 547:        int i;
 548:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 549:          context->present[i] = NO;
1.1 timbl 550:      }
 551:      string->size = 0;
1.2 timbl 552:      context->current_attribute_number = INVALID;
1.1 timbl 553:      
 554:      if (c=='>') {
 555:        if (context->current_tag->name) start_element(context);
1.18 timbl 556:        context->state = S_after_open;
1.1 timbl 557:      } else {
 558:        context->state = S_tag_gap;
 559:      }
 560:    }
 561:    break;
 562: 
 563:        
 564:   case S_tag_gap:      /* Expecting attribute or > */
 565:    if (WHITE(c)) break;  /* Gap between attributes */
 566:    if (c=='>') {      /* End of tag */
 567:      if (context->current_tag->name) start_element(context);
1.18 timbl 568:      context->state = S_after_open;
1.1 timbl 569:      break;
 570:    }
 571:    HTChunkPutc(string, c);
 572:    context->state = S_attr;        /* Get attribute */
 573:    break;
 574:    
 575:                /* accumulating value */
 576:   case S_attr:
 577:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
 578:      HTChunkTerminate(string) ;
 579:      handle_attribute_name(context, string->data);
 580:      string->size = 0;
 581:      if (c=='>') {        /* End of tag */
 582:        if (context->current_tag->name) start_element(context);
1.18 timbl 583:        context->state = S_after_open;
1.1 timbl 584:        break;
 585:      }
 586:      context->state = (c=='=' ? S_equals: S_attr_gap);
 587:    } else {
 588:      HTChunkPutc(string, c);
 589:    }
 590:    break;
 591:        
 592:   case S_attr_gap:      /* Expecting attribute or = or > */
 593:    if (WHITE(c)) break;  /* Gap after attribute */
 594:    if (c=='>') {      /* End of tag */
 595:      if (context->current_tag->name) start_element(context);
1.18 timbl 596:      context->state = S_after_open;
1.1 timbl 597:      break;
 598:    } else if (c=='=') {
 599:      context->state = S_equals;
 600:      break;
 601:    }
 602:    HTChunkPutc(string, c);
 603:    context->state = S_attr;        /* Get next attribute */
 604:    break;
 605:    
 606:   case S_equals:           /* After attr = */ 
 607:    if (WHITE(c)) break;  /* Before attribute value */
 608:    if (c=='>') {      /* End of tag */
1.20 frystyk 609:      if (SGML_TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 610:      if (context->current_tag->name) start_element(context);
1.18 timbl 611:      context->state = S_after_open;
1.1 timbl 612:      break;
 613:      
 614:    } else if (c=='\'') {
 615:      context->state = S_squoted;
 616:      break;
 617: 
 618:    } else if (c=='"') {
 619:      context->state = S_dquoted;
 620:      break;
 621:    }
 622:    HTChunkPutc(string, c);
 623:    context->state = S_value;
 624:    break;
 625:    
 626:   case S_value:
 627:    if (WHITE(c) || (c=='>')) {       /* End of word */
 628:      HTChunkTerminate(string) ;
 629:      handle_attribute_value(context, string->data);
 630:      string->size = 0;
 631:      if (c=='>') {        /* End of tag */
 632:        if (context->current_tag->name) start_element(context);
1.18 timbl 633:        context->state = S_after_open;
1.1 timbl 634:        break;
 635:      }
 636:      else context->state = S_tag_gap;
 637:    } else {
 638:      HTChunkPutc(string, c);
 639:    }
 640:    break;
 641:        
 642:   case S_squoted:      /* Quoted attribute value */
 643:    if (c=='\'') {     /* End of attribute value */
 644:      HTChunkTerminate(string) ;
 645:      handle_attribute_value(context, string->data);
 646:      string->size = 0;
 647:      context->state = S_tag_gap;
 648:    } else {
 649:      HTChunkPutc(string, c);
 650:    }
 651:    break;
 652:    
 653:   case S_dquoted:      /* Quoted attribute value */
 654:    if (c=='"') {      /* End of attribute value */
 655:      HTChunkTerminate(string) ;
 656:      handle_attribute_value(context, string->data);
 657:      string->size = 0;
 658:      context->state = S_tag_gap;
 659:    } else {
 660:      HTChunkPutc(string, c);
 661:    }
 662:    break;
 663:    
 664:   case S_end:                    /* </ */
 665:    if (isalnum(c))
 666:      HTChunkPutc(string, c);
 667:    else {             /* End of end tag name */
1.7 timbl 668:      HTTag * t;
1.1 timbl 669:      HTChunkTerminate(string) ;
1.7 timbl 670:      if (!*string->data) {    /* Empty end tag */
 671:        t = context->element_stack->tag;
 672:      } else {
1.10 timbl 673:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 674:      }
1.7 timbl 675:      if (!t) {
1.20 frystyk 676:        if(SGML_TRACE) fprintf(stderr,
1.1 timbl 677:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 678:      } else {
1.7 timbl 679:        context->current_tag = t;
1.2 timbl 680:        end_element( context, context->current_tag);
1.1 timbl 681:      }
1.2 timbl 682: 
1.1 timbl 683:      string->size = 0;
1.2 timbl 684:      context->current_attribute_number = INVALID;
1.7 timbl 685:      if (c!='>') {
1.20 frystyk 686:        if (SGML_TRACE && !WHITE(c))
1.7 timbl 687:          fprintf(stderr,"SGML: `</%s%c' found!\n",
 688:            string->data, c);
 689:        context->state = S_junk_tag;
 690:      } else {
 691:        context->state = S_text;
 692:      }
1.1 timbl 693:    }
 694:    break;
 695: 
 696:        
 697:   case S_junk_tag:
 698:    if (c=='>') {
 699:      context->state = S_text;
 700:    }
 701:    
 702:   } /* switch on context->state */
 703: 
 704: } /* SGML_character */
1.2 timbl 705: 
 706: 
 707: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
 708: {
 709:   CONST char *p;
 710:   for(p=str; *p; p++)
 711:     SGML_character(context, *p);
 712: }
 713: 
 714: 
 715: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
 716: {
 717:   CONST char *p;
 718:   CONST char *e = str+l;
 719:   for(p=str; p<e; p++)
 720:     SGML_character(context, *p);
 721: }
 722: 
 723: /*_______________________________________________________________________
 724: */
 725: 
 726: /*   Structured Object Class
 727: **   -----------------------
 728: */
 729: PUBLIC CONST HTStreamClass SGMLParser = 
 730: {       
 731:    "SGMLParser",
 732:    SGML_free,
1.8 timbl 733:    SGML_abort,
1.9 timbl 734:    SGML_character, 
 735:    SGML_string,
 736:    SGML_write,
1.2 timbl 737: }; 
 738: 
 739: /*   Create SGML Engine
 740: **   ------------------
 741: **
 742: ** On entry,
 743: **   dtd       represents the DTD, along with
 744: **   actions     is the sink for the data as a set of routines.
 745: **
 746: */
 747: 
 748: PUBLIC HTStream* SGML_new ARGS2(
 749:    CONST SGML_dtd *,    dtd,
 750:    HTStructured *,     target)
 751: {
 752:   int i;
 753:   HTStream* context = (HTStream *) malloc(sizeof(*context));
 754:   if (!context) outofmem(__FILE__, "SGML_begin");
 755: 
 756:   context->isa = &SGMLParser;
 757:   context->string = HTChunkCreate(128);   /* Grow by this much */
 758:   context->dtd = dtd;
 759:   context->target = target;
 760:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 761:                    /* Ugh: no OO */
 762:   context->state = S_text;
 763:   context->element_stack = 0;            /* empty */
 764: #ifdef CALLERDATA        
 765:   context->callerData = (void*) callerData;
 766: #endif  
 767:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 768: 
 769:   return context;
 770: }
1.14 frystyk 771: 
 772: 
 773: 
 774: 
 775: 
 776: 
 777: 
 778: 
 779: 
 780: 
 781: 
1.2 timbl 782: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /