[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.20

1.1 timbl 1: /*           General SGML Parser code        SGML.c
 2: **           ========================
 3: **
1.2 timbl 4: **   This module implements an HTStream object. To parse an
1.1 timbl 5: **   SGML file, create this object which is a parser. The object
1.2 timbl 6: **   is (currently) created by being passed a DTD structure,
 7: **   and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 8: **   
1.19 duns 9: **   6 Feb 93    Binary searches used. Interface modified.
 10: **   8 Jul 94 FM  Insulate free() from _free structure element.
1.1 timbl 11: */
 12: 
1.20 ! frystyk 13: /* System dependent stuff */
 ! 14: #include "tcp.h"        /* For FROMASCII */
 ! 15: 
 ! 16: /* Library includes */
1.1 timbl 17: #include "HTUtils.h"
 18: #include "HTChunk.h"
1.20 ! frystyk 19: #include "SGML.h"
1.1 timbl 20: 
1.2 timbl 21: #define INVALID (-1)
 22: 
1.1 timbl 23: /*   The State (context) of the parser
 24: **
1.2 timbl 25: **   This is passed with each call to make the parser reentrant
1.1 timbl 26: **
 27: */
 28: 
1.16 frystyk 29: 
1.2 timbl 30: 
 31:    
 32: /*       Element Stack
 33: **       -------------
 34: **   This allows us to return down the stack reselcting styles.
 35: **   As we return, attribute values will be garbage in general.
 36: */
 37: typedef struct _HTElement HTElement;
 38: struct _HTElement {
 39:    HTElement *   next;  /* Previously nested element or 0 */
 40:    HTTag*     tag;  /* The tag at this level */
 41: };
 42: 
 43: 
 44: /*   Internal Context Data Structure
 45: **   -------------------------------
 46: */
 47: struct _HTStream {
 48: 
 49:   CONST HTStreamClass *   isa;      /* inherited from HTStream */
 50:   
 51:   CONST SGML_dtd       *dtd;
 52:   HTStructuredClass *actions;    /* target class */
 53:   HTStructured    *target;    /* target object */
 54: 
1.1 timbl 55:   HTTag       *current_tag;
1.2 timbl 56:   int        current_attribute_number;
1.1 timbl 57:   HTChunk      *string;
 58:   HTElement     *element_stack;
1.12 timbl 59:   enum sgml_state { S_text, S_literal, S_tag, S_tag_gap, 
1.18 timbl 60:        S_attr, S_attr_gap, S_equals, S_value, S_after_open,
 61:        S_nl, S_nl_tago,
1.1 timbl 62:        S_ero, S_cro,
1.13 timbl 63: #ifdef ISO_2022_JP
 64:        S_esc, S_dollar, S_paren, S_nonascii_text,
 65: #endif
1.1 timbl 66:         S_squoted, S_dquoted, S_end, S_entity, S_junk_tag} state;
1.2 timbl 67: #ifdef CALLERDATA        
1.1 timbl 68:   void *       callerData;
1.2 timbl 69: #endif
 70:   BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
 71:   char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
 72: } ;
 73: 
 74: 
 75: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
 76: 
1.1 timbl 77: 
1.17 timbl 78: /*   Find Attribute Number
 79: **   ---------------------
 80: */
 81: 
 82: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
 83: {
 84:   attr* attributes = tag->attributes;
 85: 
 86:   int high, low, i, diff;      /* Binary search for attribute name */
 87:   for(low=0, high=tag->number_of_attributes;
 88:        high > low ;
 89:        diff < 0 ? (low = i+1) : (high = i) ) {
 90:    i = (low + (high-low)/2);
 91:    diff = strcasecomp(attributes[i].name, s);
 92:    if (diff==0) return i;         /* success: found it */
 93:   } /* for */
 94:   
 95:   return -1;
 96: }
 97: 
1.1 timbl 98: 
 99: /*   Handle Attribute
 100: **   ----------------
 101: */
 102: /* PUBLIC CONST char * SGML_default = "";  ?? */
 103: 
 104: #ifdef __STDC__
1.17 timbl 105: PRIVATE void handle_attribute_name(HTStream * context, CONST char * s)
1.1 timbl 106: #else
 107: PRIVATE void handle_attribute_name(context, s)
1.2 timbl 108:   HTStream * context;
1.1 timbl 109:   char *s;
 110: #endif
 111: {
1.2 timbl 112: 
 113:   HTTag * tag = context->current_tag;
 114: 
1.17 timbl 115:   int i = SGMLFindAttribute(tag, s);
 116:   if (i>=0) {
 117:    context->current_attribute_number = i;
 118:    context->present[i] = YES;
 119:    if (context->value[i]) {
 120:      free(context->value[i]);
 121:      context->value[i] = NULL;
 122:    }
 123:    return;
 124:   } /* if */
1.2 timbl 125:    
1.20 ! frystyk 126:   if (SGML_TRACE)
1.2 timbl 127:    fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
 128:      s, context->current_tag->name);
 129:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 130: }
 131: 
 132: 
 133: /*   Handle attribute value
 134: **   ----------------------
 135: */
 136: #ifdef __STDC__
1.2 timbl 137: PRIVATE void handle_attribute_value(HTStream * context, const char * s)
1.1 timbl 138: #else
 139: PRIVATE void handle_attribute_value(context, s)
1.2 timbl 140:   HTStream * context;
1.1 timbl 141:   char *s;
 142: #endif
 143: {
1.2 timbl 144:   if (context->current_attribute_number != INVALID) {
 145:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 146:   } else {
1.20 ! frystyk 147:     if (SGML_TRACE) fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 148:   }
1.2 timbl 149:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 150: }
 151: 
1.2 timbl 152: 
1.1 timbl 153: /*   Handle entity
 154: **   -------------
 155: **
 156: ** On entry,
 157: **   s    contains the entity name zero terminated
 158: ** Bugs:
 159: **   If the entity name is unknown, the terminator is treated as
 160: **   a printable non-special character in all cases, even if it is '<'
 161: */
 162: #ifdef __STDC__
1.2 timbl 163: PRIVATE void handle_entity(HTStream * context, char term)
1.1 timbl 164: #else
 165: PRIVATE void handle_entity(context, term)
1.2 timbl 166:   HTStream * context;
1.1 timbl 167:   char term;
 168: #endif
 169: {
1.2 timbl 170: 
1.3 timbl 171:   CONST char ** entities = context->dtd->entity_names;
1.1 timbl 172:   CONST char *s = context->string->data;
1.2 timbl 173:   
 174:   int high, low, i, diff;
 175:   for(low=0, high = context->dtd->number_of_entities;
 176:        high > low ;
 177:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 178:    i = (low + (high-low)/2);
 179:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 180:    if (diff==0) {         /* success: found it */
 181:      (*context->actions->put_entity)(context->target, i);
 182:      return;
1.1 timbl 183:    }
 184:   }
 185:   /* If entity string not found, display as text */
1.20 ! frystyk 186:   if (SGML_TRACE)
1.1 timbl 187:    fprintf(stderr, "SGML: Unknown entity %s\n", s); 
1.2 timbl 188:   PUTC('&');
1.1 timbl 189:   {
 190:    CONST char *p;
 191:    for (p=s; *p; p++) {
1.2 timbl 192:      PUTC(*p);
1.1 timbl 193:    }
 194:   }
1.2 timbl 195:   PUTC(term);
1.1 timbl 196: }
 197: 
1.2 timbl 198: 
1.1 timbl 199: /*   End element
1.2 timbl 200: **   -----------
1.1 timbl 201: */
 202: #ifdef __STDC__
1.2 timbl 203: PRIVATE void end_element(HTStream * context, HTTag * old_tag)
1.1 timbl 204: #else
 205: PRIVATE void end_element(context, old_tag)
 206:   HTTag * old_tag;
1.2 timbl 207:   HTStream * context;
1.1 timbl 208: #endif
 209: {
1.20 ! frystyk 210:   if (SGML_TRACE) fprintf(stderr, "SGML: End  </%s>\n", old_tag->name);
1.2 timbl 211:   if (old_tag->contents == SGML_EMPTY) {
1.20 ! frystyk 212:     if (SGML_TRACE) fprintf(stderr,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 213:        old_tag->name);
 214:    return;
 215:   }
 216:   while (context->element_stack)   {/* Loop is error path only */
 217:    HTElement * N = context->element_stack;
 218:    HTTag * t = N->tag;
 219:    
 220:    if (old_tag != t) {       /* Mismatch: syntax error */
 221:      if (context->element_stack->next) { /* This is not the last level */
1.20 ! frystyk 222:        if (SGML_TRACE) fprintf(stderr,
1.1 timbl 223:        "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
 224:          old_tag->name, t->name, t->name);
 225:      } else {          /* last level */
1.20 ! frystyk 226:        if (SGML_TRACE) fprintf(stderr,
1.1 timbl 227:          "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
 228:          old_tag->name, t->name, old_tag->name);
 229:        return;         /* Ignore */
 230:      }
 231:    }
 232:    
 233:    context->element_stack = N->next;        /* Remove from stack */
 234:    free(N);
1.2 timbl 235:    (*context->actions->end_element)(context->target,
 236:         t - context->dtd->tags);
1.1 timbl 237:    if (old_tag == t) return; /* Correct sequence */
 238:    
 239:    /* Syntax error path only */
 240:    
 241:   }
1.20 ! frystyk 242:   if (SGML_TRACE) fprintf(stderr,
1.1 timbl 243:    "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
 244: }
 245: 
 246: 
1.17 timbl 247: /*   Start an element
 248: **   ----------------
1.1 timbl 249: */
 250: #ifdef __STDC__
1.2 timbl 251: PRIVATE void start_element(HTStream * context)
1.1 timbl 252: #else
 253: PRIVATE void start_element(context)
1.2 timbl 254:   HTStream * context;
1.1 timbl 255: #endif
 256: {
 257:   HTTag * new_tag = context->current_tag;
 258:   
1.20 ! frystyk 259:   if (SGML_TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 260:   (*context->actions->start_element)(
 261:    context->target,
 262:    new_tag - context->dtd->tags,
 263:    context->present,
1.3 timbl 264:    (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 265:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.1 timbl 266:    HTElement * N = (HTElement *)malloc(sizeof(HTElement));
 267:     if (N == NULL) outofmem(__FILE__, "start_element");
 268:    N->next = context->element_stack;
 269:    N->tag = new_tag;
 270:    context->element_stack = N;
 271:   }
 272: }
 273: 
 274: 
1.2 timbl 275: /*       Find Tag in DTD tag list
 276: **       ------------------------
1.1 timbl 277: **
 278: ** On entry,
1.2 timbl 279: **   dtd   points to dtd structire including valid tag list
 280: **   string points to name of tag in question
1.1 timbl 281: **
1.2 timbl 282: ** On exit,
 283: **   returns:
1.7 timbl 284: **       NULL      tag not found
 285: **       else      address of tag structure in dtd
1.2 timbl 286: */
1.11 timbl 287: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 288: {
 289:   int high, low, i, diff;
 290:   for(low=0, high=dtd->number_of_tags;
 291:        high > low ;
 292:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 293:    i = (low + (high-low)/2);
1.3 timbl 294:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 295:    if (diff==0) {         /* success: found it */
1.7 timbl 296:      return &dtd->tags[i];
1.2 timbl 297:    }
 298:   }
1.7 timbl 299:   return NULL;
1.2 timbl 300: }
 301: 
 302: /*________________________________________________________________________
 303: **           Public Methods
1.1 timbl 304: */
 305: 
1.2 timbl 306: 
 307: /*   Could check that we are back to bottom of stack! @@ */
1.1 timbl 308: 
1.8 timbl 309: PUBLIC void SGML_free ARGS1(HTStream *, context)
 310: {
1.14 frystyk 311:   int cnt;
 312: 
1.15 frystyk 313:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 314:    HTElement *ptr = context->element_stack;
 315: 
1.20 ! frystyk 316:    if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 317:             context->element_stack->tag->name);
 318:    context->element_stack = ptr->next;
 319:    free(ptr);
 320:   }
1.19 duns 321:   (*context->actions->_free)(context->target);
1.8 timbl 322:   HTChunkFree(context->string);
1.15 frystyk 323:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 324:    if(context->value[cnt])
 325:      free(context->value[cnt]);
1.8 timbl 326:   free(context);
1.1 timbl 327: }
 328: 
1.8 timbl 329: PUBLIC void SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 330: {
1.14 frystyk 331:   int cnt;
 332: 
1.15 frystyk 333:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 334:    HTElement *ptr = context->element_stack;
 335: 
1.20 ! frystyk 336:    if(SGML_TRACE) fprintf(stderr, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 337:             context->element_stack->tag->name);
 338:    context->element_stack = ptr->next;
 339:    free(ptr);
 340:   }
1.8 timbl 341:   (*context->actions->abort)(context->target, e);
1.1 timbl 342:   HTChunkFree(context->string);
1.14 frystyk 343:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 344:    if(context->value[cnt])
 345:      free(context->value[cnt]);
1.1 timbl 346:   free(context);
 347: }
 348: 
1.2 timbl 349: 
/*  Read and write user callback handle
**  -----------------------------------
**
**  The callbacks from the SGML parser have an SGML context parameter.
**  These calls allow the caller to associate his own context with a
**  particular SGML context.  Both are compiled only when CALLERDATA
**  is defined.
*/

#ifdef CALLERDATA
/*  Returns the pointer last stored in this context. */
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    void * stored = context->callerData;
    return stored;
}

/*  Stores data in this context; the parser itself never looks at it. */
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
}
#endif
1.1 timbl 369: 
1.2 timbl 370: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 371: 
 372: {
1.2 timbl 373:   CONST SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 374:   HTChunk  *string =    context->string;
 375: 
 376:   switch(context->state) {
1.18 timbl 377:   
 378:   case S_after_open: /* Strip one trainling newline
 379:            only after opening nonempty element. - SGML:Ugh! */
 380:     if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
 381:      break;
 382:    }
 383:    context->state = S_text;
 384:    goto normal_text;
 385:    /* (***falls through***) */
 386:    
1.1 timbl 387:   case S_text:
1.18 timbl 388: normal_text:
 389: 
1.13 timbl 390: #ifdef ISO_2022_JP
 391:    if (c=='033円') {
 392:      context->state = S_esc;
 393:      PUTC(c);
 394:      break;
 395:    }
 396: #endif /* ISO_2022_JP */
1.6 timbl 397:    if (c=='&' && (!context->element_stack || (
 398:             context->element_stack->tag &&
 399:             ( context->element_stack->tag->contents == SGML_MIXED
 400:              || context->element_stack->tag->contents ==
 401:                             SGML_RCDATA)
 402:            ))) {
1.1 timbl 403:      string->size = 0;
 404:      context->state = S_ero;
 405:      
 406:    } else if (c=='<') {
 407:      string->size = 0;
 408:      context->state = (context->element_stack &&
1.13 timbl 409:        context->element_stack->tag &&
 410:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 411:                S_literal : S_tag;
1.18 timbl 412:    } else if (c=='\n') {  /* Newline - ignore if before tag end! */
 413:      context->state = S_nl;
1.2 timbl 414:    } else PUTC(c);
1.1 timbl 415:    break;
1.13 timbl 416: 
1.18 timbl 417:   case S_nl:
 418:     if (c=='<') {
 419:      string->size = 0;
 420:      context->state = (context->element_stack &&
 421:        context->element_stack->tag &&
 422:        context->element_stack->tag->contents == SGML_LITERAL) ?
 423:                S_literal : S_nl_tago;
 424:    } else {
 425:      PUTC('\n');
 426:      context->state = S_text;
 427:      goto normal_text;
 428:    }
 429:    break;
 430: 
 431:   case S_nl_tago:      /* Had newline and tag opener */
 432:     if (c != '/') {
 433:      PUTC('\n');     /* Only ignore newline before </ */
 434:    }
 435:    context->state = S_tag;
 436:    goto handle_S_tag;
 437: 
1.13 timbl 438: #ifdef ISO_2022_JP
 439:   case S_esc:
 440:    if (c=='$') {
 441:      context->state = S_dollar;
 442:    } else if (c=='(') {
 443:      context->state = S_paren;
 444:    } else {
 445:      context->state = S_text;
 446:    }
 447:    PUTC(c);
 448:    break;
 449:   case S_dollar:
 450:    if (c=='@' || c=='B') {
 451:      context->state = S_nonascii_text;
 452:    } else {
 453:      context->state = S_text;
 454:    }
 455:    PUTC(c);
 456:    break;
 457:   case S_paren:
 458:    if (c=='B' || c=='J') {
 459:      context->state = S_text;
 460:    } else {
 461:      context->state = S_text;
 462:    }
 463:    PUTC(c);
 464:    break;
 465:   case S_nonascii_text:
 466:    if (c=='033円') {
 467:      context->state = S_esc;
 468:      PUTC(c);
 469:    } else {
 470:      PUTC(c);
 471:    }
 472:    break;
 473: #endif /* ISO_2022_JP */
1.1 timbl 474: 
1.12 timbl 475: /*   In literal mode, waits only for specific end tag!
1.2 timbl 476: **   Only foir compatibility with old servers.
1.1 timbl 477: */
1.12 timbl 478:   case S_literal :
1.1 timbl 479:    HTChunkPutc(string, c);
 480:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 481:        : context->element_stack->tag->name[string->size-2])) {
 482:      int i;
 483:      
1.12 timbl 484:      /* If complete match, end literal */
1.1 timbl 485:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 486:        end_element(context, context->element_stack->tag);
 487:        string->size = 0;
1.2 timbl 488:        context->current_attribute_number = INVALID;
1.1 timbl 489:        context->state = S_text;
 490:        break;
 491:      }      /* If Mismatch: recover string. */
1.2 timbl 492:      PUTC( '<');
1.1 timbl 493:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 494:        PUTC(
1.1 timbl 495:                       string->data[i]);
 496:      context->state = S_text;  
 497:    }
 498:    
 499:     break;
 500: 
 501: /*   Character reference or Entity
 502: */
 503:  case S_ero:
 504:    if (c=='#') {
 505:      context->state = S_cro; /*  &# is Char Ref Open */ 
 506:      break;
 507:    }
 508:    context->state = S_entity;  /* Fall through! */
 509:    
 510: /*   Handle Entities
 511: */
 512:   case S_entity:
 513:    if (isalnum(c))
 514:      HTChunkPutc(string, c);
 515:    else {
 516:      HTChunkTerminate(string);
 517:      handle_entity(context, c);
 518:      context->state = S_text;
 519:    }
 520:    break;
 521: 
 522: /*   Character reference
 523: */
 524:   case S_cro:
 525:    if (isalnum(c))
 526:      HTChunkPutc(string, c);   /* accumulate a character NUMBER */
 527:    else {
 528:      int value;
 529:      HTChunkTerminate(string);
 530:      if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 531:        PUTC(FROMASCII((char)value));
1.1 timbl 532:      context->state = S_text;
 533:    }
 534:    break;
 535: 
 536: /*       Tag
 537: */     
 538:   case S_tag:                /* new tag */
1.18 timbl 539: handle_S_tag:
 540: 
1.1 timbl 541:    if (isalnum(c))
 542:      HTChunkPutc(string, c);
 543:    else {             /* End of tag name */
1.7 timbl 544:      HTTag * t;
1.1 timbl 545:      if (c=='/') {
1.20 ! frystyk 546:        if (SGML_TRACE) if (string->size!=0)
1.1 timbl 547:          fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
 548:        context->state = S_end;
 549:        break;
 550:      }
 551:      HTChunkTerminate(string) ;
1.2 timbl 552: 
1.10 timbl 553:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 554:      if (!t) {
1.20 ! frystyk 555:        if(SGML_TRACE) fprintf(stderr, "SGML: *** Unknown element %s\n",
1.1 timbl 556:            string->data);
 557:        context->state = (c=='>') ? S_text : S_junk_tag;
 558:        break;
 559:      }
1.7 timbl 560:      context->current_tag = t;
1.2 timbl 561:      
 562:      /* Clear out attributes
 563:      */
1.1 timbl 564:      
1.2 timbl 565:      {
 566:        int i;
 567:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 568:          context->present[i] = NO;
1.1 timbl 569:      }
 570:      string->size = 0;
1.2 timbl 571:      context->current_attribute_number = INVALID;
1.1 timbl 572:      
 573:      if (c=='>') {
 574:        if (context->current_tag->name) start_element(context);
1.18 timbl 575:        context->state = S_after_open;
1.1 timbl 576:      } else {
 577:        context->state = S_tag_gap;
 578:      }
 579:    }
 580:    break;
 581: 
 582:        
 583:   case S_tag_gap:      /* Expecting attribute or > */
 584:    if (WHITE(c)) break;  /* Gap between attributes */
 585:    if (c=='>') {      /* End of tag */
 586:      if (context->current_tag->name) start_element(context);
1.18 timbl 587:      context->state = S_after_open;
1.1 timbl 588:      break;
 589:    }
 590:    HTChunkPutc(string, c);
 591:    context->state = S_attr;        /* Get attribute */
 592:    break;
 593:    
 594:                /* accumulating value */
 595:   case S_attr:
 596:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
 597:      HTChunkTerminate(string) ;
 598:      handle_attribute_name(context, string->data);
 599:      string->size = 0;
 600:      if (c=='>') {        /* End of tag */
 601:        if (context->current_tag->name) start_element(context);
1.18 timbl 602:        context->state = S_after_open;
1.1 timbl 603:        break;
 604:      }
 605:      context->state = (c=='=' ? S_equals: S_attr_gap);
 606:    } else {
 607:      HTChunkPutc(string, c);
 608:    }
 609:    break;
 610:        
 611:   case S_attr_gap:      /* Expecting attribute or = or > */
 612:    if (WHITE(c)) break;  /* Gap after attribute */
 613:    if (c=='>') {      /* End of tag */
 614:      if (context->current_tag->name) start_element(context);
1.18 timbl 615:      context->state = S_after_open;
1.1 timbl 616:      break;
 617:    } else if (c=='=') {
 618:      context->state = S_equals;
 619:      break;
 620:    }
 621:    HTChunkPutc(string, c);
 622:    context->state = S_attr;        /* Get next attribute */
 623:    break;
 624:    
 625:   case S_equals:           /* After attr = */ 
 626:    if (WHITE(c)) break;  /* Before attribute value */
 627:    if (c=='>') {      /* End of tag */
1.20 ! frystyk 628:      if (SGML_TRACE) fprintf(stderr, "SGML: found = but no value\n");
1.1 timbl 629:      if (context->current_tag->name) start_element(context);
1.18 timbl 630:      context->state = S_after_open;
1.1 timbl 631:      break;
 632:      
 633:    } else if (c=='\'') {
 634:      context->state = S_squoted;
 635:      break;
 636: 
 637:    } else if (c=='"') {
 638:      context->state = S_dquoted;
 639:      break;
 640:    }
 641:    HTChunkPutc(string, c);
 642:    context->state = S_value;
 643:    break;
 644:    
 645:   case S_value:
 646:    if (WHITE(c) || (c=='>')) {       /* End of word */
 647:      HTChunkTerminate(string) ;
 648:      handle_attribute_value(context, string->data);
 649:      string->size = 0;
 650:      if (c=='>') {        /* End of tag */
 651:        if (context->current_tag->name) start_element(context);
1.18 timbl 652:        context->state = S_after_open;
1.1 timbl 653:        break;
 654:      }
 655:      else context->state = S_tag_gap;
 656:    } else {
 657:      HTChunkPutc(string, c);
 658:    }
 659:    break;
 660:        
 661:   case S_squoted:      /* Quoted attribute value */
 662:    if (c=='\'') {     /* End of attribute value */
 663:      HTChunkTerminate(string) ;
 664:      handle_attribute_value(context, string->data);
 665:      string->size = 0;
 666:      context->state = S_tag_gap;
 667:    } else {
 668:      HTChunkPutc(string, c);
 669:    }
 670:    break;
 671:    
 672:   case S_dquoted:      /* Quoted attribute value */
 673:    if (c=='"') {      /* End of attribute value */
 674:      HTChunkTerminate(string) ;
 675:      handle_attribute_value(context, string->data);
 676:      string->size = 0;
 677:      context->state = S_tag_gap;
 678:    } else {
 679:      HTChunkPutc(string, c);
 680:    }
 681:    break;
 682:    
 683:   case S_end:                    /* </ */
 684:    if (isalnum(c))
 685:      HTChunkPutc(string, c);
 686:    else {             /* End of end tag name */
1.7 timbl 687:      HTTag * t;
1.1 timbl 688:      HTChunkTerminate(string) ;
1.7 timbl 689:      if (!*string->data) {    /* Empty end tag */
 690:        t = context->element_stack->tag;
 691:      } else {
1.10 timbl 692:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 693:      }
1.7 timbl 694:      if (!t) {
1.20 ! frystyk 695:        if(SGML_TRACE) fprintf(stderr,
1.1 timbl 696:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 697:      } else {
1.7 timbl 698:        context->current_tag = t;
1.2 timbl 699:        end_element( context, context->current_tag);
1.1 timbl 700:      }
1.2 timbl 701: 
1.1 timbl 702:      string->size = 0;
1.2 timbl 703:      context->current_attribute_number = INVALID;
1.7 timbl 704:      if (c!='>') {
1.20 ! frystyk 705:        if (SGML_TRACE && !WHITE(c))
1.7 timbl 706:          fprintf(stderr,"SGML: `</%s%c' found!\n",
 707:            string->data, c);
 708:        context->state = S_junk_tag;
 709:      } else {
 710:        context->state = S_text;
 711:      }
1.1 timbl 712:    }
 713:    break;
 714: 
 715:        
 716:   case S_junk_tag:
 717:    if (c=='>') {
 718:      context->state = S_text;
 719:    }
 720:    
 721:   } /* switch on context->state */
 722: 
 723: } /* SGML_character */
1.2 timbl 724: 
 725: 
 726: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
 727: {
 728:   CONST char *p;
 729:   for(p=str; *p; p++)
 730:     SGML_character(context, *p);
 731: }
 732: 
 733: 
 734: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
 735: {
 736:   CONST char *p;
 737:   CONST char *e = str+l;
 738:   for(p=str; p<e; p++)
 739:     SGML_character(context, *p);
 740: }
 741: 
 742: /*_______________________________________________________________________
 743: */
 744: 
 745: /*   Structured Object Class
 746: **   -----------------------
 747: */
 748: PUBLIC CONST HTStreamClass SGMLParser = 
 749: {       
 750:    "SGMLParser",
 751:    SGML_free,
1.8 timbl 752:    SGML_abort,
1.9 timbl 753:    SGML_character, 
 754:    SGML_string,
 755:    SGML_write,
1.2 timbl 756: }; 
 757: 
 758: /*   Create SGML Engine
 759: **   ------------------
 760: **
 761: ** On entry,
 762: **   dtd       represents the DTD, along with
 763: **   actions     is the sink for the data as a set of routines.
 764: **
 765: */
 766: 
 767: PUBLIC HTStream* SGML_new ARGS2(
 768:    CONST SGML_dtd *,    dtd,
 769:    HTStructured *,     target)
 770: {
 771:   int i;
 772:   HTStream* context = (HTStream *) malloc(sizeof(*context));
 773:   if (!context) outofmem(__FILE__, "SGML_begin");
 774: 
 775:   context->isa = &SGMLParser;
 776:   context->string = HTChunkCreate(128);   /* Grow by this much */
 777:   context->dtd = dtd;
 778:   context->target = target;
 779:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 780:                    /* Ugh: no OO */
 781:   context->state = S_text;
 782:   context->element_stack = 0;            /* empty */
 783: #ifdef CALLERDATA        
 784:   context->callerData = (void*) callerData;
 785: #endif  
 786:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 787: 
 788:   return context;
 789: }
1.14 frystyk 790: 
 791: 
 792: 
 793: 
 794: 
 795: 
 796: 
 797: 
 798: 
 799: 
 800: 
1.2 timbl 801: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /