[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.23.2.1

/*								SGML.c
**	GENERAL SGML PARSER CODE
**
**	(c) COPYRIGHT CERN 1994.
**	Please first read the full copyright statement in the file COPYRIGH.
**
**	This module implements an HTStream object. To parse an
**	SGML file, create this object which is a parser. The object
**	is (currently) created by being passed a DTD structure,
**	and a target HTStructured object at which to throw the parsed stuff.
**
**	 6 Feb 93	Binary searches used. Interface modified.
**	 8 Jul 94  FM	Insulate free() from _free structure element.
*/
 15: 
1.23.2.1! frystyk 16: /* Library include files */
 ! 17: #include "tcp.h"
1.1 timbl 18: #include "HTUtils.h"
1.23.2.1! frystyk 19: #include "HTString.h"
1.1 timbl 20: #include "HTChunk.h"
1.20 frystyk 21: #include "SGML.h"
1.1 timbl 22: 
/*	Sentinel kept in current_attribute_number while no known attribute
**	is waiting for a value.
*/
#define INVALID (-1)
 24: 
1.1 timbl 25: /*   The State (context) of the parser
 26: **
1.2 timbl 27: **   This is passed with each call to make the parser reentrant
1.1 timbl 28: **
 29: */
 30: 
1.16 frystyk 31: 
1.2 timbl 32: 
 33:    
 34: /*       Element Stack
 35: **       -------------
 36: **   This allows us to return down the stack reselcting styles.
 37: **   As we return, attribute values will be garbage in general.
 38: */
 39: typedef struct _HTElement HTElement;
 40: struct _HTElement {
 41:    HTElement *   next;  /* Previously nested element or 0 */
 42:    HTTag*     tag;  /* The tag at this level */
 43: };
 44: 
 45: 
/*	Parser states
**	-------------
**	One value for each state of the character-driven state machine in
**	SGML_character().  The order of the enumerators is kept as it was.
*/
typedef enum _sgml_state {
    S_text,		/* Ordinary character data */
    S_literal,		/* Inside a literal element, watching for its end tag */
    S_tag,		/* After '<': accumulating a tag name */
    S_tag_gap,		/* Inside a start tag: expecting attribute or '>' */
    S_attr,		/* Accumulating an attribute name */
    S_attr_gap,		/* After an attribute name: expecting attribute, '=' or '>' */
    S_equals,		/* After '=': expecting an attribute value */
    S_value,		/* Accumulating an unquoted attribute value */
    S_after_open,	/* Just after a start tag: a newline may be stripped */
    S_nl,		/* A newline was read and is being held back */
    S_nl_tago,		/* Had newline and tag opener '<' */
    S_ero,		/* After '&': entity reference open */
    S_cro,		/* After "&#": accumulating a character number */
#ifdef ISO_2022_JP
    S_esc,		/* After ESC */
    S_dollar,		/* After ESC '$' */
    S_paren,		/* After ESC '(' */
    S_nonascii_text,	/* Text passed through until the next ESC */
#endif
    S_squoted,		/* Inside a '...' attribute value */
    S_dquoted,		/* Inside a "..." attribute value */
    S_end,		/* After "</": accumulating an end tag name */
    S_entity,		/* Accumulating an entity name */
    S_junk_tag		/* Discarding everything up to the next '>' */
} sgml_state;
 56: 
 57: 
1.2 timbl 58: /*   Internal Context Data Structure
 59: **   -------------------------------
 60: */
 61: struct _HTStream {
 62: 
 63:   CONST HTStreamClass *   isa;      /* inherited from HTStream */
 64:   
 65:   CONST SGML_dtd       *dtd;
 66:   HTStructuredClass *actions;    /* target class */
 67:   HTStructured    *target;    /* target object */
 68: 
1.1 timbl 69:   HTTag       *current_tag;
1.2 timbl 70:   int        current_attribute_number;
1.1 timbl 71:   HTChunk      *string;
 72:   HTElement     *element_stack;
1.21 frystyk 73:   sgml_state     state;
1.2 timbl 74: #ifdef CALLERDATA        
1.1 timbl 75:   void *       callerData;
1.2 timbl 76: #endif
 77:   BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
 78:   char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
 79: } ;
 80: 
 81: 
 82: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
 83: 
1.1 timbl 84: 
1.17 timbl 85: /*   Find Attribute Number
 86: **   ---------------------
 87: */
 88: 
 89: PUBLIC int SGMLFindAttribute ARGS2 (HTTag*, tag, CONST char *, s)
 90: {
 91:   attr* attributes = tag->attributes;
 92: 
 93:   int high, low, i, diff;      /* Binary search for attribute name */
 94:   for(low=0, high=tag->number_of_attributes;
 95:        high > low ;
 96:        diff < 0 ? (low = i+1) : (high = i) ) {
 97:    i = (low + (high-low)/2);
 98:    diff = strcasecomp(attributes[i].name, s);
 99:    if (diff==0) return i;         /* success: found it */
 100:   } /* for */
 101:   
 102:   return -1;
 103: }
 104: 
1.1 timbl 105: 
 106: /*   Handle Attribute
 107: **   ----------------
 108: */
 109: /* PUBLIC CONST char * SGML_default = "";  ?? */
 110: 
1.21 frystyk 111: PRIVATE void handle_attribute_name ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 112: {
1.2 timbl 113: 
 114:   HTTag * tag = context->current_tag;
 115: 
1.17 timbl 116:   int i = SGMLFindAttribute(tag, s);
 117:   if (i>=0) {
 118:    context->current_attribute_number = i;
 119:    context->present[i] = YES;
 120:    if (context->value[i]) {
 121:      free(context->value[i]);
 122:      context->value[i] = NULL;
 123:    }
 124:    return;
 125:   } /* if */
1.2 timbl 126:    
1.20 frystyk 127:   if (SGML_TRACE)
1.23.2.1! frystyk 128:    fprintf(TDEST, "SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 129:      s, context->current_tag->name);
 130:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 131: }
 132: 
 133: 
 134: /*   Handle attribute value
 135: **   ----------------------
 136: */
1.21 frystyk 137: PRIVATE void handle_attribute_value ARGS2(HTStream *, context, CONST char *, s)
1.1 timbl 138: {
1.2 timbl 139:   if (context->current_attribute_number != INVALID) {
 140:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 141:   } else {
1.23.2.1! frystyk 142:     if (SGML_TRACE) fprintf(TDEST, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 143:   }
1.2 timbl 144:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 145: }
 146: 
1.2 timbl 147: 
1.1 timbl 148: /*   Handle entity
 149: **   -------------
 150: **
 151: ** On entry,
 152: **   s    contains the entity name zero terminated
 153: ** Bugs:
 154: **   If the entity name is unknown, the terminator is treated as
 155: **   a printable non-special character in all cases, even if it is '<'
 156: */
1.21 frystyk 157: PRIVATE void handle_entity ARGS2(HTStream *, context, char, term)
1.1 timbl 158: {
1.2 timbl 159: 
1.3 timbl 160:   CONST char ** entities = context->dtd->entity_names;
1.1 timbl 161:   CONST char *s = context->string->data;
1.2 timbl 162:   
 163:   int high, low, i, diff;
 164:   for(low=0, high = context->dtd->number_of_entities;
 165:        high > low ;
 166:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 167:    i = (low + (high-low)/2);
 168:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 169:    if (diff==0) {         /* success: found it */
 170:      (*context->actions->put_entity)(context->target, i);
 171:      return;
1.1 timbl 172:    }
 173:   }
 174:   /* If entity string not found, display as text */
1.20 frystyk 175:   if (SGML_TRACE)
1.23.2.1! frystyk 176:    fprintf(TDEST, "SGML: Unknown entity %s\n", s); 
1.2 timbl 177:   PUTC('&');
1.1 timbl 178:   {
 179:    CONST char *p;
 180:    for (p=s; *p; p++) {
1.2 timbl 181:      PUTC(*p);
1.1 timbl 182:    }
 183:   }
1.2 timbl 184:   PUTC(term);
1.1 timbl 185: }
 186: 
1.2 timbl 187: 
1.1 timbl 188: /*   End element
1.2 timbl 189: **   -----------
1.1 timbl 190: */
1.21 frystyk 191: PRIVATE void end_element ARGS2(HTStream *, context, HTTag *, old_tag)
1.1 timbl 192: {
1.23.2.1! frystyk 193:   if (SGML_TRACE) fprintf(TDEST, "SGML: End  </%s>\n", old_tag->name);
1.2 timbl 194:   if (old_tag->contents == SGML_EMPTY) {
1.23.2.1! frystyk 195:     if (SGML_TRACE) fprintf(TDEST,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 196:        old_tag->name);
 197:    return;
 198:   }
 199:   while (context->element_stack)   {/* Loop is error path only */
 200:    HTElement * N = context->element_stack;
 201:    HTTag * t = N->tag;
 202:    
 203:    if (old_tag != t) {       /* Mismatch: syntax error */
 204:      if (context->element_stack->next) { /* This is not the last level */
1.23.2.1! frystyk 205:        if (SGML_TRACE) fprintf(TDEST,
1.1 timbl 206:        "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
 207:          old_tag->name, t->name, t->name);
 208:      } else {          /* last level */
1.23.2.1! frystyk 209:        if (SGML_TRACE) fprintf(TDEST,
1.1 timbl 210:          "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
 211:          old_tag->name, t->name, old_tag->name);
 212:        return;         /* Ignore */
 213:      }
 214:    }
 215:    
 216:    context->element_stack = N->next;        /* Remove from stack */
 217:    free(N);
1.2 timbl 218:    (*context->actions->end_element)(context->target,
 219:         t - context->dtd->tags);
1.1 timbl 220:    if (old_tag == t) return; /* Correct sequence */
 221:    
 222:    /* Syntax error path only */
 223:    
 224:   }
1.23.2.1! frystyk 225:   if (SGML_TRACE) fprintf(TDEST,
1.1 timbl 226:    "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
 227: }
 228: 
 229: 
1.17 timbl 230: /*   Start an element
 231: **   ----------------
1.1 timbl 232: */
1.21 frystyk 233: PRIVATE void start_element ARGS1(HTStream *, context)
1.1 timbl 234: {
 235:   HTTag * new_tag = context->current_tag;
 236:   
1.23.2.1! frystyk 237:   if (SGML_TRACE) fprintf(TDEST, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 238:   (*context->actions->start_element)(
 239:    context->target,
 240:    new_tag - context->dtd->tags,
 241:    context->present,
1.3 timbl 242:    (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 243:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.1 timbl 244:    HTElement * N = (HTElement *)malloc(sizeof(HTElement));
 245:     if (N == NULL) outofmem(__FILE__, "start_element");
 246:    N->next = context->element_stack;
 247:    N->tag = new_tag;
 248:    context->element_stack = N;
 249:   }
 250: }
 251: 
 252: 
1.2 timbl 253: /*       Find Tag in DTD tag list
 254: **       ------------------------
1.1 timbl 255: **
 256: ** On entry,
1.2 timbl 257: **   dtd   points to dtd structire including valid tag list
 258: **   string points to name of tag in question
1.1 timbl 259: **
1.2 timbl 260: ** On exit,
 261: **   returns:
1.7 timbl 262: **       NULL      tag not found
 263: **       else      address of tag structure in dtd
1.2 timbl 264: */
1.11 timbl 265: PUBLIC HTTag * SGMLFindTag ARGS2(CONST SGML_dtd*, dtd, CONST char *, string)
1.2 timbl 266: {
 267:   int high, low, i, diff;
 268:   for(low=0, high=dtd->number_of_tags;
 269:        high > low ;
 270:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 271:    i = (low + (high-low)/2);
1.3 timbl 272:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 273:    if (diff==0) {         /* success: found it */
1.7 timbl 274:      return &dtd->tags[i];
1.2 timbl 275:    }
 276:   }
1.7 timbl 277:   return NULL;
1.2 timbl 278: }
 279: 
 280: /*________________________________________________________________________
 281: **           Public Methods
1.1 timbl 282: */
 283: 
1.2 timbl 284: 
 285: /*   Could check that we are back to bottom of stack! @@ */
1.1 timbl 286: 
1.22 frystyk 287: PUBLIC int SGML_free ARGS1(HTStream *, context)
1.8 timbl 288: {
1.14 frystyk 289:   int cnt;
 290: 
1.15 frystyk 291:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 292:    HTElement *ptr = context->element_stack;
 293: 
1.23.2.1! frystyk 294:    if(SGML_TRACE) fprintf(TDEST, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 295:             context->element_stack->tag->name);
 296:    context->element_stack = ptr->next;
 297:    free(ptr);
 298:   }
1.19 duns 299:   (*context->actions->_free)(context->target);
1.8 timbl 300:   HTChunkFree(context->string);
1.15 frystyk 301:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 302:    if(context->value[cnt])
 303:      free(context->value[cnt]);
1.8 timbl 304:   free(context);
1.22 frystyk 305:   return 0;
1.1 timbl 306: }
 307: 
1.22 frystyk 308: PUBLIC int SGML_abort ARGS2(HTStream *, context, HTError, e)
1.1 timbl 309: {
1.14 frystyk 310:   int cnt;
 311: 
1.15 frystyk 312:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 313:    HTElement *ptr = context->element_stack;
 314: 
1.23.2.1! frystyk 315:    if(SGML_TRACE) fprintf(TDEST, "SGML: Non-matched tag found: <%s>\n",
1.15 frystyk 316:             context->element_stack->tag->name);
 317:    context->element_stack = ptr->next;
 318:    free(ptr);
 319:   }
1.8 timbl 320:   (*context->actions->abort)(context->target, e);
1.1 timbl 321:   HTChunkFree(context->string);
1.14 frystyk 322:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 323:    if(context->value[cnt])
 324:      free(context->value[cnt]);
1.1 timbl 325:   free(context);
1.22 frystyk 326:   return EOF;
1.1 timbl 327: }
 328: 
1.2 timbl 329: 
/*	Read and write user callback handle
**	-----------------------------------
**
**   The callbacks from the SGML parser have an SGML context parameter.
**   These calls allow the caller to associate his own context with a
**   particular SGML context.  They are only compiled when CALLERDATA
**   is defined.
*/

#ifdef CALLERDATA
/*	Return the handle last stored with SGML_setCallerData().
*/
PUBLIC void* SGML_callerData ARGS1(HTStream *, context)
{
    return context->callerData;
}

/*	Store the caller's handle in the parser context.
*/
PUBLIC void SGML_setCallerData ARGS2(HTStream *, context, void*, data)
{
    context->callerData = data;
}
#endif
1.1 timbl 349: 
1.2 timbl 350: PUBLIC void SGML_character ARGS2(HTStream *, context, char,c)
1.1 timbl 351: 
 352: {
1.2 timbl 353:   CONST SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 354:   HTChunk  *string =    context->string;
 355: 
 356:   switch(context->state) {
1.18 timbl 357:   
 358:   case S_after_open: /* Strip one trainling newline
 359:            only after opening nonempty element. - SGML:Ugh! */
 360:     if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
 361:      break;
 362:    }
 363:    context->state = S_text;
 364:    goto normal_text;
 365:    /* (***falls through***) */
 366:    
1.1 timbl 367:   case S_text:
1.18 timbl 368: normal_text:
 369: 
1.13 timbl 370: #ifdef ISO_2022_JP
 371:    if (c=='033円') {
 372:      context->state = S_esc;
 373:      PUTC(c);
 374:      break;
 375:    }
 376: #endif /* ISO_2022_JP */
1.6 timbl 377:    if (c=='&' && (!context->element_stack || (
 378:             context->element_stack->tag &&
 379:             ( context->element_stack->tag->contents == SGML_MIXED
 380:              || context->element_stack->tag->contents ==
 381:                             SGML_RCDATA)
 382:            ))) {
1.1 timbl 383:      string->size = 0;
 384:      context->state = S_ero;
 385:      
 386:    } else if (c=='<') {
 387:      string->size = 0;
 388:      context->state = (context->element_stack &&
1.13 timbl 389:        context->element_stack->tag &&
 390:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 391:                S_literal : S_tag;
1.18 timbl 392:    } else if (c=='\n') {  /* Newline - ignore if before tag end! */
 393:      context->state = S_nl;
1.2 timbl 394:    } else PUTC(c);
1.1 timbl 395:    break;
1.13 timbl 396: 
1.18 timbl 397:   case S_nl:
 398:     if (c=='<') {
 399:      string->size = 0;
 400:      context->state = (context->element_stack &&
 401:        context->element_stack->tag &&
 402:        context->element_stack->tag->contents == SGML_LITERAL) ?
 403:                S_literal : S_nl_tago;
 404:    } else {
 405:      PUTC('\n');
 406:      context->state = S_text;
 407:      goto normal_text;
 408:    }
 409:    break;
 410: 
 411:   case S_nl_tago:      /* Had newline and tag opener */
 412:     if (c != '/') {
 413:      PUTC('\n');     /* Only ignore newline before </ */
 414:    }
 415:    context->state = S_tag;
 416:    goto handle_S_tag;
 417: 
1.13 timbl 418: #ifdef ISO_2022_JP
 419:   case S_esc:
 420:    if (c=='$') {
 421:      context->state = S_dollar;
 422:    } else if (c=='(') {
 423:      context->state = S_paren;
 424:    } else {
 425:      context->state = S_text;
 426:    }
 427:    PUTC(c);
 428:    break;
 429:   case S_dollar:
 430:    if (c=='@' || c=='B') {
 431:      context->state = S_nonascii_text;
 432:    } else {
 433:      context->state = S_text;
 434:    }
 435:    PUTC(c);
 436:    break;
 437:   case S_paren:
 438:    if (c=='B' || c=='J') {
 439:      context->state = S_text;
 440:    } else {
 441:      context->state = S_text;
 442:    }
 443:    PUTC(c);
 444:    break;
 445:   case S_nonascii_text:
 446:    if (c=='033円') {
 447:      context->state = S_esc;
 448:      PUTC(c);
 449:    } else {
 450:      PUTC(c);
 451:    }
 452:    break;
 453: #endif /* ISO_2022_JP */
1.1 timbl 454: 
1.12 timbl 455: /*   In literal mode, waits only for specific end tag!
1.2 timbl 456: **   Only foir compatibility with old servers.
1.1 timbl 457: */
1.12 timbl 458:   case S_literal :
1.1 timbl 459:    HTChunkPutc(string, c);
 460:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 461:        : context->element_stack->tag->name[string->size-2])) {
 462:      int i;
 463:      
1.12 timbl 464:      /* If complete match, end literal */
1.1 timbl 465:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 466:        end_element(context, context->element_stack->tag);
 467:        string->size = 0;
1.2 timbl 468:        context->current_attribute_number = INVALID;
1.1 timbl 469:        context->state = S_text;
 470:        break;
 471:      }      /* If Mismatch: recover string. */
1.2 timbl 472:      PUTC( '<');
1.1 timbl 473:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 474:        PUTC(
1.1 timbl 475:                       string->data[i]);
 476:      context->state = S_text;  
 477:    }
 478:    
 479:     break;
 480: 
 481: /*   Character reference or Entity
 482: */
 483:  case S_ero:
 484:    if (c=='#') {
 485:      context->state = S_cro; /*  &# is Char Ref Open */ 
 486:      break;
 487:    }
 488:    context->state = S_entity;  /* Fall through! */
 489:    
 490: /*   Handle Entities
 491: */
 492:   case S_entity:
 493:    if (isalnum(c))
 494:      HTChunkPutc(string, c);
 495:    else {
 496:      HTChunkTerminate(string);
 497:      handle_entity(context, c);
 498:      context->state = S_text;
 499:    }
 500:    break;
 501: 
 502: /*   Character reference
 503: */
 504:   case S_cro:
 505:    if (isalnum(c))
 506:      HTChunkPutc(string, c);   /* accumulate a character NUMBER */
 507:    else {
 508:      int value;
 509:      HTChunkTerminate(string);
 510:      if (sscanf(string->data, "%d", &value)==1)
1.2 timbl 511:        PUTC(FROMASCII((char)value));
1.1 timbl 512:      context->state = S_text;
 513:    }
 514:    break;
 515: 
 516: /*       Tag
 517: */     
 518:   case S_tag:                /* new tag */
1.18 timbl 519: handle_S_tag:
 520: 
1.1 timbl 521:    if (isalnum(c))
 522:      HTChunkPutc(string, c);
 523:    else {             /* End of tag name */
1.7 timbl 524:      HTTag * t;
1.1 timbl 525:      if (c=='/') {
1.20 frystyk 526:        if (SGML_TRACE) if (string->size!=0)
1.23.2.1! frystyk 527:          fprintf(TDEST,"SGML: `<%s/' found!\n", string->data);
1.1 timbl 528:        context->state = S_end;
 529:        break;
 530:      }
 531:      HTChunkTerminate(string) ;
1.2 timbl 532: 
1.10 timbl 533:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 534:      if (!t) {
1.23.2.1! frystyk 535:        if(SGML_TRACE) fprintf(TDEST, "SGML: *** Unknown element %s\n",
1.1 timbl 536:            string->data);
 537:        context->state = (c=='>') ? S_text : S_junk_tag;
 538:        break;
 539:      }
1.7 timbl 540:      context->current_tag = t;
1.2 timbl 541:      
 542:      /* Clear out attributes
 543:      */
1.1 timbl 544:      
1.2 timbl 545:      {
 546:        int i;
 547:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 548:          context->present[i] = NO;
1.1 timbl 549:      }
 550:      string->size = 0;
1.2 timbl 551:      context->current_attribute_number = INVALID;
1.1 timbl 552:      
 553:      if (c=='>') {
 554:        if (context->current_tag->name) start_element(context);
1.18 timbl 555:        context->state = S_after_open;
1.1 timbl 556:      } else {
 557:        context->state = S_tag_gap;
 558:      }
 559:    }
 560:    break;
 561: 
 562:        
 563:   case S_tag_gap:      /* Expecting attribute or > */
 564:    if (WHITE(c)) break;  /* Gap between attributes */
 565:    if (c=='>') {      /* End of tag */
 566:      if (context->current_tag->name) start_element(context);
1.18 timbl 567:      context->state = S_after_open;
1.1 timbl 568:      break;
 569:    }
 570:    HTChunkPutc(string, c);
 571:    context->state = S_attr;        /* Get attribute */
 572:    break;
 573:    
 574:                /* accumulating value */
 575:   case S_attr:
 576:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
 577:      HTChunkTerminate(string) ;
 578:      handle_attribute_name(context, string->data);
 579:      string->size = 0;
 580:      if (c=='>') {        /* End of tag */
 581:        if (context->current_tag->name) start_element(context);
1.18 timbl 582:        context->state = S_after_open;
1.1 timbl 583:        break;
 584:      }
 585:      context->state = (c=='=' ? S_equals: S_attr_gap);
 586:    } else {
 587:      HTChunkPutc(string, c);
 588:    }
 589:    break;
 590:        
 591:   case S_attr_gap:      /* Expecting attribute or = or > */
 592:    if (WHITE(c)) break;  /* Gap after attribute */
 593:    if (c=='>') {      /* End of tag */
 594:      if (context->current_tag->name) start_element(context);
1.18 timbl 595:      context->state = S_after_open;
1.1 timbl 596:      break;
 597:    } else if (c=='=') {
 598:      context->state = S_equals;
 599:      break;
 600:    }
 601:    HTChunkPutc(string, c);
 602:    context->state = S_attr;        /* Get next attribute */
 603:    break;
 604:    
 605:   case S_equals:           /* After attr = */ 
 606:    if (WHITE(c)) break;  /* Before attribute value */
 607:    if (c=='>') {      /* End of tag */
1.23.2.1! frystyk 608:      if (SGML_TRACE) fprintf(TDEST, "SGML: found = but no value\n");
1.1 timbl 609:      if (context->current_tag->name) start_element(context);
1.18 timbl 610:      context->state = S_after_open;
1.1 timbl 611:      break;
 612:      
 613:    } else if (c=='\'') {
 614:      context->state = S_squoted;
 615:      break;
 616: 
 617:    } else if (c=='"') {
 618:      context->state = S_dquoted;
 619:      break;
 620:    }
 621:    HTChunkPutc(string, c);
 622:    context->state = S_value;
 623:    break;
 624:    
 625:   case S_value:
 626:    if (WHITE(c) || (c=='>')) {       /* End of word */
 627:      HTChunkTerminate(string) ;
 628:      handle_attribute_value(context, string->data);
 629:      string->size = 0;
 630:      if (c=='>') {        /* End of tag */
 631:        if (context->current_tag->name) start_element(context);
1.18 timbl 632:        context->state = S_after_open;
1.1 timbl 633:        break;
 634:      }
 635:      else context->state = S_tag_gap;
 636:    } else {
 637:      HTChunkPutc(string, c);
 638:    }
 639:    break;
 640:        
 641:   case S_squoted:      /* Quoted attribute value */
 642:    if (c=='\'') {     /* End of attribute value */
 643:      HTChunkTerminate(string) ;
 644:      handle_attribute_value(context, string->data);
 645:      string->size = 0;
 646:      context->state = S_tag_gap;
 647:    } else {
 648:      HTChunkPutc(string, c);
 649:    }
 650:    break;
 651:    
 652:   case S_dquoted:      /* Quoted attribute value */
 653:    if (c=='"') {      /* End of attribute value */
 654:      HTChunkTerminate(string) ;
 655:      handle_attribute_value(context, string->data);
 656:      string->size = 0;
 657:      context->state = S_tag_gap;
 658:    } else {
 659:      HTChunkPutc(string, c);
 660:    }
 661:    break;
 662:    
 663:   case S_end:                    /* </ */
 664:    if (isalnum(c))
 665:      HTChunkPutc(string, c);
 666:    else {             /* End of end tag name */
1.7 timbl 667:      HTTag * t;
1.1 timbl 668:      HTChunkTerminate(string) ;
1.7 timbl 669:      if (!*string->data) {    /* Empty end tag */
 670:        t = context->element_stack->tag;
 671:      } else {
1.10 timbl 672:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 673:      }
1.7 timbl 674:      if (!t) {
1.23.2.1! frystyk 675:        if(SGML_TRACE) fprintf(TDEST,
1.1 timbl 676:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 677:      } else {
1.7 timbl 678:        context->current_tag = t;
1.2 timbl 679:        end_element( context, context->current_tag);
1.1 timbl 680:      }
1.2 timbl 681: 
1.1 timbl 682:      string->size = 0;
1.2 timbl 683:      context->current_attribute_number = INVALID;
1.7 timbl 684:      if (c!='>') {
1.20 frystyk 685:        if (SGML_TRACE && !WHITE(c))
1.23.2.1! frystyk 686:          fprintf(TDEST,"SGML: `</%s%c' found!\n",
1.7 timbl 687:            string->data, c);
 688:        context->state = S_junk_tag;
 689:      } else {
 690:        context->state = S_text;
 691:      }
1.1 timbl 692:    }
 693:    break;
 694: 
 695:        
 696:   case S_junk_tag:
 697:    if (c=='>') {
 698:      context->state = S_text;
 699:    }
 700:    
 701:   } /* switch on context->state */
 702: 
 703: } /* SGML_character */
1.2 timbl 704: 
 705: 
 706: PUBLIC void SGML_string ARGS2(HTStream *, context, CONST char*, str)
 707: {
 708:   CONST char *p;
 709:   for(p=str; *p; p++)
 710:     SGML_character(context, *p);
 711: }
 712: 
 713: 
 714: PUBLIC void SGML_write ARGS3(HTStream *, context, CONST char*, str, int, l)
 715: {
 716:   CONST char *p;
 717:   CONST char *e = str+l;
 718:   for(p=str; p<e; p++)
 719:     SGML_character(context, *p);
 720: }
 721: 
 722: /*_______________________________________________________________________
 723: */
 724: 
 725: /*   Structured Object Class
 726: **   -----------------------
 727: */
 728: PUBLIC CONST HTStreamClass SGMLParser = 
 729: {       
 730:    "SGMLParser",
 731:    SGML_free,
1.8 timbl 732:    SGML_abort,
1.9 timbl 733:    SGML_character, 
 734:    SGML_string,
 735:    SGML_write,
1.2 timbl 736: }; 
 737: 
 738: /*   Create SGML Engine
 739: **   ------------------
 740: **
 741: ** On entry,
 742: **   dtd       represents the DTD, along with
 743: **   actions     is the sink for the data as a set of routines.
 744: **
 745: */
 746: 
 747: PUBLIC HTStream* SGML_new ARGS2(
 748:    CONST SGML_dtd *,    dtd,
 749:    HTStructured *,     target)
 750: {
 751:   int i;
 752:   HTStream* context = (HTStream *) malloc(sizeof(*context));
 753:   if (!context) outofmem(__FILE__, "SGML_begin");
 754: 
 755:   context->isa = &SGMLParser;
 756:   context->string = HTChunkCreate(128);   /* Grow by this much */
 757:   context->dtd = dtd;
 758:   context->target = target;
 759:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 760:                    /* Ugh: no OO */
 761:   context->state = S_text;
 762:   context->element_stack = 0;            /* empty */
 763: #ifdef CALLERDATA        
 764:   context->callerData = (void*) callerData;
 765: #endif  
 766:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 767: 
 768:   return context;
 769: }
1.14 frystyk 770: 
 771: 
 772: 
 773: 
 774: 
 775: 
 776: 
 777: 
 778: 
 779: 
 780: 
1.2 timbl 781: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /