[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.36

1.23 frystyk 1: /*                                   SGML.c
 2: **   GENERAL SGML PARSER CODE
 3: **
1.27 frystyk 4: **   (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: **   This module implements an HTStream object. To parse an
1.1 timbl 8: **   SGML file, create this object which is a parser. The object
1.2 timbl 9: **   is (currently) created by being passed a DTD structure,
 10: **   and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 11: **   
1.19 duns 12: **   6 Feb 93    Binary searches used. Interface modified.
 13: **   8 Jul 94 FM  Insulate free() from _free structure element.
1.1 timbl 14: */
 15: 
1.25 frystyk 16: /* Library include files */
 17: #include "tcp.h"
1.1 timbl 18: #include "HTUtils.h"
1.25 frystyk 19: #include "HTString.h"
1.1 timbl 20: #include "HTChunk.h"
1.20 frystyk 21: #include "SGML.h"
1.1 timbl 22: 
/* Sentinel kept in current_attribute_number while no known attribute is pending */
#define INVALID		(-1)
 24: 
1.1 timbl 25: /*   The State (context) of the parser
 26: **
1.2 timbl 27: **   This is passed with each call to make the parser reentrant
1.1 timbl 28: **
 29: */
 30: 
1.16 frystyk 31: 
1.2 timbl 32: 
 33:    
 34: /*       Element Stack
 35: **       -------------
 36: **   This allows us to return down the stack reselcting styles.
 37: **   As we return, attribute values will be garbage in general.
 38: */
 39: typedef struct _HTElement HTElement;
 40: struct _HTElement {
 41:    HTElement *   next;  /* Previously nested element or 0 */
 42:    HTTag*     tag;  /* The tag at this level */
 43: };
 44: 
 45: 
/*	Parser states
**	-------------
**	One value per state of the character-driven state machine in
**	SGML_character().  The order (and therefore the numeric values) is
**	the same as before; only the layout has changed.
*/
typedef enum _sgml_state {
    S_text,			/* ordinary character data */
    S_literal,			/* inside a literal element, waiting for its end tag */
    S_tag,			/* collecting a start tag name */
    S_tag_gap,			/* inside a tag: expecting attribute or '>' */
    S_attr,			/* collecting an attribute name */
    S_attr_gap,			/* after an attribute name: expecting attribute, '=' or '>' */
    S_equals,			/* after "attr =" */
    S_value,			/* collecting an unquoted attribute value */
    S_after_open,		/* just after a start tag was completed */
    S_nl,			/* newline seen in text, not yet emitted */
    S_nl_tago,			/* newline followed by '<' */
    S_ero,			/* '&' seen: entity reference open */
    S_cro,			/* "&#" seen: character reference open */
#ifdef ISO_2022_JP
    S_esc,			/* ESC seen */
    S_dollar,			/* ESC '$' seen */
    S_paren,			/* ESC '(' seen */
    S_nonascii_text,		/* text after ESC '$' '@' or ESC '$' 'B' */
#endif
    S_squoted,			/* inside a '...' attribute value */
    S_dquoted,			/* inside a "..." attribute value */
    S_end,			/* "</" seen: collecting an end tag name */
    S_entity,			/* collecting an entity name */
    S_junk_tag			/* skipping everything up to the next '>' */
} sgml_state;
 56: 
 57: 
1.2 timbl 58: /*   Internal Context Data Structure
 59: **   -------------------------------
 60: */
 61: struct _HTStream {
 62: 
 63:   CONST HTStreamClass *   isa;      /* inherited from HTStream */
 64:   
 65:   CONST SGML_dtd       *dtd;
 66:   HTStructuredClass *actions;    /* target class */
 67:   HTStructured    *target;    /* target object */
 68: 
1.1 timbl 69:   HTTag       *current_tag;
1.2 timbl 70:   int        current_attribute_number;
1.1 timbl 71:   HTChunk      *string;
 72:   HTElement     *element_stack;
1.21 frystyk 73:   sgml_state     state;
1.2 timbl 74: #ifdef CALLERDATA        
1.1 timbl 75:   void *       callerData;
1.2 timbl 76: #endif
 77:   BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
 78:   char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
 79: } ;
 80: 
 81: 
 82: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
 83: 
1.1 timbl 84: 
1.17 timbl 85: /*   Find Attribute Number
 86: **   ---------------------
 87: */
 88: 
1.31 frystyk 89: PUBLIC int SGMLFindAttribute (HTTag* tag, CONST char * s)
1.17 timbl 90: {
 91:   attr* attributes = tag->attributes;
 92: 
 93:   int high, low, i, diff;      /* Binary search for attribute name */
 94:   for(low=0, high=tag->number_of_attributes;
 95:        high > low ;
 96:        diff < 0 ? (low = i+1) : (high = i) ) {
 97:    i = (low + (high-low)/2);
 98:    diff = strcasecomp(attributes[i].name, s);
 99:    if (diff==0) return i;         /* success: found it */
 100:   } /* for */
 101:   
 102:   return -1;
 103: }
 104: 
1.1 timbl 105: 
 106: /*   Handle Attribute
 107: **   ----------------
 108: */
 109: /* PUBLIC CONST char * SGML_default = "";  ?? */
 110: 
1.31 frystyk 111: PRIVATE void handle_attribute_name (HTStream * context, CONST char * s)
1.1 timbl 112: {
1.2 timbl 113: 
 114:   HTTag * tag = context->current_tag;
 115: 
1.17 timbl 116:   int i = SGMLFindAttribute(tag, s);
 117:   if (i>=0) {
 118:    context->current_attribute_number = i;
 119:    context->present[i] = YES;
 120:    if (context->value[i]) {
1.36 ! frystyk 121:      HT_FREE(context->value[i]);
1.17 timbl 122:      context->value[i] = NULL;
 123:    }
 124:    return;
 125:   } /* if */
1.2 timbl 126:    
1.20 frystyk 127:   if (SGML_TRACE)
1.29 frystyk 128:    TTYPrint(TDEST, "SGML: Unknown attribute %s for tag %s\n",
1.2 timbl 129:      s, context->current_tag->name);
 130:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 131: }
 132: 
 133: 
 134: /*   Handle attribute value
 135: **   ----------------------
 136: */
1.31 frystyk 137: PRIVATE void handle_attribute_value (HTStream * context, CONST char * s)
1.1 timbl 138: {
1.2 timbl 139:   if (context->current_attribute_number != INVALID) {
 140:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 141:   } else {
1.29 frystyk 142:     if (SGML_TRACE) TTYPrint(TDEST, "SGML: Attribute value %s ignored\n", s);
1.1 timbl 143:   }
1.2 timbl 144:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 145: }
 146: 
1.2 timbl 147: 
1.1 timbl 148: /*   Handle entity
 149: **   -------------
 150: **
 151: ** On entry,
 152: **   s    contains the entity name zero terminated
 153: ** Bugs:
 154: **   If the entity name is unknown, the terminator is treated as
 155: **   a printable non-special character in all cases, even if it is '<'
 156: */
1.31 frystyk 157: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 158: {
1.2 timbl 159: 
1.3 timbl 160:   CONST char ** entities = context->dtd->entity_names;
1.1 timbl 161:   CONST char *s = context->string->data;
1.2 timbl 162:   
 163:   int high, low, i, diff;
 164:   for(low=0, high = context->dtd->number_of_entities;
 165:        high > low ;
 166:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 167:    i = (low + (high-low)/2);
 168:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 169:    if (diff==0) {         /* success: found it */
 170:      (*context->actions->put_entity)(context->target, i);
 171:      return;
1.1 timbl 172:    }
 173:   }
 174:   /* If entity string not found, display as text */
1.20 frystyk 175:   if (SGML_TRACE)
1.29 frystyk 176:    TTYPrint(TDEST, "SGML: Unknown entity %s\n", s); 
1.2 timbl 177:   PUTC('&');
1.1 timbl 178:   {
 179:    CONST char *p;
 180:    for (p=s; *p; p++) {
1.2 timbl 181:      PUTC(*p);
1.1 timbl 182:    }
 183:   }
1.2 timbl 184:   PUTC(term);
1.1 timbl 185: }
 186: 
1.35 frystyk 187: /*
 188: **   Helper function to check if the tag is on the stack
 189: */
 190: PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag)
 191: {
 192:   HTElement* elem;
 193:   for (elem = stack; elem != NULL; elem = elem->next)
 194:   {
 195:     if (elem->tag == tag) return YES;
 196:   }
 197:   return NO;
 198: }
1.2 timbl 199: 
1.1 timbl 200: /*   End element
1.2 timbl 201: **   -----------
1.1 timbl 202: */
1.31 frystyk 203: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 204: {
1.29 frystyk 205:   if (SGML_TRACE) TTYPrint(TDEST, "SGML: End  </%s>\n", old_tag->name);
1.2 timbl 206:   if (old_tag->contents == SGML_EMPTY) {
1.29 frystyk 207:     if (SGML_TRACE) TTYPrint(TDEST,"SGML: Illegal end tag </%s> found.\n",
1.1 timbl 208:        old_tag->name);
 209:    return;
 210:   }
 211:   while (context->element_stack)   {/* Loop is error path only */
 212:    HTElement * N = context->element_stack;
 213:    HTTag * t = N->tag;
 214:    
 215:    if (old_tag != t) {       /* Mismatch: syntax error */
1.35 frystyk 216:      /*
 217:      ** Patch from Maciej Puzio, puzio@laser.mimuw.edu.pl
 218:      ** See explanation in ../User/Patch/lib_4.0_1.fix
 219:      */
 220:       if (context->element_stack->next  /* This is not the last level */
 221:        && lookup_element_stack(context->element_stack, old_tag)) {
1.29 frystyk 222:        if (SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 223:        "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
 224:          old_tag->name, t->name, t->name);
 225:      } else {          /* last level */
1.29 frystyk 226:        if (SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 227:          "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
 228:          old_tag->name, t->name, old_tag->name);
 229:        return;         /* Ignore */
 230:      }
 231:    }
 232:    
 233:    context->element_stack = N->next;        /* Remove from stack */
1.36 ! frystyk 234:    HT_FREE(N);
1.2 timbl 235:    (*context->actions->end_element)(context->target,
 236:         t - context->dtd->tags);
1.1 timbl 237:    if (old_tag == t) return; /* Correct sequence */
 238:    
 239:    /* Syntax error path only */
 240:    
 241:   }
1.29 frystyk 242:   if (SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 243:    "SGML: Extra end tag </%s> found and ignored.\n", old_tag->name);
 244: }
 245: 
 246: 
1.17 timbl 247: /*   Start an element
 248: **   ----------------
1.1 timbl 249: */
1.31 frystyk 250: PRIVATE void start_element (HTStream * context)
1.1 timbl 251: {
 252:   HTTag * new_tag = context->current_tag;
 253:   
1.29 frystyk 254:   if (SGML_TRACE) TTYPrint(TDEST, "SGML: Start <%s>\n", new_tag->name);
1.2 timbl 255:   (*context->actions->start_element)(
 256:    context->target,
 257:    new_tag - context->dtd->tags,
 258:    context->present,
1.3 timbl 259:    (CONST char**) context->value); /* coerce type for think c */
1.2 timbl 260:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.36 ! frystyk 261:    HTElement * N;
 ! 262:    if ((N = (HTElement *) HT_MALLOC(sizeof(HTElement))) == NULL)
 ! 263:      HT_OUTOFMEM("start_element");
1.1 timbl 264:    N->next = context->element_stack;
 265:    N->tag = new_tag;
 266:    context->element_stack = N;
 267:   }
 268: }
 269: 
 270: 
1.2 timbl 271: /*       Find Tag in DTD tag list
 272: **       ------------------------
1.1 timbl 273: **
 274: ** On entry,
1.2 timbl 275: **   dtd   points to dtd structire including valid tag list
 276: **   string points to name of tag in question
1.1 timbl 277: **
1.2 timbl 278: ** On exit,
 279: **   returns:
1.7 timbl 280: **       NULL      tag not found
 281: **       else      address of tag structure in dtd
1.2 timbl 282: */
1.31 frystyk 283: PUBLIC HTTag * SGMLFindTag (CONST SGML_dtd* dtd, CONST char * string)
1.2 timbl 284: {
 285:   int high, low, i, diff;
 286:   for(low=0, high=dtd->number_of_tags;
 287:        high > low ;
 288:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 289:    i = (low + (high-low)/2);
1.3 timbl 290:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 291:    if (diff==0) {         /* success: found it */
1.7 timbl 292:      return &dtd->tags[i];
1.2 timbl 293:    }
 294:   }
1.7 timbl 295:   return NULL;
1.2 timbl 296: }
 297: 
 298: /*________________________________________________________________________
 299: **           Public Methods
1.1 timbl 300: */
 301: 
1.2 timbl 302: 
 303: /*   Could check that we are back to bottom of stack! @@ */
1.31 frystyk 304: PUBLIC int SGML_flush (HTStream * context)
1.26 frystyk 305: {
 306:   while (context->element_stack) {
 307:    HTElement *ptr = context->element_stack;
 308:    if (SGML_TRACE)
1.29 frystyk 309:      TTYPrint(TDEST, "SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 310:          context->element_stack->tag->name);
 311:    context->element_stack = ptr->next;
1.36 ! frystyk 312:    HT_FREE(ptr);
1.26 frystyk 313:   }
 314:   return (*context->actions->flush)(context->target);
 315: }
1.1 timbl 316: 
1.31 frystyk 317: PUBLIC int SGML_free (HTStream * context)
1.8 timbl 318: {
1.26 frystyk 319:   int status;
1.14 frystyk 320:   int cnt;
1.15 frystyk 321:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 322:    HTElement *ptr = context->element_stack;
 323: 
1.26 frystyk 324:    if (SGML_TRACE)
1.29 frystyk 325:      TTYPrint(TDEST, "SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 326:          context->element_stack->tag->name);
1.15 frystyk 327:    context->element_stack = ptr->next;
1.36 ! frystyk 328:    HT_FREE(ptr);
1.15 frystyk 329:   }
1.26 frystyk 330:   if ((status = (*context->actions->_free)(context->target)) != HT_OK)
 331:    return status;
1.33 frystyk 332:   HTChunk_delete(context->string);
1.15 frystyk 333:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 334:    if(context->value[cnt])
1.36 ! frystyk 335:      HT_FREE(context->value[cnt]);
 ! 336:   HT_FREE(context);
1.26 frystyk 337:   return HT_OK;
1.1 timbl 338: }
 339: 
1.31 frystyk 340: PUBLIC int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 341: {
1.14 frystyk 342:   int cnt;
1.15 frystyk 343:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 344:    HTElement *ptr = context->element_stack;
1.26 frystyk 345:    if (SGML_TRACE)
1.29 frystyk 346:      TTYPrint(TDEST, "SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 347:          context->element_stack->tag->name);
1.15 frystyk 348:    context->element_stack = ptr->next;
1.36 ! frystyk 349:    HT_FREE(ptr);
1.15 frystyk 350:   }
1.8 timbl 351:   (*context->actions->abort)(context->target, e);
1.33 frystyk 352:   HTChunk_delete(context->string);
1.14 frystyk 353:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 354:    if(context->value[cnt])
1.36 ! frystyk 355:      HT_FREE(context->value[cnt]);
 ! 356:   HT_FREE(context);
1.26 frystyk 357:   return HT_ERROR;
1.1 timbl 358: }
 359: 
1.2 timbl 360: 
/*	Read and write user callback handle
**	-----------------------------------
**
**   The callbacks from the SGML parser have an SGML context parameter.
**   These calls allow the caller to associate his own context with a
**   particular SGML context.
**
**   With CALLERDATA defined the pointer is kept in the context.  Without
**   it, the two functions exist only when WWW_WIN_DLL is defined, and
**   then as stubs: the getter returns NULL and the setter does nothing.
*/

#if defined(CALLERDATA)

PUBLIC void* SGML_callerData (HTStream * context)
{
    return context->callerData;
}

PUBLIC void SGML_setCallerData (HTStream * context, void* data)
{
    context->callerData = data;
}

#elif defined(WWW_WIN_DLL)

PUBLIC void * SGML_callerData (HTStream * context)
{
    return NULL;
}

PUBLIC void SGML_setCallerData (HTStream * context, void* data)
{
}

#endif /* CALLERDATA / WWW_WIN_DLL */
1.1 timbl 385: 
1.31 frystyk 386: PUBLIC int SGML_character (HTStream * context, char c)
1.1 timbl 387: 
 388: {
1.2 timbl 389:   CONST SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 390:   HTChunk  *string =    context->string;
 391: 
 392:   switch(context->state) {
1.18 timbl 393:   
 394:   case S_after_open: /* Strip one trainling newline
 395:            only after opening nonempty element. - SGML:Ugh! */
 396:     if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
 397:      break;
 398:    }
 399:    context->state = S_text;
 400:    goto normal_text;
 401:    /* (***falls through***) */
 402:    
1.1 timbl 403:   case S_text:
1.18 timbl 404: normal_text:
 405: 
1.13 timbl 406: #ifdef ISO_2022_JP
 407:    if (c=='033円') {
 408:      context->state = S_esc;
 409:      PUTC(c);
 410:      break;
 411:    }
 412: #endif /* ISO_2022_JP */
1.6 timbl 413:    if (c=='&' && (!context->element_stack || (
 414:             context->element_stack->tag &&
 415:             ( context->element_stack->tag->contents == SGML_MIXED
 416:              || context->element_stack->tag->contents ==
 417:                             SGML_RCDATA)
 418:            ))) {
1.1 timbl 419:      string->size = 0;
 420:      context->state = S_ero;
 421:      
 422:    } else if (c=='<') {
 423:      string->size = 0;
 424:      context->state = (context->element_stack &&
1.13 timbl 425:        context->element_stack->tag &&
 426:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 427:                S_literal : S_tag;
1.18 timbl 428:    } else if (c=='\n') {  /* Newline - ignore if before tag end! */
 429:      context->state = S_nl;
1.2 timbl 430:    } else PUTC(c);
1.1 timbl 431:    break;
1.13 timbl 432: 
1.18 timbl 433:   case S_nl:
 434:     if (c=='<') {
 435:      string->size = 0;
 436:      context->state = (context->element_stack &&
 437:        context->element_stack->tag &&
 438:        context->element_stack->tag->contents == SGML_LITERAL) ?
 439:                S_literal : S_nl_tago;
 440:    } else {
 441:      PUTC('\n');
 442:      context->state = S_text;
 443:      goto normal_text;
 444:    }
 445:    break;
 446: 
 447:   case S_nl_tago:      /* Had newline and tag opener */
 448:     if (c != '/') {
 449:      PUTC('\n');     /* Only ignore newline before </ */
 450:    }
 451:    context->state = S_tag;
 452:    goto handle_S_tag;
 453: 
1.13 timbl 454: #ifdef ISO_2022_JP
 455:   case S_esc:
 456:    if (c=='$') {
 457:      context->state = S_dollar;
 458:    } else if (c=='(') {
 459:      context->state = S_paren;
 460:    } else {
 461:      context->state = S_text;
 462:    }
 463:    PUTC(c);
 464:    break;
 465:   case S_dollar:
 466:    if (c=='@' || c=='B') {
 467:      context->state = S_nonascii_text;
 468:    } else {
 469:      context->state = S_text;
 470:    }
 471:    PUTC(c);
 472:    break;
 473:   case S_paren:
 474:    if (c=='B' || c=='J') {
 475:      context->state = S_text;
 476:    } else {
 477:      context->state = S_text;
 478:    }
 479:    PUTC(c);
 480:    break;
 481:   case S_nonascii_text:
 482:    if (c=='033円') {
 483:      context->state = S_esc;
 484:      PUTC(c);
 485:    } else {
 486:      PUTC(c);
 487:    }
 488:    break;
 489: #endif /* ISO_2022_JP */
1.1 timbl 490: 
1.12 timbl 491: /*   In literal mode, waits only for specific end tag!
1.2 timbl 492: **   Only foir compatibility with old servers.
1.1 timbl 493: */
1.12 timbl 494:   case S_literal :
1.33 frystyk 495:    HTChunk_putc(string, c);
1.1 timbl 496:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 497:        : context->element_stack->tag->name[string->size-2])) {
 498:      int i;
 499:      
1.12 timbl 500:      /* If complete match, end literal */
1.1 timbl 501:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 502:        end_element(context, context->element_stack->tag);
 503:        string->size = 0;
1.2 timbl 504:        context->current_attribute_number = INVALID;
1.1 timbl 505:        context->state = S_text;
 506:        break;
 507:      }      /* If Mismatch: recover string. */
1.2 timbl 508:      PUTC( '<');
1.1 timbl 509:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 510:        PUTC(
1.1 timbl 511:                       string->data[i]);
 512:      context->state = S_text;  
 513:    }
 514:    
 515:     break;
 516: 
 517: /*   Character reference or Entity
 518: */
 519:  case S_ero:
 520:    if (c=='#') {
 521:      context->state = S_cro; /*  &# is Char Ref Open */ 
 522:      break;
 523:    }
 524:    context->state = S_entity;  /* Fall through! */
 525:    
 526: /*   Handle Entities
 527: */
 528:   case S_entity:
 529:    if (isalnum(c))
1.33 frystyk 530:      HTChunk_putc(string, c);
1.1 timbl 531:    else {
1.33 frystyk 532:      HTChunk_terminate(string);
1.1 timbl 533:      handle_entity(context, c);
 534:      context->state = S_text;
 535:    }
 536:    break;
 537: 
 538: /*   Character reference
 539: */
 540:   case S_cro:
 541:    if (isalnum(c))
1.33 frystyk 542:      HTChunk_putc(string, c);  /* accumulate a character NUMBER */
1.1 timbl 543:    else {
 544:      int value;
1.33 frystyk 545:      HTChunk_terminate(string);
1.1 timbl 546:      if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 547:        PUTC((char) value);
1.1 timbl 548:      context->state = S_text;
 549:    }
 550:    break;
 551: 
 552: /*       Tag
 553: */     
 554:   case S_tag:                /* new tag */
1.18 timbl 555: handle_S_tag:
 556: 
1.1 timbl 557:    if (isalnum(c))
1.33 frystyk 558:      HTChunk_putc(string, c);
1.1 timbl 559:    else {             /* End of tag name */
1.7 timbl 560:      HTTag * t;
1.1 timbl 561:      if (c=='/') {
1.20 frystyk 562:        if (SGML_TRACE) if (string->size!=0)
1.29 frystyk 563:          TTYPrint(TDEST,"SGML: `<%s/' found!\n", string->data);
1.1 timbl 564:        context->state = S_end;
 565:        break;
 566:      }
1.33 frystyk 567:      HTChunk_terminate(string) ;
1.2 timbl 568: 
1.10 timbl 569:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 570:      if (!t) {
1.29 frystyk 571:        if(SGML_TRACE) TTYPrint(TDEST, "SGML: *** Unknown element %s\n",
1.1 timbl 572:            string->data);
 573:        context->state = (c=='>') ? S_text : S_junk_tag;
 574:        break;
 575:      }
1.7 timbl 576:      context->current_tag = t;
1.2 timbl 577:      
 578:      /* Clear out attributes
 579:      */
1.1 timbl 580:      
1.2 timbl 581:      {
 582:        int i;
 583:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 584:          context->present[i] = NO;
1.1 timbl 585:      }
 586:      string->size = 0;
1.2 timbl 587:      context->current_attribute_number = INVALID;
1.1 timbl 588:      
 589:      if (c=='>') {
 590:        if (context->current_tag->name) start_element(context);
1.18 timbl 591:        context->state = S_after_open;
1.1 timbl 592:      } else {
 593:        context->state = S_tag_gap;
 594:      }
 595:    }
 596:    break;
 597: 
 598:        
 599:   case S_tag_gap:      /* Expecting attribute or > */
 600:    if (WHITE(c)) break;  /* Gap between attributes */
 601:    if (c=='>') {      /* End of tag */
 602:      if (context->current_tag->name) start_element(context);
1.18 timbl 603:      context->state = S_after_open;
1.1 timbl 604:      break;
 605:    }
1.33 frystyk 606:    HTChunk_putc(string, c);
1.1 timbl 607:    context->state = S_attr;        /* Get attribute */
 608:    break;
 609:    
 610:                /* accumulating value */
 611:   case S_attr:
 612:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
1.33 frystyk 613:      HTChunk_terminate(string) ;
1.1 timbl 614:      handle_attribute_name(context, string->data);
 615:      string->size = 0;
 616:      if (c=='>') {        /* End of tag */
 617:        if (context->current_tag->name) start_element(context);
1.18 timbl 618:        context->state = S_after_open;
1.1 timbl 619:        break;
 620:      }
 621:      context->state = (c=='=' ? S_equals: S_attr_gap);
 622:    } else {
1.33 frystyk 623:      HTChunk_putc(string, c);
1.1 timbl 624:    }
 625:    break;
 626:        
 627:   case S_attr_gap:      /* Expecting attribute or = or > */
 628:    if (WHITE(c)) break;  /* Gap after attribute */
 629:    if (c=='>') {      /* End of tag */
 630:      if (context->current_tag->name) start_element(context);
1.18 timbl 631:      context->state = S_after_open;
1.1 timbl 632:      break;
 633:    } else if (c=='=') {
 634:      context->state = S_equals;
 635:      break;
 636:    }
1.33 frystyk 637:    HTChunk_putc(string, c);
1.1 timbl 638:    context->state = S_attr;        /* Get next attribute */
 639:    break;
 640:    
 641:   case S_equals:           /* After attr = */ 
 642:    if (WHITE(c)) break;  /* Before attribute value */
 643:    if (c=='>') {      /* End of tag */
1.29 frystyk 644:      if (SGML_TRACE) TTYPrint(TDEST, "SGML: found = but no value\n");
1.1 timbl 645:      if (context->current_tag->name) start_element(context);
1.18 timbl 646:      context->state = S_after_open;
1.1 timbl 647:      break;
 648:      
 649:    } else if (c=='\'') {
 650:      context->state = S_squoted;
 651:      break;
 652: 
 653:    } else if (c=='"') {
 654:      context->state = S_dquoted;
 655:      break;
 656:    }
1.33 frystyk 657:    HTChunk_putc(string, c);
1.1 timbl 658:    context->state = S_value;
 659:    break;
 660:    
 661:   case S_value:
 662:    if (WHITE(c) || (c=='>')) {       /* End of word */
1.33 frystyk 663:      HTChunk_terminate(string) ;
1.1 timbl 664:      handle_attribute_value(context, string->data);
 665:      string->size = 0;
 666:      if (c=='>') {        /* End of tag */
 667:        if (context->current_tag->name) start_element(context);
1.18 timbl 668:        context->state = S_after_open;
1.1 timbl 669:        break;
 670:      }
 671:      else context->state = S_tag_gap;
 672:    } else {
1.33 frystyk 673:      HTChunk_putc(string, c);
1.1 timbl 674:    }
 675:    break;
 676:        
 677:   case S_squoted:      /* Quoted attribute value */
 678:    if (c=='\'') {     /* End of attribute value */
1.33 frystyk 679:      HTChunk_terminate(string) ;
1.1 timbl 680:      handle_attribute_value(context, string->data);
 681:      string->size = 0;
 682:      context->state = S_tag_gap;
 683:    } else {
1.33 frystyk 684:      HTChunk_putc(string, c);
1.1 timbl 685:    }
 686:    break;
 687:    
 688:   case S_dquoted:      /* Quoted attribute value */
 689:    if (c=='"') {      /* End of attribute value */
1.33 frystyk 690:      HTChunk_terminate(string) ;
1.1 timbl 691:      handle_attribute_value(context, string->data);
 692:      string->size = 0;
 693:      context->state = S_tag_gap;
 694:    } else {
1.33 frystyk 695:      HTChunk_putc(string, c);
1.1 timbl 696:    }
 697:    break;
 698:    
 699:   case S_end:                    /* </ */
 700:    if (isalnum(c))
1.33 frystyk 701:      HTChunk_putc(string, c);
1.1 timbl 702:    else {             /* End of end tag name */
1.7 timbl 703:      HTTag * t;
1.33 frystyk 704:      HTChunk_terminate(string) ;
1.7 timbl 705:      if (!*string->data) {    /* Empty end tag */
 706:        t = context->element_stack->tag;
 707:      } else {
1.10 timbl 708:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 709:      }
1.7 timbl 710:      if (!t) {
1.29 frystyk 711:        if(SGML_TRACE) TTYPrint(TDEST,
1.1 timbl 712:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 713:      } else {
1.7 timbl 714:        context->current_tag = t;
1.2 timbl 715:        end_element( context, context->current_tag);
1.1 timbl 716:      }
1.2 timbl 717: 
1.1 timbl 718:      string->size = 0;
1.2 timbl 719:      context->current_attribute_number = INVALID;
1.7 timbl 720:      if (c!='>') {
1.20 frystyk 721:        if (SGML_TRACE && !WHITE(c))
1.29 frystyk 722:          TTYPrint(TDEST,"SGML: `</%s%c' found!\n",
1.7 timbl 723:            string->data, c);
 724:        context->state = S_junk_tag;
 725:      } else {
 726:        context->state = S_text;
 727:      }
1.1 timbl 728:    }
 729:    break;
 730: 
 731:        
 732:   case S_junk_tag:
 733:    if (c=='>') {
 734:      context->state = S_text;
 735:    }
 736:   } /* switch on context->state */
1.26 frystyk 737:   return HT_OK;
 738: }
1.2 timbl 739: 
 740: 
1.31 frystyk 741: PUBLIC int SGML_string (HTStream * context, CONST char* s)
1.2 timbl 742: {
1.26 frystyk 743:   while (*s)
 744:     SGML_character(context, *s++);
 745:   return HT_OK;
1.2 timbl 746: }
 747: 
 748: 
1.31 frystyk 749: PUBLIC int SGML_write (HTStream * context, CONST char* b, int l)
1.2 timbl 750: {
1.26 frystyk 751:   while (l-- > 0)
 752:     SGML_character(context, *b++);
 753:   return HT_OK;
1.2 timbl 754: }
 755: 
 756: /*_______________________________________________________________________
 757: */
 758: 
 759: /*   Structured Object Class
 760: **   -----------------------
 761: */
1.32 frystyk 762: PRIVATE CONST HTStreamClass SGMLParser = 
1.2 timbl 763: {       
1.32 frystyk 764:   "SGMLParser",
 765:   SGML_flush,
 766:   SGML_free,
 767:   SGML_abort,
 768:   SGML_character, 
 769:   SGML_string,
 770:   SGML_write,
1.2 timbl 771: }; 
 772: 
 773: /*   Create SGML Engine
 774: **   ------------------
 775: **
 776: ** On entry,
 777: **   dtd       represents the DTD, along with
 778: **   actions     is the sink for the data as a set of routines.
 779: **
 780: */
1.32 frystyk 781: PUBLIC HTStream * SGML_new (CONST SGML_dtd * dtd, HTStructured * target)
1.2 timbl 782: {
 783:   int i;
1.36 ! frystyk 784:   HTStream* context;
 ! 785:   if ((context = (HTStream *) HT_MALLOC(sizeof(*context))) == NULL)
 ! 786:     HT_OUTOFMEM("SGML_begin");
1.2 timbl 787: 
 788:   context->isa = &SGMLParser;
1.33 frystyk 789:   context->string = HTChunk_new(128);    /* Grow by this much */
1.2 timbl 790:   context->dtd = dtd;
 791:   context->target = target;
 792:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 793:                    /* Ugh: no OO */
 794:   context->state = S_text;
 795:   context->element_stack = 0;            /* empty */
 796: #ifdef CALLERDATA        
 797:   context->callerData = (void*) callerData;
 798: #endif  
 799:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 800: 
 801:   return context;
 802: }

Webmaster

AltStyle によって変換されたページ (->オリジナル) /