[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.41

1.23 frystyk 1: /*                                   SGML.c
 2: **   GENERAL SGML PARSER CODE
 3: **
1.27 frystyk 4: **   (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.41 ! frystyk 6: **   @(#) $Id: SGML.c,v 1.40 1996/06/02 00:35:05 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: **   This module implements an HTStream object. To parse an
1.1 timbl 9: **   SGML file, create this object which is a parser. The object
1.2 timbl 10: **   is (currently) created by being passed a DTD structure,
 11: **   and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **   
1.19 duns 13: **   6 Feb 93    Binary searches used. Interface modified.
 14: **   8 Jul 94 FM  Insulate free() from _free structure element.
1.1 timbl 15: */
 16: 
1.25 frystyk 17: /* Library include files */
1.38 frystyk 18: #include "sysdep.h"
1.1 timbl 19: #include "HTUtils.h"
1.25 frystyk 20: #include "HTString.h"
1.1 timbl 21: #include "HTChunk.h"
1.20 frystyk 22: #include "SGML.h"
1.1 timbl 23: 
1.2 timbl 24: #define INVALID (-1)
 25: 
1.1 timbl 26: /*   The State (context) of the parser
 27: **
1.2 timbl 28: **   This is passed with each call to make the parser reentrant
1.1 timbl 29: **
 30: */
 31: 
1.16 frystyk 32: 
1.2 timbl 33: 
 34:    
 35: /*       Element Stack
 36: **       -------------
 37: **   This allows us to return down the stack reselcting styles.
 38: **   As we return, attribute values will be garbage in general.
 39: */
 40: typedef struct _HTElement HTElement;
 41: struct _HTElement {
 42:    HTElement *   next;  /* Previously nested element or 0 */
 43:    HTTag*     tag;  /* The tag at this level */
 44: };
 45: 
 46: 
/*   Parser states
**   -------------
**   One value per state of the character-driven state machine in
**   SGML_write(). The order of the enumerators is kept unchanged.
*/
typedef enum _sgml_state {
    S_text,             /* ordinary character data */
    S_literal,          /* inside a literal element, waiting for its end tag */
    S_tag,              /* accumulating a tag name after '<' */
    S_tag_gap,          /* expecting attribute or > */
    S_attr,             /* accumulating an attribute name */
    S_attr_gap,         /* expecting attribute or = or > */
    S_equals,           /* after attr = */
    S_value,            /* accumulating an unquoted attribute value */
    S_after_open,       /* just after a start tag was closed */
    S_nl,               /* newline seen, not yet passed on */
    S_nl_tago,          /* had newline and tag opener */
    S_ero,              /* after '&' */
    S_cro,              /* after "&#": character reference */
#ifdef ISO_2022_JP
    S_esc,
    S_dollar,
    S_paren,
    S_nonascii_text,
#endif
    S_squoted,          /* inside a '...' attribute value */
    S_dquoted,          /* inside a "..." attribute value */
    S_end,              /* after "</" */
    S_entity,           /* accumulating an entity name */
    S_junk_tag          /* skipping everything up to the next '>' */
} sgml_state;
 57: 
 58: 
1.2 timbl 59: /*   Internal Context Data Structure
 60: **   -------------------------------
 61: */
 62: struct _HTStream {
 63: 
1.38 frystyk 64:   const HTStreamClass *   isa;      /* inherited from HTStream */
1.2 timbl 65:   
1.38 frystyk 66:   const SGML_dtd       *dtd;
1.2 timbl 67:   HTStructuredClass *actions;    /* target class */
 68:   HTStructured    *target;    /* target object */
 69: 
1.1 timbl 70:   HTTag       *current_tag;
1.2 timbl 71:   int        current_attribute_number;
1.1 timbl 72:   HTChunk      *string;
 73:   HTElement     *element_stack;
1.21 frystyk 74:   sgml_state     state;
1.2 timbl 75:   BOOL present[MAX_ATTRIBUTES];   /* Flags: attribute is present? */
 76:   char * value[MAX_ATTRIBUTES];   /* malloc'd strings or NULL if none */
 77: } ;
 78: 
 79: 
 80: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
 81: 
1.1 timbl 82: 
1.17 timbl 83: /*   Find Attribute Number
 84: **   ---------------------
 85: */
 86: 
1.40 frystyk 87: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.17 timbl 88: {
 89:   attr* attributes = tag->attributes;
 90: 
 91:   int high, low, i, diff;      /* Binary search for attribute name */
 92:   for(low=0, high=tag->number_of_attributes;
 93:        high > low ;
 94:        diff < 0 ? (low = i+1) : (high = i) ) {
 95:    i = (low + (high-low)/2);
 96:    diff = strcasecomp(attributes[i].name, s);
 97:    if (diff==0) return i;         /* success: found it */
 98:   } /* for */
 99:   
 100:   return -1;
 101: }
 102: 
1.1 timbl 103: 
 104: /*   Handle Attribute
 105: **   ----------------
 106: */
1.38 frystyk 107: /* PUBLIC const char * SGML_default = "";  ?? */
1.1 timbl 108: 
1.38 frystyk 109: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.1 timbl 110: {
1.2 timbl 111: 
 112:   HTTag * tag = context->current_tag;
 113: 
1.17 timbl 114:   int i = SGMLFindAttribute(tag, s);
 115:   if (i>=0) {
 116:    context->current_attribute_number = i;
 117:    context->present[i] = YES;
 118:    if (context->value[i]) {
1.36 frystyk 119:      HT_FREE(context->value[i]);
1.17 timbl 120:      context->value[i] = NULL;
 121:    }
 122:    return;
 123:   } /* if */
1.2 timbl 124:    
1.20 frystyk 125:   if (SGML_TRACE)
1.41 ! frystyk 126:    HTTrace("SGML Parser. Unknown attribute %s for tag %s\n",
1.2 timbl 127:      s, context->current_tag->name);
 128:   context->current_attribute_number = INVALID;    /* Invalid */
1.1 timbl 129: }
 130: 
 131: 
 132: /*   Handle attribute value
 133: **   ----------------------
 134: */
1.38 frystyk 135: PRIVATE void handle_attribute_value (HTStream * context, const char * s)
1.1 timbl 136: {
1.2 timbl 137:   if (context->current_attribute_number != INVALID) {
 138:    StrAllocCopy(context->value[context->current_attribute_number], s);
1.1 timbl 139:   } else {
1.41 ! frystyk 140:     if (SGML_TRACE) HTTrace("SGML Parser. Attribute value %s ignored\n", s);
1.1 timbl 141:   }
1.2 timbl 142:   context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143: }
 144: 
1.2 timbl 145: 
1.1 timbl 146: /*   Handle entity
 147: **   -------------
 148: **
 149: ** On entry,
 150: **   s    contains the entity name zero terminated
 151: ** Bugs:
 152: **   If the entity name is unknown, the terminator is treated as
 153: **   a printable non-special character in all cases, even if it is '<'
 154: */
1.31 frystyk 155: PRIVATE void handle_entity (HTStream * context, char term)
1.1 timbl 156: {
1.2 timbl 157: 
1.38 frystyk 158:   const char ** entities = context->dtd->entity_names;
 159:   const char *s = context->string->data;
1.2 timbl 160:   
 161:   int high, low, i, diff;
 162:   for(low=0, high = context->dtd->number_of_entities;
 163:        high > low ;
 164:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 165:    i = (low + (high-low)/2);
 166:    diff = strcmp(entities[i], s); /* Csse sensitive! */
 167:    if (diff==0) {         /* success: found it */
 168:      (*context->actions->put_entity)(context->target, i);
 169:      return;
1.1 timbl 170:    }
 171:   }
 172:   /* If entity string not found, display as text */
1.20 frystyk 173:   if (SGML_TRACE)
1.41 ! frystyk 174:    HTTrace("SGML Parser. Unknown entity %s\n", s); 
1.2 timbl 175:   PUTC('&');
1.1 timbl 176:   {
1.38 frystyk 177:    const char *p;
1.1 timbl 178:    for (p=s; *p; p++) {
1.2 timbl 179:      PUTC(*p);
1.1 timbl 180:    }
 181:   }
1.2 timbl 182:   PUTC(term);
1.1 timbl 183: }
 184: 
1.35 frystyk 185: /*
 186: **   Helper function to check if the tag is on the stack
 187: */
 188: PRIVATE BOOL lookup_element_stack (HTElement* stack, HTTag *tag)
 189: {
 190:   HTElement* elem;
 191:   for (elem = stack; elem != NULL; elem = elem->next)
 192:   {
 193:     if (elem->tag == tag) return YES;
 194:   }
 195:   return NO;
 196: }
1.2 timbl 197: 
1.1 timbl 198: /*   End element
1.2 timbl 199: **   -----------
1.1 timbl 200: */
1.31 frystyk 201: PRIVATE void end_element (HTStream * context, HTTag * old_tag)
1.1 timbl 202: {
1.41 ! frystyk 203:   if (SGML_TRACE) HTTrace("SGML Parser. End  </%s>\n", old_tag->name);
1.2 timbl 204:   if (old_tag->contents == SGML_EMPTY) {
1.41 ! frystyk 205:     if (SGML_TRACE) HTTrace("SGML Parser. Illegal end tag </%s> found.\n",
1.1 timbl 206:        old_tag->name);
 207:    return;
 208:   }
 209:   while (context->element_stack)   {/* Loop is error path only */
 210:    HTElement * N = context->element_stack;
 211:    HTTag * t = N->tag;
 212:    
 213:    if (old_tag != t) {       /* Mismatch: syntax error */
1.35 frystyk 214:      /*
 215:      ** Patch from Maciej Puzio, puzio@laser.mimuw.edu.pl
 216:      ** See explanation in ../User/Patch/lib_4.0_1.fix
 217:      */
 218:       if (context->element_stack->next  /* This is not the last level */
 219:        && lookup_element_stack(context->element_stack, old_tag)) {
1.37 eric 220:        if (SGML_TRACE) HTTrace(
1.41 ! frystyk 221:        "SGML Parser. Found </%s> when expecting </%s>. </%s> assumed.\n",
1.1 timbl 222:          old_tag->name, t->name, t->name);
 223:      } else {          /* last level */
1.37 eric 224:        if (SGML_TRACE) HTTrace(
1.41 ! frystyk 225:          "SGML Parser. Found </%s> when expecting </%s>. </%s> Ignored.\n",
1.1 timbl 226:          old_tag->name, t->name, old_tag->name);
 227:        return;         /* Ignore */
 228:      }
 229:    }
 230:    
 231:    context->element_stack = N->next;        /* Remove from stack */
1.36 frystyk 232:    HT_FREE(N);
1.2 timbl 233:    (*context->actions->end_element)(context->target,
 234:         t - context->dtd->tags);
1.1 timbl 235:    if (old_tag == t) return; /* Correct sequence */
 236:    
 237:    /* Syntax error path only */
 238:    
 239:   }
1.37 eric 240:   if (SGML_TRACE) HTTrace(
1.41 ! frystyk 241:    "SGML Parser. Extra end tag </%s> found and ignored.\n", old_tag->name);
1.1 timbl 242: }
 243: 
 244: 
1.17 timbl 245: /*   Start an element
 246: **   ----------------
1.1 timbl 247: */
1.31 frystyk 248: PRIVATE void start_element (HTStream * context)
1.1 timbl 249: {
 250:   HTTag * new_tag = context->current_tag;
 251:   
1.41 ! frystyk 252:   if (SGML_TRACE) HTTrace("SGML Parser. Start <%s>\n", new_tag->name);
1.2 timbl 253:   (*context->actions->start_element)(
 254:    context->target,
 255:    new_tag - context->dtd->tags,
 256:    context->present,
1.38 frystyk 257:    (const char**) context->value); /* coerce type for think c */
1.2 timbl 258:   if (new_tag->contents != SGML_EMPTY) {       /* i.e. tag not empty */
1.36 frystyk 259:    HTElement * N;
 260:    if ((N = (HTElement *) HT_MALLOC(sizeof(HTElement))) == NULL)
 261:      HT_OUTOFMEM("start_element");
1.1 timbl 262:    N->next = context->element_stack;
 263:    N->tag = new_tag;
 264:    context->element_stack = N;
 265:   }
 266: }
 267: 
 268: 
1.2 timbl 269: /*       Find Tag in DTD tag list
 270: **       ------------------------
1.1 timbl 271: **
 272: ** On entry,
1.2 timbl 273: **   dtd   points to dtd structire including valid tag list
 274: **   string points to name of tag in question
1.1 timbl 275: **
1.2 timbl 276: ** On exit,
 277: **   returns:
1.7 timbl 278: **       NULL      tag not found
 279: **       else      address of tag structure in dtd
1.2 timbl 280: */
1.40 frystyk 281: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.2 timbl 282: {
 283:   int high, low, i, diff;
 284:   for(low=0, high=dtd->number_of_tags;
 285:        high > low ;
 286:        diff < 0 ? (low = i+1) : (high = i))  { /* Binary serach */
 287:    i = (low + (high-low)/2);
1.3 timbl 288:    diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
1.2 timbl 289:    if (diff==0) {         /* success: found it */
1.7 timbl 290:      return &dtd->tags[i];
1.2 timbl 291:    }
 292:   }
1.7 timbl 293:   return NULL;
1.2 timbl 294: }
 295: 
 296: /*________________________________________________________________________
 297: **           Public Methods
1.1 timbl 298: */
 299: 
1.2 timbl 300: 
 301: /*   Could check that we are back to bottom of stack! @@ */
1.40 frystyk 302: PRIVATE int SGML_flush (HTStream * context)
1.26 frystyk 303: {
 304:   while (context->element_stack) {
 305:    HTElement *ptr = context->element_stack;
 306:    if (SGML_TRACE)
1.37 eric 307:      HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 308:          context->element_stack->tag->name);
 309:    context->element_stack = ptr->next;
1.36 frystyk 310:    HT_FREE(ptr);
1.26 frystyk 311:   }
 312:   return (*context->actions->flush)(context->target);
 313: }
1.1 timbl 314: 
1.40 frystyk 315: PRIVATE int SGML_free (HTStream * context)
1.8 timbl 316: {
1.26 frystyk 317:   int status;
1.14 frystyk 318:   int cnt;
1.15 frystyk 319:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 320:    HTElement *ptr = context->element_stack;
 321: 
1.26 frystyk 322:    if (SGML_TRACE)
1.37 eric 323:      HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 324:          context->element_stack->tag->name);
1.15 frystyk 325:    context->element_stack = ptr->next;
1.36 frystyk 326:    HT_FREE(ptr);
1.15 frystyk 327:   }
1.26 frystyk 328:   if ((status = (*context->actions->_free)(context->target)) != HT_OK)
 329:    return status;
1.33 frystyk 330:   HTChunk_delete(context->string);
1.15 frystyk 331:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)        /* Leak fix Henrik 18/02-94 */
1.14 frystyk 332:    if(context->value[cnt])
1.36 frystyk 333:      HT_FREE(context->value[cnt]);
 334:   HT_FREE(context);
1.26 frystyk 335:   return HT_OK;
1.1 timbl 336: }
 337: 
1.40 frystyk 338: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.1 timbl 339: {
1.14 frystyk 340:   int cnt;
1.15 frystyk 341:   while (context->element_stack) {  /* Make sure, that all tags are gone */
 342:    HTElement *ptr = context->element_stack;
1.26 frystyk 343:    if (SGML_TRACE)
1.37 eric 344:      HTTrace("SGML........ Non-matched tag found: <%s>\n",
1.26 frystyk 345:          context->element_stack->tag->name);
1.15 frystyk 346:    context->element_stack = ptr->next;
1.36 frystyk 347:    HT_FREE(ptr);
1.15 frystyk 348:   }
1.8 timbl 349:   (*context->actions->abort)(context->target, e);
1.33 frystyk 350:   HTChunk_delete(context->string);
1.14 frystyk 351:   for(cnt=0; cnt<MAX_ATTRIBUTES; cnt++)       /* Leak fix Henrik 18/02-94 */
 352:    if(context->value[cnt])
1.36 frystyk 353:      HT_FREE(context->value[cnt]);
 354:   HT_FREE(context);
1.26 frystyk 355:   return HT_ERROR;
1.1 timbl 356: }
 357: 
1.41 ! frystyk 358: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.1 timbl 359: {
1.38 frystyk 360:   const SGML_dtd   *dtd  =    context->dtd;
1.1 timbl 361:   HTChunk  *string =    context->string;
 362: 
1.41 ! frystyk 363:   while (l-- > 0) {
 ! 364:    char c = *b++;
 ! 365:    switch(context->state) {
1.18 timbl 366:   
1.41 ! frystyk 367:    case S_after_open:   /* Strip one trainling newline
 ! 368:             only after opening nonempty element. - SGML: Ugh! */
1.18 timbl 369:     if (c=='\n' && (context->current_tag->contents != SGML_EMPTY)) {
 370:      break;
 371:    }
 372:    context->state = S_text;
 373:    goto normal_text;
 374:    /* (***falls through***) */
 375:    
1.1 timbl 376:   case S_text:
1.18 timbl 377: normal_text:
 378: 
1.13 timbl 379: #ifdef ISO_2022_JP
 380:    if (c=='033円') {
 381:      context->state = S_esc;
 382:      PUTC(c);
 383:      break;
 384:    }
 385: #endif /* ISO_2022_JP */
1.6 timbl 386:    if (c=='&' && (!context->element_stack || (
 387:             context->element_stack->tag &&
 388:             ( context->element_stack->tag->contents == SGML_MIXED
 389:              || context->element_stack->tag->contents ==
 390:                             SGML_RCDATA)
 391:            ))) {
1.1 timbl 392:      string->size = 0;
 393:      context->state = S_ero;
 394:      
 395:    } else if (c=='<') {
 396:      string->size = 0;
 397:      context->state = (context->element_stack &&
1.13 timbl 398:        context->element_stack->tag &&
 399:        context->element_stack->tag->contents == SGML_LITERAL) ?
1.12 timbl 400:                S_literal : S_tag;
1.18 timbl 401:    } else if (c=='\n') {  /* Newline - ignore if before tag end! */
 402:      context->state = S_nl;
1.2 timbl 403:    } else PUTC(c);
1.1 timbl 404:    break;
1.13 timbl 405: 
1.18 timbl 406:   case S_nl:
 407:     if (c=='<') {
 408:      string->size = 0;
 409:      context->state = (context->element_stack &&
 410:        context->element_stack->tag &&
 411:        context->element_stack->tag->contents == SGML_LITERAL) ?
 412:                S_literal : S_nl_tago;
 413:    } else {
 414:      PUTC('\n');
 415:      context->state = S_text;
 416:      goto normal_text;
 417:    }
 418:    break;
 419: 
 420:   case S_nl_tago:      /* Had newline and tag opener */
 421:     if (c != '/') {
 422:      PUTC('\n');     /* Only ignore newline before </ */
 423:    }
 424:    context->state = S_tag;
 425:    goto handle_S_tag;
 426: 
1.13 timbl 427: #ifdef ISO_2022_JP
 428:   case S_esc:
 429:    if (c=='$') {
 430:      context->state = S_dollar;
 431:    } else if (c=='(') {
 432:      context->state = S_paren;
 433:    } else {
 434:      context->state = S_text;
 435:    }
 436:    PUTC(c);
 437:    break;
 438:   case S_dollar:
 439:    if (c=='@' || c=='B') {
 440:      context->state = S_nonascii_text;
 441:    } else {
 442:      context->state = S_text;
 443:    }
 444:    PUTC(c);
 445:    break;
 446:   case S_paren:
 447:    if (c=='B' || c=='J') {
 448:      context->state = S_text;
 449:    } else {
 450:      context->state = S_text;
 451:    }
 452:    PUTC(c);
 453:    break;
 454:   case S_nonascii_text:
 455:    if (c=='033円') {
 456:      context->state = S_esc;
 457:      PUTC(c);
 458:    } else {
 459:      PUTC(c);
 460:    }
 461:    break;
 462: #endif /* ISO_2022_JP */
1.1 timbl 463: 
1.12 timbl 464: /*   In literal mode, waits only for specific end tag!
1.2 timbl 465: **   Only foir compatibility with old servers.
1.1 timbl 466: */
1.12 timbl 467:   case S_literal :
1.33 frystyk 468:    HTChunk_putc(string, c);
1.1 timbl 469:    if ( TOUPPER(c) != ((string->size ==1) ? '/'
 470:        : context->element_stack->tag->name[string->size-2])) {
 471:      int i;
 472:      
1.12 timbl 473:      /* If complete match, end literal */
1.1 timbl 474:      if ((c=='>') && (!context->element_stack->tag->name[string->size-2])) {
 475:        end_element(context, context->element_stack->tag);
 476:        string->size = 0;
1.2 timbl 477:        context->current_attribute_number = INVALID;
1.1 timbl 478:        context->state = S_text;
 479:        break;
 480:      }      /* If Mismatch: recover string. */
1.2 timbl 481:      PUTC( '<');
1.1 timbl 482:      for (i=0; i<string->size; i++)   /* recover */
1.2 timbl 483:        PUTC(
1.1 timbl 484:                       string->data[i]);
 485:      context->state = S_text;  
 486:    }
 487:    
 488:     break;
 489: 
 490: /*   Character reference or Entity
 491: */
 492:  case S_ero:
 493:    if (c=='#') {
 494:      context->state = S_cro; /*  &# is Char Ref Open */ 
 495:      break;
 496:    }
 497:    context->state = S_entity;  /* Fall through! */
 498:    
 499: /*   Handle Entities
 500: */
 501:   case S_entity:
 502:    if (isalnum(c))
1.33 frystyk 503:      HTChunk_putc(string, c);
1.1 timbl 504:    else {
1.33 frystyk 505:      HTChunk_terminate(string);
1.1 timbl 506:      handle_entity(context, c);
 507:      context->state = S_text;
 508:    }
 509:    break;
 510: 
 511: /*   Character reference
 512: */
 513:   case S_cro:
 514:    if (isalnum(c))
1.33 frystyk 515:      HTChunk_putc(string, c);  /* accumulate a character NUMBER */
1.1 timbl 516:    else {
 517:      int value;
1.33 frystyk 518:      HTChunk_terminate(string);
1.1 timbl 519:      if (sscanf(string->data, "%d", &value)==1)
1.28 frystyk 520:        PUTC((char) value);
1.1 timbl 521:      context->state = S_text;
 522:    }
 523:    break;
 524: 
 525: /*       Tag
 526: */     
 527:   case S_tag:                /* new tag */
1.18 timbl 528: handle_S_tag:
 529: 
1.1 timbl 530:    if (isalnum(c))
1.33 frystyk 531:      HTChunk_putc(string, c);
1.1 timbl 532:    else {             /* End of tag name */
1.7 timbl 533:      HTTag * t;
1.1 timbl 534:      if (c=='/') {
1.20 frystyk 535:        if (SGML_TRACE) if (string->size!=0)
1.41 ! frystyk 536:          HTTrace("SGML Parser. `<%s/' found!\n", string->data);
1.1 timbl 537:        context->state = S_end;
 538:        break;
 539:      }
1.33 frystyk 540:      HTChunk_terminate(string) ;
1.2 timbl 541: 
1.10 timbl 542:      t = SGMLFindTag(dtd, string->data);
1.7 timbl 543:      if (!t) {
1.41 ! frystyk 544:        if(SGML_TRACE) HTTrace("SGML Parser. *** Unknown element %s\n",
1.1 timbl 545:            string->data);
 546:        context->state = (c=='>') ? S_text : S_junk_tag;
 547:        break;
 548:      }
1.7 timbl 549:      context->current_tag = t;
1.2 timbl 550:      
 551:      /* Clear out attributes
 552:      */
1.1 timbl 553:      
1.2 timbl 554:      {
 555:        int i;
 556:        for (i=0; i< context->current_tag->number_of_attributes; i++)
 557:          context->present[i] = NO;
1.1 timbl 558:      }
 559:      string->size = 0;
1.2 timbl 560:      context->current_attribute_number = INVALID;
1.1 timbl 561:      
 562:      if (c=='>') {
 563:        if (context->current_tag->name) start_element(context);
1.18 timbl 564:        context->state = S_after_open;
1.1 timbl 565:      } else {
 566:        context->state = S_tag_gap;
 567:      }
 568:    }
 569:    break;
 570: 
 571:        
 572:   case S_tag_gap:      /* Expecting attribute or > */
 573:    if (WHITE(c)) break;  /* Gap between attributes */
 574:    if (c=='>') {      /* End of tag */
 575:      if (context->current_tag->name) start_element(context);
1.18 timbl 576:      context->state = S_after_open;
1.1 timbl 577:      break;
 578:    }
1.33 frystyk 579:    HTChunk_putc(string, c);
1.1 timbl 580:    context->state = S_attr;        /* Get attribute */
 581:    break;
 582:    
 583:                /* accumulating value */
 584:   case S_attr:
 585:    if (WHITE(c) || (c=='>') || (c=='=')) {     /* End of word */
1.33 frystyk 586:      HTChunk_terminate(string) ;
1.1 timbl 587:      handle_attribute_name(context, string->data);
 588:      string->size = 0;
 589:      if (c=='>') {        /* End of tag */
 590:        if (context->current_tag->name) start_element(context);
1.18 timbl 591:        context->state = S_after_open;
1.1 timbl 592:        break;
 593:      }
 594:      context->state = (c=='=' ? S_equals: S_attr_gap);
 595:    } else {
1.33 frystyk 596:      HTChunk_putc(string, c);
1.1 timbl 597:    }
 598:    break;
 599:        
 600:   case S_attr_gap:      /* Expecting attribute or = or > */
 601:    if (WHITE(c)) break;  /* Gap after attribute */
 602:    if (c=='>') {      /* End of tag */
 603:      if (context->current_tag->name) start_element(context);
1.18 timbl 604:      context->state = S_after_open;
1.1 timbl 605:      break;
 606:    } else if (c=='=') {
 607:      context->state = S_equals;
 608:      break;
 609:    }
1.33 frystyk 610:    HTChunk_putc(string, c);
1.1 timbl 611:    context->state = S_attr;        /* Get next attribute */
 612:    break;
 613:    
 614:   case S_equals:           /* After attr = */ 
 615:    if (WHITE(c)) break;  /* Before attribute value */
 616:    if (c=='>') {      /* End of tag */
1.41 ! frystyk 617:      if (SGML_TRACE) HTTrace("SGML Parser. found = but no value\n");
1.1 timbl 618:      if (context->current_tag->name) start_element(context);
1.18 timbl 619:      context->state = S_after_open;
1.1 timbl 620:      break;
 621:      
 622:    } else if (c=='\'') {
 623:      context->state = S_squoted;
 624:      break;
 625: 
 626:    } else if (c=='"') {
 627:      context->state = S_dquoted;
 628:      break;
 629:    }
1.33 frystyk 630:    HTChunk_putc(string, c);
1.1 timbl 631:    context->state = S_value;
 632:    break;
 633:    
 634:   case S_value:
 635:    if (WHITE(c) || (c=='>')) {       /* End of word */
1.33 frystyk 636:      HTChunk_terminate(string) ;
1.1 timbl 637:      handle_attribute_value(context, string->data);
 638:      string->size = 0;
 639:      if (c=='>') {        /* End of tag */
 640:        if (context->current_tag->name) start_element(context);
1.18 timbl 641:        context->state = S_after_open;
1.1 timbl 642:        break;
 643:      }
 644:      else context->state = S_tag_gap;
 645:    } else {
1.33 frystyk 646:      HTChunk_putc(string, c);
1.1 timbl 647:    }
 648:    break;
 649:        
 650:   case S_squoted:      /* Quoted attribute value */
 651:    if (c=='\'') {     /* End of attribute value */
1.33 frystyk 652:      HTChunk_terminate(string) ;
1.1 timbl 653:      handle_attribute_value(context, string->data);
 654:      string->size = 0;
 655:      context->state = S_tag_gap;
 656:    } else {
1.33 frystyk 657:      HTChunk_putc(string, c);
1.1 timbl 658:    }
 659:    break;
 660:    
 661:   case S_dquoted:      /* Quoted attribute value */
 662:    if (c=='"') {      /* End of attribute value */
1.33 frystyk 663:      HTChunk_terminate(string) ;
1.1 timbl 664:      handle_attribute_value(context, string->data);
 665:      string->size = 0;
 666:      context->state = S_tag_gap;
 667:    } else {
1.33 frystyk 668:      HTChunk_putc(string, c);
1.1 timbl 669:    }
 670:    break;
 671:    
 672:   case S_end:                    /* </ */
 673:    if (isalnum(c))
1.33 frystyk 674:      HTChunk_putc(string, c);
1.1 timbl 675:    else {             /* End of end tag name */
1.7 timbl 676:      HTTag * t;
1.33 frystyk 677:      HTChunk_terminate(string) ;
1.7 timbl 678:      if (!*string->data) {    /* Empty end tag */
 679:        t = context->element_stack->tag;
 680:      } else {
1.10 timbl 681:        t = SGMLFindTag(dtd, string->data);
1.1 timbl 682:      }
1.7 timbl 683:      if (!t) {
1.37 eric 684:        if(SGML_TRACE) HTTrace(
1.1 timbl 685:          "Unknown end tag </%s>\n", string->data); 
1.2 timbl 686:      } else {
1.7 timbl 687:        context->current_tag = t;
1.2 timbl 688:        end_element( context, context->current_tag);
1.1 timbl 689:      }
1.2 timbl 690: 
1.1 timbl 691:      string->size = 0;
1.2 timbl 692:      context->current_attribute_number = INVALID;
1.7 timbl 693:      if (c!='>') {
1.20 frystyk 694:        if (SGML_TRACE && !WHITE(c))
1.41 ! frystyk 695:          HTTrace("SGML Parser. `</%s%c' found!\n",
1.7 timbl 696:            string->data, c);
 697:        context->state = S_junk_tag;
 698:      } else {
 699:        context->state = S_text;
 700:      }
1.1 timbl 701:    }
 702:    break;
 703: 
 704:        
 705:   case S_junk_tag:
 706:    if (c=='>') {
 707:      context->state = S_text;
 708:    }
 709:   } /* switch on context->state */
1.41 ! frystyk 710:  }
1.26 frystyk 711:   return HT_OK;
 712: }
1.2 timbl 713: 
 714: 
1.40 frystyk 715: PRIVATE int SGML_string (HTStream * context, const char* s)
1.2 timbl 716: {
1.41 ! frystyk 717:   return SGML_write(context, s, (int) strlen(s));
1.2 timbl 718: }
 719: 
 720: 
1.41 ! frystyk 721: PRIVATE int SGML_character (HTStream * context, char c)
1.2 timbl 722: {
1.41 ! frystyk 723:   return SGML_write(context, &c, 1);
1.2 timbl 724: }
 725: 
 726: /*_______________________________________________________________________
 727: */
 728: 
 729: /*   Structured Object Class
 730: **   -----------------------
 731: */
1.38 frystyk 732: PRIVATE const HTStreamClass SGMLParser = 
1.2 timbl 733: {       
1.32 frystyk 734:   "SGMLParser",
 735:   SGML_flush,
 736:   SGML_free,
 737:   SGML_abort,
 738:   SGML_character, 
 739:   SGML_string,
 740:   SGML_write,
1.2 timbl 741: }; 
 742: 
 743: /*   Create SGML Engine
 744: **   ------------------
 745: **
 746: ** On entry,
 747: **   dtd       represents the DTD, along with
 748: **   target      is the sink for the data; its class supplies the set of routines.
 749: **
 750: */
1.38 frystyk 751: PUBLIC HTStream * SGML_new (const SGML_dtd * dtd, HTStructured * target)
1.2 timbl 752: {
 753:   int i;
1.36 frystyk 754:   HTStream* context;
1.40 frystyk 755:   if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
1.36 frystyk 756:     HT_OUTOFMEM("SGML_begin");
1.2 timbl 757: 
 758:   context->isa = &SGMLParser;
1.33 frystyk 759:   context->string = HTChunk_new(128);    /* Grow by this much */
1.2 timbl 760:   context->dtd = dtd;
 761:   context->target = target;
 762:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 763:                    /* Ugh: no OO */
 764:   context->state = S_text;
 765:   context->element_stack = 0;            /* empty */
 766:   for(i=0; i<MAX_ATTRIBUTES; i++) context->value[i] = 0;
 767: 
 768:   return context;
 769: }

Webmaster

AltStyle によって変換されたページ (->オリジナル) /