[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.42

1.23 frystyk 1: /*                                   SGML.c
 2: **   GENERAL SGML PARSER CODE
 3: **
1.27 frystyk 4: **   (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.42 ! frystyk 6: **   @(#) $Id: SGML.c,v 1.41 1996/07/02 22:55:21 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: **   This module implements an HTStream object. To parse an
1.1 timbl 9: **   SGML file, create this object which is a parser. The object
1.2 timbl 10: **   is (currently) created by being passed a DTD structure,
 11: **   and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **   
1.19 duns 13: **   6 Feb 93    Binary searches used. Interface modified.
 14: **   8 Jul 94 FM  Insulate free() from _free structure element.
1.42 ! frystyk 15: **   Nov 1996  msa Strip down the parser to minimal HTML tokenizer,
 ! 16: **           Stop allocating space for the attribute values,
 ! 17: **           use pointers to the string chunk instead.
1.1 timbl 18: */
 19: 
1.25 frystyk 20: /* Library include files */
1.38 frystyk 21: #include "sysdep.h"
1.1 timbl 22: #include "HTUtils.h"
1.25 frystyk 23: #include "HTString.h"
1.1 timbl 24: #include "HTChunk.h"
1.20 frystyk 25: #include "SGML.h"
1.1 timbl 26: 
/* Attribute index meaning "no attribute is currently being collected". */
#define INVALID (-1)
 28: 
/*	Parser states
**	-------------
**
**	The current state is stored in the stream context and examined at the
**	top of the main loop of SGML_write() for every input character.
*/
typedef enum _sgml_state {
    S_text,		/* ordinary character data */
    S_literal,		/* inside literal content, waiting for the end tag */
    S_tag,		/* collecting a tag name after '<' */
    S_tag_gap,		/* inside a tag: expecting attribute or '>' */
    S_attr,		/* collecting an attribute name */
    S_attr_gap,		/* after attribute name: expecting attribute, '=' or '>' */
    S_equals,		/* after "attr =" */
    S_value,		/* collecting an unquoted attribute value */
    S_after_open,	/* the single character following an element start tag */
    S_nl,		/* newline seen in text */
    S_nl_tago,		/* newline followed by a tag opener */
    S_ero,		/* after '&' */
    S_cro,		/* after "&#" (character reference) */
#ifdef ISO_2022_JP
    S_esc,		/* after ESC */
    S_dollar,		/* after ESC '$' */
    S_paren,		/* after ESC '(' */
    S_nonascii_text,	/* text that is passed through until the next ESC */
#endif
    S_squoted,		/* inside a single quoted attribute value */
    S_dquoted,		/* inside a double quoted attribute value */
    S_end,		/* collecting an end tag name after "</" */
    S_entity,		/* collecting an entity name */
    S_junk_tag,		/* skipping the rest of a tag up to '>' */
    S_md,		/* inside a markup declaration ("<!") */
    S_md_sqs,		/* single quoted string inside a declaration */
    S_md_dqs,		/* double quoted string inside a declaration */
    S_com_1,		/* one '-' seen inside a declaration */
    S_com,		/* inside a comment */
    S_com_2		/* one '-' seen inside a comment */
} sgml_state;
1.21 frystyk 46: 
 47: 
1.2 timbl 48: /*   Internal Context Data Structure
 49: **   -------------------------------
 50: */
1.42 ! frystyk 51: struct _HTStream
 ! 52:   {
 ! 53:    const HTStreamClass *isa;    /* inherited from HTStream */
 ! 54:    const SGML_dtd *dtd;
 ! 55:    HTStructuredClass *actions;   /* target class */
 ! 56:    HTStructured *target;      /* target object */
1.2 timbl 57: 
1.42 ! frystyk 58:    HTTag *current_tag;
 ! 59:    int current_attribute_number;
 ! 60:    SGMLContent contents;      /* current content mode */
 ! 61:    HTChunk *string;
 ! 62:    int token;           /* ptr into string buffer */
 ! 63:    sgml_state state;
 ! 64:    BOOL present[MAX_ATTRIBUTES];  /* Flags: attribute is present? */
 ! 65:    int value[MAX_ATTRIBUTES];   /* Offset pointers to the string */
 ! 66:   };
1.2 timbl 67: 
 68: 
 69: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
1.42 ! frystyk 70: #define PUTB(b,l) ((*context->actions->put_block)(context->target, b, l))
1.2 timbl 71: 
1.42 ! frystyk 72: #define TRACE1(f,a) \
 ! 73:    do {if (SGML_TRACE) HTTrace("SGML Parser. " f,a); } while(0)
 ! 74: #define TRACE2(f,a,b) \
 ! 75:    do {if (SGML_TRACE) HTTrace("SGML Parser. " f,a,b); } while(0)
1.1 timbl 76: 
1.17 timbl 77: /*   Find Attribute Number
 78: **   ---------------------
 79: */
1.40 frystyk 80: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.42 ! frystyk 81:   {
 ! 82:    attr* attributes = tag->attributes;
1.17 timbl 83: 
1.42 ! frystyk 84:    int high, low, i, diff;     /* Binary search for attribute name */
 ! 85:    for(low=0, high=tag->number_of_attributes;
 ! 86:      high > low ;
 ! 87:      diff < 0 ? (low = i+1) : (high = i) )
 ! 88:      {
 ! 89:        i = (low + (high-low)/2);
 ! 90:        diff = strcasecomp(attributes[i].name, s);
 ! 91:        if (diff==0)
 ! 92:            return i;    /* success: found it */
 ! 93:      }
 ! 94:    return -1;
 ! 95:   }
1.17 timbl 96: 
1.1 timbl 97: 
 98: /*   Handle Attribute
 99: **   ----------------
 100: */
1.38 frystyk 101: /* PUBLIC const char * SGML_default = "";  ?? */
1.1 timbl 102: 
1.38 frystyk 103: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.42 ! frystyk 104:   {
 ! 105:    HTTag * tag = context->current_tag;
1.2 timbl 106: 
1.42 ! frystyk 107:    /* Note: if tag==NULL, we are skipping unknown tag... */
 ! 108:    if (tag)
 ! 109:      {
 ! 110:        int i = SGMLFindAttribute(tag, s);
 ! 111:        if (i >= 0)
 ! 112:          {
 ! 113:            context->current_attribute_number = i;
 ! 114:            context->present[i] = YES;
 ! 115:            return;
 ! 116:          }
 ! 117:        TRACE2("Unknown attribute %s for tag %s\n",
 ! 118:            s, context->current_tag->name);
 ! 119:      }
 ! 120:    context->current_attribute_number = INVALID;  /* Invalid */
 ! 121:   }
1.2 timbl 122: 
1.1 timbl 123: 
 124: /*   Handle attribute value
 125: **   ----------------------
 126: */
1.42 ! frystyk 127: PRIVATE void handle_attribute_value (HTStream * context)
 ! 128:   {
 ! 129:    /* Deal with attributes only if tag is known,
 ! 130:      ignore silently otherwise */
 ! 131: 
 ! 132:    if (context->current_tag)
 ! 133:      {
 ! 134:        if (context->current_attribute_number != INVALID)
 ! 135:            context->value[context->current_attribute_number] =
 ! 136:                context->token;
 ! 137:        else
 ! 138:            TRACE1("Attribute value %s ignored\n",
 ! 139:                context->string->data + context->token);
 ! 140: 
 ! 141:      }
 ! 142:    context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143:   }
 144: 
 145: /*   Handle entity
 146: **   -------------
 147: **
 148: ** On entry,
 149: **   s    contains the entity name zero terminated
 150: */
1.42 ! frystyk 151: PRIVATE void handle_entity (HTStream * context)
1.1 timbl 152:   {
1.42 ! frystyk 153:    const char ** entities = context->dtd->entity_names;
 ! 154:    const char *s = context->string->data;
1.1 timbl 155: 
1.42 ! frystyk 156:    int high, low, i, diff;
 ! 157:    for(low=0, high = context->dtd->number_of_entities;
 ! 158:      high > low ;
 ! 159:      diff < 0 ? (low = i+1) : (high = i))
 ! 160:      {
 ! 161:        i = (low + (high-low)/2);
 ! 162:        diff = strcmp(entities[i], s); /* Case sensitive! */
 ! 163:        if (diff==0)
 ! 164:          {  /* success: found it */
 ! 165:            (*context->actions->put_entity)(context->target, i);
 ! 166:            return;
 ! 167:          }
 ! 168:      }
 ! 169:    /* If entity string not found, display as text */
 ! 170:    TRACE1("Unknown entity %s\n", s);
 ! 171:    PUTC('&');
 ! 172:      {
 ! 173:        const char *p;
 ! 174:        for (p=s; *p; p++)
 ! 175:            PUTC(*p);
 ! 176:      }
1.35 frystyk 177:   }
1.2 timbl 178: 
1.1 timbl 179: /*   End element
1.2 timbl 180: **   -----------
1.1 timbl 181: */
1.42 ! frystyk 182: PRIVATE void end_element (HTStream * context, HTTag *tag)
 ! 183:   {
 ! 184:    TRACE1("End  </%s>\n", tag->name);
 ! 185:    (*context->actions->end_element)
 ! 186:        (context->target, tag - context->dtd->tags);
1.1 timbl 187:   }
 188: 
1.17 timbl 189: /*   Start an element
 190: **   ----------------
1.1 timbl 191: */
1.31 frystyk 192: PRIVATE void start_element (HTStream * context)
1.42 ! frystyk 193:   {
 ! 194:    int i;
 ! 195:    char *value[MAX_ATTRIBUTES];
 ! 196:    HTTag *tag = context->current_tag;
 ! 197: 
 ! 198:    TRACE1("Start <%s>\n", tag->name);
 ! 199:    context->contents = tag->contents;
 ! 200: 
 ! 201:    /*
 ! 202:    ** Build the actual pointers to the value strings stored in the
 ! 203:    ** chunk buffer. (Must use offsets while collecting the values,
 ! 204:    ** because the string chunk may get resized during the collection
 ! 205:    ** and potentially relocated).
 ! 206:    */
 ! 207:    for (i = 0; i < MAX_ATTRIBUTES; ++i)
 ! 208:        value[i] = context->value[i] < 0 ? NULL :
 ! 209:            context->string->data + context->value[i];
 ! 210:    (*context->actions->start_element)
 ! 211:        (context->target,
 ! 212:         tag - context->dtd->tags,
 ! 213:         context->present,
 ! 214:         (const char**)value); /* coerce type for think c */
1.1 timbl 215:   }
 216: 
 217: 
1.2 timbl 218: /*       Find Tag in DTD tag list
 219: **       ------------------------
1.1 timbl 220: **
 221: ** On entry,
1.2 timbl 222: **   dtd   points to dtd structire including valid tag list
 223: **   string points to name of tag in question
1.1 timbl 224: **
1.2 timbl 225: ** On exit,
 226: **   returns:
1.7 timbl 227: **       NULL      tag not found
 228: **       else      address of tag structure in dtd
1.2 timbl 229: */
1.40 frystyk 230: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.42 ! frystyk 231:   {
 ! 232:    int high, low, i, diff;
 ! 233:    for(low=0, high=dtd->number_of_tags;
 ! 234:      high > low ;
 ! 235:      diff < 0 ? (low = i+1) : (high = i))
 ! 236:      { /* Binary serach */
 ! 237:        i = (low + (high-low)/2);
 ! 238:        diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
 ! 239:        if (diff==0)
 ! 240:            /* success: found it */
 ! 241:            return &dtd->tags[i];
 ! 242:      }
 ! 243:    return NULL;
1.2 timbl 244:   }
 245: 
 246: /*________________________________________________________________________
 247: **           Public Methods
1.1 timbl 248: */
 249: 
1.2 timbl 250: 
 251: /*   Could check that we are back to bottom of stack! @@ */
1.40 frystyk 252: PRIVATE int SGML_flush (HTStream * context)
1.42 ! frystyk 253:   {
 ! 254:    return (*context->actions->flush)(context->target);
1.26 frystyk 255:   }
1.1 timbl 256: 
1.40 frystyk 257: PRIVATE int SGML_free (HTStream * context)
1.42 ! frystyk 258:   {
 ! 259:    int status;
1.15 frystyk 260: 
1.42 ! frystyk 261:    if ((status = (*context->actions->_free)(context->target)) != HT_OK)
 ! 262:        return status;
 ! 263:    HTChunk_delete(context->string);
 ! 264:    HT_FREE(context);
 ! 265:    return HT_OK;
1.15 frystyk 266:   }
1.1 timbl 267: 
1.40 frystyk 268: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.42 ! frystyk 269:   {
 ! 270:    (*context->actions->abort)(context->target, e);
 ! 271:    HTChunk_delete(context->string);
 ! 272:    HT_FREE(context);
 ! 273:    return HT_ERROR;
1.15 frystyk 274:   }
1.1 timbl 275: 
1.41 frystyk 276: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.42 ! frystyk 277:   {
 ! 278:    const SGML_dtd *dtd = context->dtd;
 ! 279:    HTChunk *string = context->string;
 ! 280:    const char *text = b;
 ! 281:    int count = 0;
1.18 timbl 282:    
1.42 ! frystyk 283:    while (l-- > 0)
 ! 284:      {
 ! 285:        char c = *b++;
 ! 286:        switch(context->state)
 ! 287:          {
 ! 288:          got_element_open:
 ! 289:            /*
 ! 290:            ** The label is jumped when the '>' of a the element
 ! 291:            ** start tag has been detected. This DOES NOT FALL TO
 ! 292:            ** THE CODE S_after_open, only processes the tag and
 ! 293:            ** sets the state (c should still contain the
 ! 294:            ** terminating character of the tag ('>'))
 ! 295:            */
 ! 296:            if (context->current_tag && context->current_tag->name)
 ! 297:                start_element(context);
 ! 298:            context->state = S_after_open;
 ! 299:            break;
1.18 timbl 300: 
1.42 ! frystyk 301:          case S_after_open:
 ! 302:            /*
 ! 303:            ** State S_after_open is entered only for single
 ! 304:            ** character after the element opening tag to test
 ! 305:            ** against newline. Strip one trainling newline only
 ! 306:            ** after opening nonempty element. - SGML: Ugh!
 ! 307:            */
 ! 308:            text = b;
 ! 309:            count = 0;
 ! 310:            if (c == '\n' && (context->contents != SGML_EMPTY))
 ! 311:              {
 ! 312:                context->state = S_text;
 ! 313:                break;
 ! 314:              }
 ! 315:            --text;
 ! 316:            goto S_text;
 ! 317: 
 ! 318:          S_text:
 ! 319:            context->state = S_text;
 ! 320:          case S_text:
1.13 timbl 321: #ifdef ISO_2022_JP
1.42 ! frystyk 322:            if (c == '033円')
 ! 323:              {
 ! 324:                context->state = S_esc;
 ! 325:                ++count;
 ! 326:                break;
 ! 327:              }
1.13 timbl 328: #endif /* ISO_2022_JP */
1.42 ! frystyk 329:            if (c == '&')
 ! 330:              {
 ! 331:                if (count > 0)
 ! 332:                    PUTB(text, count);
 ! 333:                count = 0;
 ! 334:                string->size = 0;
 ! 335:                context->state = S_ero;
 ! 336:              }
 ! 337:            else if (c == '<')
 ! 338:              {
 ! 339:                if (count > 0)
 ! 340:                    PUTB(text, count);
 ! 341:                count = 0;
 ! 342:                string->size = 0;
 ! 343:                /* should scrap LITERAL, and use CDATA and
 ! 344:                  RCDATA -- msa */
 ! 345:                context->state =
 ! 346:                    (context->contents == SGML_LITERAL) ?
 ! 347:                        S_literal : S_tag;
 ! 348:              }
 ! 349:            else if (c == '\n')
 ! 350:                /* Newline - ignore if before end tag! */
 ! 351:                context->state = S_nl;
 ! 352:            else
 ! 353:                ++count;
 ! 354:            break;
1.13 timbl 355: 
1.42 ! frystyk 356:          case S_nl:
 ! 357:            if (c == '<')
 ! 358:              {
 ! 359:                if (count > 0)
 ! 360:                    PUTB(text, count);
 ! 361:                count = 0;
 ! 362:                string->size = 0;
 ! 363:                context->state =
 ! 364:                    (context->contents == SGML_LITERAL) ?
 ! 365:                        S_literal : S_nl_tago;
 ! 366:              }
 ! 367:            else
 ! 368:              {
 ! 369:                ++count;
 ! 370:                goto S_text;
 ! 371:              }
 ! 372:            break;
1.18 timbl 373: 
1.42 ! frystyk 374:          case S_nl_tago:   /* Had newline and tag opener */
 ! 375:            if (c != '/')
 ! 376:                PUTC('\n'); /* Only ignore newline before </ */
 ! 377:            context->state = S_tag;
 ! 378:            goto handle_S_tag;
1.18 timbl 379: 
1.13 timbl 380: #ifdef ISO_2022_JP
1.42 ! frystyk 381:          case S_esc:
 ! 382:            if (c=='$')
 ! 383:                context->state = S_dollar;
 ! 384:            else if (c=='(')
 ! 385:                context->state = S_paren;
 ! 386:            else
 ! 387:                context->state = S_text;
 ! 388:            ++count;
 ! 389:            break;
 ! 390: 
 ! 391:          case S_dollar:
 ! 392:            if (c=='@' || c=='B')
 ! 393:                context->state = S_nonascii_text;
 ! 394:            else
 ! 395:                context->state = S_text;
 ! 396:            ++count;
 ! 397:            break;
 ! 398: 
 ! 399:          case S_paren:
 ! 400:            if (c=='B' || c=='J')
 ! 401:                context->state = S_text;
 ! 402:            else
 ! 403:                context->state = S_text;
 ! 404:            ++count;
 ! 405:            break;
 ! 406: 
 ! 407:          case S_nonascii_text:
 ! 408:            if (c == '033円')
 ! 409:                context->state = S_esc;
 ! 410:            ++count;
 ! 411:            break;
1.13 timbl 412: #endif /* ISO_2022_JP */
1.1 timbl 413: 
1.42 ! frystyk 414:            /* In literal mode, waits only for specific end tag!
 ! 415:            ** Only foir compatibility with old servers.
 ! 416:            */
 ! 417:          case S_literal:
 ! 418:            HTChunk_putc(string, c);
 ! 419:            if ( TOUPPER(c) !=
 ! 420:              ((string->size == 1) ? '/'
 ! 421:               : context->current_tag->name[string->size-2]))
 ! 422:              {
1.1 timbl 423: 
1.42 ! frystyk 424:                /* If complete match, end literal */
 ! 425:                if ((c == '>') &&
 ! 426:                  (!context->current_tag->name[string->size-2]))
 ! 427:                  {
 ! 428:                    end_element
 ! 429:                        (context,context->current_tag);
 ! 430:                    /*
 ! 431:                     ...setting SGML_MIXED below is a
 ! 432:                     bit of kludge, but a good guess that
 ! 433:                     currently works, anything other than
 ! 434:                     SGML_LITERAL would work... -- msa */
 ! 435:                    context->contents = SGML_MIXED;
 ! 436:                  }
 ! 437:                else
 ! 438:                  {
 ! 439:                    /* If Mismatch: recover string. */
 ! 440:                    PUTC( '<');
 ! 441:                    PUTB(string->data, string->size);
 ! 442:                  }
 ! 443:                context->state = S_text;
 ! 444:                text = b;
 ! 445:                count = 0;
 ! 446:              }
 ! 447:            break;
1.1 timbl 448: 
1.42 ! frystyk 449:            /*
 ! 450:            ** Character reference or Entity
 ! 451:            */
 ! 452:          case S_ero:
 ! 453:            if (c == '#')
 ! 454:              {
 ! 455:                /*  &# is Char Ref Open */ 
 ! 456:                context->state = S_cro;
 ! 457:                break;
 ! 458:              }
 ! 459:            context->state = S_entity;
1.1 timbl 460: 
1.42 ! frystyk 461:            /** FALL THROUGH TO S_entity !! ***/
1.18 timbl 462: 
1.42 ! frystyk 463:            /*
 ! 464:            ** Handle Entities
 ! 465:            */
 ! 466:          case S_entity:
 ! 467:            if (isalnum(c))
 ! 468:                HTChunk_putc(string, c);
 ! 469:            else
 ! 470:              {
 ! 471:                HTChunk_terminate(string);
 ! 472:                handle_entity(context);
 ! 473:                text = b;
 ! 474:                count = 0;
 ! 475:                if (c != ';')
 ! 476:                  {
 ! 477:                    --text;
 ! 478:                    goto S_text;
 ! 479:                  }
 ! 480:                context->state = S_text;
 ! 481:              }
 ! 482:            break;
1.2 timbl 483: 
1.42 ! frystyk 484:            /*   Character reference
 ! 485:             */
 ! 486:          case S_cro:
 ! 487:            if (isalnum(c))
 ! 488:                /* accumulate a character NUMBER */
 ! 489:                HTChunk_putc(string, c);
 ! 490:            else
 ! 491:              {
 ! 492:                int value;
 ! 493:                HTChunk_terminate(string);
 ! 494:                if (sscanf(string->data, "%d", &value)==1)
 ! 495:                    PUTC((char)value);
 ! 496:                else
 ! 497:                  {
 ! 498:                    PUTB("&#", 2);
 ! 499:                    PUTB(string->data, string->size-1);
 ! 500:                  }
 ! 501:                text = b;
 ! 502:                count = 0;
 ! 503:                if (c != ';')
 ! 504:                  {
 ! 505:                    --text;
 ! 506:                    goto S_text;
 ! 507:                  }
 ! 508:                context->state = S_text;
 ! 509:              }
 ! 510:            break;
1.1 timbl 511: 
1.42 ! frystyk 512:          case S_tag:     /* new tag */
 ! 513:          handle_S_tag:
 ! 514:            if (isalnum(c))
 ! 515:                HTChunk_putc(string, c);
 ! 516:            else
 ! 517:              { /* End of tag name */
 ! 518:                int i;
1.1 timbl 519: 
1.42 ! frystyk 520:                if (c == '/')
 ! 521:                  {
 ! 522:                    if (string->size != 0)
 ! 523:                        TRACE1("`<%s/' found!\n",
 ! 524:                            string->data);
 ! 525:                    context->state = S_end;
 ! 526:                    break;
 ! 527:                  }
 ! 528:                else if (c == '!')
 ! 529:                  {
 ! 530:                    if (string->size != 0)
 ! 531:                        TRACE1(" `<%s!' found!\n",
 ! 532:                            string->data);
 ! 533:                    context->state = S_md;
 ! 534:                    break;
 ! 535:                  }
 ! 536:                HTChunk_terminate(string);
 ! 537:                context->current_tag = SGMLFindTag(dtd, string->data);
 ! 538:                if (context->current_tag == NULL)
 ! 539:                    TRACE1("*** Unknown element %s\n",
 ! 540:                        string->data);
 ! 541:                else for (i=0;
 ! 542:                     i < context->current_tag->number_of_attributes; i++)
 ! 543:                  {
 ! 544:                    context->present[i] = NO;
 ! 545:                    context->value[i] = -1;
 ! 546:                  }
 ! 547:                context->token = string->size = 0;
 ! 548:                context->current_attribute_number = INVALID;
 ! 549:                goto S_tag_gap;
 ! 550:              }
 ! 551:            break;
 ! 552: 
 ! 553:          S_tag_gap:
 ! 554:            context->state = S_tag_gap;
 ! 555:          case S_tag_gap:       /* Expecting attribute or > */
 ! 556:            if (WHITE(c))
 ! 557:                break; /* Gap between attributes */
 ! 558: 
 ! 559:            if (c == '>')
 ! 560:                goto got_element_open;
 ! 561:            else
 ! 562:                goto S_attr;
 ! 563: 
 ! 564:          S_attr:
 ! 565:            /*
 ! 566:            ** Start collecting the attribute name and collect
 ! 567:            ** it in S_attr.
 ! 568:            */
 ! 569:            context->state = S_attr;
 ! 570:            string->size = context->token;
 ! 571:          case S_attr:
 ! 572:            if (WHITE(c) || c == '>' || c == '=')
 ! 573:                goto got_attribute_name;
 ! 574:            else
 ! 575:                HTChunk_putc(string, c);
 ! 576:            break;
 ! 577: 
 ! 578:          got_attribute_name:
 ! 579:            /*
 ! 580:            ** This label is entered when attribute name has been
 ! 581:            ** collected. Process it and enter S_attr_gap for
 ! 582:            ** potential value or start of the next attribute.
 ! 583:            */
 ! 584:            HTChunk_terminate(string) ;
 ! 585:            handle_attribute_name
 ! 586:                (context, string->data + context->token);
 ! 587:            string->size = context->token;
 ! 588:            context->state = S_attr_gap;
 ! 589:          case S_attr_gap:  /* Expecting attribute or = or > */
 ! 590:            if (WHITE(c))
 ! 591:                break; /* Gap after attribute */
 ! 592: 
 ! 593:            if (c == '>')
 ! 594:                goto got_element_open;
 ! 595:            else if (c == '=')
 ! 596:                context->state = S_equals;
 ! 597:            else
 ! 598:                goto S_attr; /* Get next attribute */
 ! 599:            break;
 ! 600: 
 ! 601:          case S_equals:   /* After attr = */ 
 ! 602:            if (WHITE(c))
 ! 603:                break; /* Before attribute value */
 ! 604: 
 ! 605:            if (c == '>')
 ! 606:              {      /* End of tag */
 ! 607:                TRACE1("found = but no value\n", NULL);
 ! 608:                goto got_element_open;
 ! 609:              }
 ! 610:            else if (c == '\'')
 ! 611:                context->state = S_squoted;
 ! 612:            else if (c == '"')
 ! 613:                context->state = S_dquoted;
 ! 614:            else
 ! 615:                goto S_value;
 ! 616:            break;
 ! 617: 
 ! 618:          S_value:
 ! 619:            context->state = S_value;
 ! 620:            string->size = context->token;
 ! 621:          case S_value:
 ! 622:            if (WHITE(c) || c == '>')
 ! 623:              {
 ! 624:                HTChunk_terminate(string);
 ! 625:                handle_attribute_value(context);
 ! 626:                context->token = string->size;
 ! 627:                goto S_tag_gap;
 ! 628:              }
 ! 629:            else
 ! 630:                HTChunk_putc(string, c);
 ! 631:            break;
1.1 timbl 632:        
1.42 ! frystyk 633:          case S_squoted:   /* Quoted attribute value */
 ! 634:            if (c == '\'')
 ! 635:              {
 ! 636:                HTChunk_terminate(string);
 ! 637:                handle_attribute_value(context);
 ! 638:                context->token = string->size;
 ! 639:                context->state = S_tag_gap;
 ! 640:              }
 ! 641:            else if (c && c != '\n' && c != '\r')
 ! 642:                HTChunk_putc(string, c);
 ! 643:            break;
1.1 timbl 644:    
1.42 ! frystyk 645:          case S_dquoted:   /* Quoted attribute value */
 ! 646:            if (c == '"')
 ! 647:              {
 ! 648:                HTChunk_terminate(string);
 ! 649:                handle_attribute_value(context);
 ! 650:                context->token = string->size;
 ! 651:                context->state = S_tag_gap;
 ! 652:              }
 ! 653:            else if (c && c != '\n' && c != '\r')
 ! 654:                HTChunk_putc(string, c);
 ! 655:            break;
1.2 timbl 656: 
1.42 ! frystyk 657:          case S_end: /* </ */
 ! 658:            if (isalnum(c))
 ! 659:                HTChunk_putc(string, c);
 ! 660:            else
 ! 661:              {      /* End of end tag name */
 ! 662:                HTTag *t;
 ! 663: 
 ! 664:                HTChunk_terminate(string);
 ! 665:                if (*string->data)
 ! 666:                    t = SGMLFindTag(dtd, string->data);
 ! 667:                else
 ! 668:                    /* Empty end tag */
 ! 669:                    /* Original code popped here one
 ! 670:                      from the stack. If this feature
 ! 671:                      is required, I have to put the
 ! 672:                      stack back... -- msa */
 ! 673:                    t = NULL;
 ! 674:                if (!t)
 ! 675:                    TRACE1("Unknown end tag </%s>\n",
 ! 676:                        string->data);
 ! 677:                else
 ! 678:                  {
 ! 679:                    context->current_tag = NULL;
 ! 680:                    end_element(context, t);
 ! 681:                  }
 ! 682:                string->size = 0;
 ! 683:                context->current_attribute_number = INVALID;
 ! 684:                if (c != '>')
 ! 685:                  {
 ! 686:                    if (!WHITE(c))
 ! 687:                        TRACE2("`</%s%c' found!\n",
 ! 688:                            string->data, c);
 ! 689:                    context->state = S_junk_tag;
 ! 690:                  }
 ! 691:                else
 ! 692:                  {
 ! 693:                    text = b;
 ! 694:                    count = 0;
 ! 695:                    context->state = S_text;
 ! 696:                  }
 ! 697:              }
 ! 698:            break;
 ! 699: 
 ! 700:          S_junk_tag:
 ! 701:            context->state = S_junk_tag;
 ! 702:          case S_junk_tag:
 ! 703:            if (c == '>')
 ! 704:              {
 ! 705:                text = b;
 ! 706:                count = 0;
 ! 707:                context->state = S_text;
 ! 708:              }
 ! 709:            break;
 ! 710: 
 ! 711:            /*
 ! 712:            ** Scanning (actually skipping) declarations
 ! 713:            */
 ! 714:          case S_md:
 ! 715:            if (c == '-')
 ! 716:                context->state = S_com_1;
 ! 717:            else if (c == '"')
 ! 718:                context->state = S_md_dqs;
 ! 719:            else if (c == '\'')
 ! 720:                context->state = S_md_sqs;
 ! 721:            else if (c == '>')
 ! 722:              {
 ! 723:                text = b;
 ! 724:                count = 0;
 ! 725:                context->state = S_text;
 ! 726:              }
 ! 727:            break;
 ! 728: 
 ! 729:          case S_md_dqs: /* Skip double quoted string */
 ! 730:            if (c == '"')
 ! 731:                context->state = S_md;
 ! 732:            break;
 ! 733: 
 ! 734:          case S_md_sqs: /* Skip single quoted string */
 ! 735:            if (c == '\'')
 ! 736:                context->state = S_md;
 ! 737:            break;
 ! 738: 
 ! 739:          case S_com_1: /* Starting a comment? */
 ! 740:            context->state = (c == '-') ? S_com : S_md;
 ! 741:            break;
 ! 742: 
 ! 743:          case S_com: /* ..within comment */
 ! 744:            if (c == '-')
 ! 745:                context->state = S_com_2;
 ! 746:            break;
 ! 747: 
 ! 748:          case S_com_2: /* Ending a comment ? */
 ! 749:            context->state = (c == '-') ? S_md : S_com;
 ! 750:            break;
 ! 751:          }
1.7 timbl 752:      }
1.42 ! frystyk 753:    if (count > 0)
 ! 754:        PUTB(text, count);
 ! 755:    return HT_OK;
 ! 756:   }
1.1 timbl 757: 
1.2 timbl 758: 
1.40 frystyk 759: PRIVATE int SGML_string (HTStream * context, const char* s)
1.42 ! frystyk 760:   {
 ! 761:    return SGML_write(context, s, (int) strlen(s));
 ! 762:   }
1.2 timbl 763: 
 764: 
1.41 frystyk 765: PRIVATE int SGML_character (HTStream * context, char c)
1.42 ! frystyk 766:   {
 ! 767:    return SGML_write(context, &c, 1);
 ! 768:   }
1.2 timbl 769: 
 770: /*_______________________________________________________________________
 771: */
 772: 
 773: /*   Structured Object Class
 774: **   -----------------------
 775: */
1.38 frystyk 776: PRIVATE const HTStreamClass SGMLParser = 
1.42 ! frystyk 777:   {     
 ! 778:    "SGMLParser",
 ! 779:    SGML_flush,
 ! 780:    SGML_free,
 ! 781:    SGML_abort,
 ! 782:    SGML_character, 
 ! 783:    SGML_string,
 ! 784:    SGML_write,
 ! 785:   }; 
1.2 timbl 786: 
 787: /*   Create SGML Engine
 788: **   ------------------
 789: **
 790: ** On entry,
 791: **   dtd       represents the DTD, along with
 792: **   actions     is the sink for the data as a set of routines.
 793: **
 794: */
1.42 ! frystyk 795: PUBLIC HTStream *SGML_new(const SGML_dtd * dtd, HTStructured * target)
 ! 796:   {
 ! 797:    int i;
 ! 798:    HTStream* context;
 ! 799:    if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
 ! 800:        HT_OUTOFMEM("SGML_begin");
1.2 timbl 801: 
1.42 ! frystyk 802:    context->isa = &SGMLParser;
 ! 803:    context->string = HTChunk_new(128);   /* Grow by this much */
 ! 804:    context->dtd = dtd;
 ! 805:    context->target = target;
 ! 806:    context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 ! 807:                      /* Ugh: no OO */
 ! 808:    context->state = S_text;
 ! 809:    for(i=0; i<MAX_ATTRIBUTES; i++)
 ! 810:        context->value[i] = 0;
 ! 811:    return context;
 ! 812:   }

Webmaster

AltStyle によって変換されたページ (->オリジナル) /