[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.43

1.23 frystyk 1: /*                                   SGML.c
 2: **   GENERAL SGML PARSER CODE
 3: **
1.27 frystyk 4: **   (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.43 ! frystyk 6: **   @(#) $Id: SGML.c,v 1.42 1997/03/21 19:33:28 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: **   This module implements an HTStream object. To parse an
1.1 timbl 9: **   SGML file, create this object which is a parser. The object
1.2 timbl 10: **   is (currently) created by being passed a DTD structure,
 11: **   and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **   
1.19 duns 13: **   6 Feb 93    Binary searches used. Interface modified.
 14: **   8 Jul 94 FM  Insulate free() from _free structure element.
1.42 frystyk 15: **   Nov 1996  msa Strip down the parser to minimal HTML tokenizer,
 16: **           Stop allocating space for the attribute values,
 17: **           use pointers to the string chunk instead.
1.1 timbl 18: */
 19: 
1.25 frystyk 20: /* Library include files */
1.38 frystyk 21: #include "sysdep.h"
1.1 timbl 22: #include "HTUtils.h"
1.25 frystyk 23: #include "HTString.h"
1.1 timbl 24: #include "HTChunk.h"
1.20 frystyk 25: #include "SGML.h"
1.1 timbl 26: 
1.2 timbl 27: #define INVALID (-1)
 28: 
/*	Tokenizer states
**	----------------
**
**	The current state is kept in the parser object and is therefore
**	carried from one write call to the next.
*/
typedef enum _sgml_state {
    S_text,		/* plain character data */
    S_literal,		/* literal content, waiting for the matching end tag */
    S_tag,		/* after '<', collecting the tag name */
    S_tag_gap,		/* inside a start tag, expecting attribute or '>' */
    S_attr,		/* collecting an attribute name */
    S_attr_gap,		/* after an attribute name: '=', next attribute or '>' */
    S_equals,		/* after '=', expecting a value */
    S_value,		/* collecting an unquoted attribute value */
    S_after_open,	/* first character after a start tag */
    S_nl,		/* newline seen in text */
    S_nl_tago,		/* newline followed by '<' */
    S_ero,		/* after '&' */
    S_cro,		/* after "&#", collecting a character number */
#ifdef ISO_2022_JP
    S_esc,		/* after ESC */
    S_dollar,		/* after ESC '$' */
    S_paren,		/* after ESC '(' */
    S_nonascii_text,	/* text following ESC '$' '@' or ESC '$' 'B' */
#endif
    S_squoted,		/* inside a single quoted attribute value */
    S_dquoted,		/* inside a double quoted attribute value */
    S_end,		/* after "</", collecting the end tag name */
    S_entity,		/* collecting an entity name */
    S_junk_tag,		/* skipping to the '>' of an end tag */
    S_md,		/* inside a "<!" declaration */
    S_md_sqs,		/* single quoted string inside a declaration */
    S_md_dqs,		/* double quoted string inside a declaration */
    S_com_1,		/* one '-' seen inside a declaration */
    S_com,		/* within a comment */
    S_com_2		/* '-' seen within a comment */
} sgml_state;
1.21 frystyk 46: 
 47: 
1.2 timbl 48: /*   Internal Context Data Structure
 49: **   -------------------------------
 50: */
1.42 frystyk 51: struct _HTStream
 52:   {
 53:    const HTStreamClass *isa;    /* inherited from HTStream */
 54:    const SGML_dtd *dtd;
 55:    HTStructuredClass *actions;   /* target class */
 56:    HTStructured *target;      /* target object */
1.2 timbl 57: 
1.42 frystyk 58:    HTTag *current_tag;
 59:    int current_attribute_number;
 60:    SGMLContent contents;      /* current content mode */
 61:    HTChunk *string;
 62:    int token;           /* ptr into string buffer */
 63:    sgml_state state;
 64:    BOOL present[MAX_ATTRIBUTES];  /* Flags: attribute is present? */
 65:    int value[MAX_ATTRIBUTES];   /* Offset pointers to the string */
 66:   };
1.2 timbl 67: 
 68: 
 69: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
1.42 frystyk 70: #define PUTB(b,l) ((*context->actions->put_block)(context->target, b, l))
1.2 timbl 71: 
1.42 frystyk 72: #define TRACE1(f,a) \
 73:    do {if (SGML_TRACE) HTTrace("SGML Parser. " f,a); } while(0)
 74: #define TRACE2(f,a,b) \
 75:    do {if (SGML_TRACE) HTTrace("SGML Parser. " f,a,b); } while(0)
1.1 timbl 76: 
1.17 timbl 77: /*   Find Attribute Number
 78: **   ---------------------
 79: */
1.40 frystyk 80: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.42 frystyk 81:   {
 82:    attr* attributes = tag->attributes;
1.17 timbl 83: 
1.42 frystyk 84:    int high, low, i, diff;     /* Binary search for attribute name */
 85:    for(low=0, high=tag->number_of_attributes;
 86:      high > low ;
 87:      diff < 0 ? (low = i+1) : (high = i) )
 88:      {
 89:        i = (low + (high-low)/2);
 90:        diff = strcasecomp(attributes[i].name, s);
 91:        if (diff==0)
 92:            return i;    /* success: found it */
 93:      }
 94:    return -1;
 95:   }
1.17 timbl 96: 
1.1 timbl 97: 
 98: /*   Handle Attribute
 99: **   ----------------
 100: */
1.38 frystyk 101: /* PUBLIC const char * SGML_default = "";  ?? */
1.1 timbl 102: 
1.38 frystyk 103: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.42 frystyk 104:   {
 105:    HTTag * tag = context->current_tag;
1.2 timbl 106: 
1.42 frystyk 107:    /* Note: if tag==NULL, we are skipping unknown tag... */
 108:    if (tag)
 109:      {
 110:        int i = SGMLFindAttribute(tag, s);
 111:        if (i >= 0)
 112:          {
 113:            context->current_attribute_number = i;
 114:            context->present[i] = YES;
 115:            return;
 116:          }
 117:        TRACE2("Unknown attribute %s for tag %s\n",
 118:            s, context->current_tag->name);
 119:      }
 120:    context->current_attribute_number = INVALID;  /* Invalid */
 121:   }
1.2 timbl 122: 
1.1 timbl 123: 
 124: /*   Handle attribute value
 125: **   ----------------------
 126: */
1.42 frystyk 127: PRIVATE void handle_attribute_value (HTStream * context)
 128:   {
 129:    /* Deal with attributes only if tag is known,
 130:      ignore silently otherwise */
 131: 
 132:    if (context->current_tag)
 133:      {
 134:        if (context->current_attribute_number != INVALID)
 135:            context->value[context->current_attribute_number] =
 136:                context->token;
 137:        else
 138:            TRACE1("Attribute value %s ignored\n",
 139:                context->string->data + context->token);
 140: 
 141:      }
 142:    context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 143:   }
 144: 
 145: /*   Handle entity
 146: **   -------------
 147: **
 148: ** On entry,
 149: **   s    contains the entity name zero terminated
 150: */
1.42 frystyk 151: PRIVATE void handle_entity (HTStream * context)
1.1 timbl 152:   {
1.42 frystyk 153:    const char ** entities = context->dtd->entity_names;
 154:    const char *s = context->string->data;
1.1 timbl 155: 
1.42 frystyk 156:    int high, low, i, diff;
 157:    for(low=0, high = context->dtd->number_of_entities;
 158:      high > low ;
 159:      diff < 0 ? (low = i+1) : (high = i))
 160:      {
 161:        i = (low + (high-low)/2);
 162:        diff = strcmp(entities[i], s); /* Case sensitive! */
 163:        if (diff==0)
 164:          {  /* success: found it */
 165:            (*context->actions->put_entity)(context->target, i);
 166:            return;
 167:          }
 168:      }
 169:    /* If entity string not found, display as text */
 170:    TRACE1("Unknown entity %s\n", s);
 171:    PUTC('&');
 172:      {
 173:        const char *p;
 174:        for (p=s; *p; p++)
 175:            PUTC(*p);
 176:      }
1.35 frystyk 177:   }
1.2 timbl 178: 
1.1 timbl 179: /*   End element
1.2 timbl 180: **   -----------
1.1 timbl 181: */
1.42 frystyk 182: PRIVATE void end_element (HTStream * context, HTTag *tag)
 183:   {
 184:    TRACE1("End  </%s>\n", tag->name);
 185:    (*context->actions->end_element)
 186:        (context->target, tag - context->dtd->tags);
1.1 timbl 187:   }
 188: 
1.17 timbl 189: /*   Start an element
 190: **   ----------------
1.1 timbl 191: */
1.31 frystyk 192: PRIVATE void start_element (HTStream * context)
1.42 frystyk 193:   {
 194:    int i;
 195:    char *value[MAX_ATTRIBUTES];
 196:    HTTag *tag = context->current_tag;
 197: 
 198:    TRACE1("Start <%s>\n", tag->name);
 199:    context->contents = tag->contents;
 200: 
 201:    /*
 202:    ** Build the actual pointers to the value strings stored in the
 203:    ** chunk buffer. (Must use offsets while collecting the values,
 204:    ** because the string chunk may get resized during the collection
 205:    ** and potentially relocated).
 206:    */
 207:    for (i = 0; i < MAX_ATTRIBUTES; ++i)
 208:        value[i] = context->value[i] < 0 ? NULL :
 209:            context->string->data + context->value[i];
 210:    (*context->actions->start_element)
 211:        (context->target,
 212:         tag - context->dtd->tags,
 213:         context->present,
 214:         (const char**)value); /* coerce type for think c */
1.1 timbl 215:   }
 216: 
 217: 
1.2 timbl 218: /*       Find Tag in DTD tag list
 219: **       ------------------------
1.1 timbl 220: **
 221: ** On entry,
1.2 timbl 222: **   dtd   points to dtd structire including valid tag list
 223: **   string points to name of tag in question
1.1 timbl 224: **
1.2 timbl 225: ** On exit,
 226: **   returns:
1.7 timbl 227: **       NULL      tag not found
 228: **       else      address of tag structure in dtd
1.2 timbl 229: */
1.40 frystyk 230: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.42 frystyk 231:   {
 232:    int high, low, i, diff;
 233:    for(low=0, high=dtd->number_of_tags;
 234:      high > low ;
 235:      diff < 0 ? (low = i+1) : (high = i))
 236:      { /* Binary serach */
 237:        i = (low + (high-low)/2);
 238:        diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
 239:        if (diff==0)
 240:            /* success: found it */
 241:            return &dtd->tags[i];
 242:      }
 243:    return NULL;
1.2 timbl 244:   }
 245: 
 246: /*________________________________________________________________________
 247: **           Public Methods
1.1 timbl 248: */
 249: 
1.2 timbl 250: 
 251: /*   Could check that we are back to bottom of stack! @@ */
1.40 frystyk 252: PRIVATE int SGML_flush (HTStream * context)
1.42 frystyk 253:   {
 254:    return (*context->actions->flush)(context->target);
1.26 frystyk 255:   }
1.1 timbl 256: 
1.40 frystyk 257: PRIVATE int SGML_free (HTStream * context)
1.42 frystyk 258:   {
 259:    int status;
1.15 frystyk 260: 
1.42 frystyk 261:    if ((status = (*context->actions->_free)(context->target)) != HT_OK)
 262:        return status;
 263:    HTChunk_delete(context->string);
 264:    HT_FREE(context);
 265:    return HT_OK;
1.15 frystyk 266:   }
1.1 timbl 267: 
1.40 frystyk 268: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.42 frystyk 269:   {
 270:    (*context->actions->abort)(context->target, e);
 271:    HTChunk_delete(context->string);
 272:    HT_FREE(context);
 273:    return HT_ERROR;
1.15 frystyk 274:   }
1.1 timbl 275: 
1.41 frystyk 276: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.42 frystyk 277:   {
 278:    const SGML_dtd *dtd = context->dtd;
 279:    HTChunk *string = context->string;
 280:    const char *text = b;
 281:    int count = 0;
1.18 timbl 282:    
1.42 frystyk 283:    while (l-- > 0)
 284:      {
 285:        char c = *b++;
 286:        switch(context->state)
 287:          {
 288:          got_element_open:
 289:            /*
 290:            ** The label is jumped when the '>' of a the element
 291:            ** start tag has been detected. This DOES NOT FALL TO
 292:            ** THE CODE S_after_open, only processes the tag and
 293:            ** sets the state (c should still contain the
 294:            ** terminating character of the tag ('>'))
 295:            */
 296:            if (context->current_tag && context->current_tag->name)
 297:                start_element(context);
 298:            context->state = S_after_open;
 299:            break;
1.18 timbl 300: 
1.42 frystyk 301:          case S_after_open:
 302:            /*
 303:            ** State S_after_open is entered only for single
 304:            ** character after the element opening tag to test
 305:            ** against newline. Strip one trainling newline only
 306:            ** after opening nonempty element. - SGML: Ugh!
 307:            */
 308:            text = b;
 309:            count = 0;
 310:            if (c == '\n' && (context->contents != SGML_EMPTY))
 311:              {
 312:                context->state = S_text;
 313:                break;
 314:              }
 315:            --text;
 316:            goto S_text;
 317: 
 318:          S_text:
 319:            context->state = S_text;
 320:          case S_text:
1.13 timbl 321: #ifdef ISO_2022_JP
1.42 frystyk 322:            if (c == '033円')
 323:              {
 324:                context->state = S_esc;
 325:                ++count;
 326:                break;
 327:              }
1.13 timbl 328: #endif /* ISO_2022_JP */
1.42 frystyk 329:            if (c == '&')
 330:              {
 331:                if (count > 0)
 332:                    PUTB(text, count);
 333:                count = 0;
 334:                string->size = 0;
 335:                context->state = S_ero;
 336:              }
 337:            else if (c == '<')
 338:              {
 339:                if (count > 0)
 340:                    PUTB(text, count);
 341:                count = 0;
 342:                string->size = 0;
 343:                /* should scrap LITERAL, and use CDATA and
 344:                  RCDATA -- msa */
 345:                context->state =
 346:                    (context->contents == SGML_LITERAL) ?
 347:                        S_literal : S_tag;
 348:              }
 349:            else if (c == '\n')
 350:                /* Newline - ignore if before end tag! */
 351:                context->state = S_nl;
 352:            else
 353:                ++count;
 354:            break;
1.13 timbl 355: 
1.42 frystyk 356:          case S_nl:
 357:            if (c == '<')
 358:              {
 359:                if (count > 0)
 360:                    PUTB(text, count);
 361:                count = 0;
 362:                string->size = 0;
 363:                context->state =
 364:                    (context->contents == SGML_LITERAL) ?
 365:                        S_literal : S_nl_tago;
 366:              }
 367:            else
 368:              {
 369:                ++count;
 370:                goto S_text;
 371:              }
 372:            break;
1.18 timbl 373: 
1.42 frystyk 374:          case S_nl_tago:   /* Had newline and tag opener */
 375:            if (c != '/')
 376:                PUTC('\n'); /* Only ignore newline before </ */
 377:            context->state = S_tag;
 378:            goto handle_S_tag;
1.18 timbl 379: 
1.13 timbl 380: #ifdef ISO_2022_JP
1.42 frystyk 381:          case S_esc:
 382:            if (c=='$')
 383:                context->state = S_dollar;
 384:            else if (c=='(')
 385:                context->state = S_paren;
 386:            else
 387:                context->state = S_text;
 388:            ++count;
 389:            break;
 390: 
 391:          case S_dollar:
 392:            if (c=='@' || c=='B')
 393:                context->state = S_nonascii_text;
 394:            else
 395:                context->state = S_text;
 396:            ++count;
 397:            break;
 398: 
 399:          case S_paren:
 400:            if (c=='B' || c=='J')
 401:                context->state = S_text;
 402:            else
 403:                context->state = S_text;
 404:            ++count;
 405:            break;
 406: 
 407:          case S_nonascii_text:
 408:            if (c == '033円')
 409:                context->state = S_esc;
 410:            ++count;
 411:            break;
1.13 timbl 412: #endif /* ISO_2022_JP */
1.1 timbl 413: 
1.42 frystyk 414:            /* In literal mode, waits only for specific end tag!
 415:            ** Only foir compatibility with old servers.
 416:            */
 417:          case S_literal:
 418:            HTChunk_putc(string, c);
 419:            if ( TOUPPER(c) !=
 420:              ((string->size == 1) ? '/'
 421:               : context->current_tag->name[string->size-2]))
 422:              {
1.1 timbl 423: 
1.42 frystyk 424:                /* If complete match, end literal */
 425:                if ((c == '>') &&
 426:                  (!context->current_tag->name[string->size-2]))
 427:                  {
 428:                    end_element
 429:                        (context,context->current_tag);
 430:                    /*
 431:                     ...setting SGML_MIXED below is a
 432:                     bit of kludge, but a good guess that
 433:                     currently works, anything other than
 434:                     SGML_LITERAL would work... -- msa */
 435:                    context->contents = SGML_MIXED;
 436:                  }
 437:                else
 438:                  {
 439:                    /* If Mismatch: recover string. */
 440:                    PUTC( '<');
 441:                    PUTB(string->data, string->size);
 442:                  }
 443:                context->state = S_text;
 444:                text = b;
 445:                count = 0;
 446:              }
 447:            break;
1.1 timbl 448: 
1.42 frystyk 449:            /*
 450:            ** Character reference or Entity
 451:            */
 452:          case S_ero:
 453:            if (c == '#')
 454:              {
 455:                /*  &# is Char Ref Open */ 
 456:                context->state = S_cro;
 457:                break;
 458:              }
 459:            context->state = S_entity;
1.1 timbl 460: 
1.42 frystyk 461:            /** FALL THROUGH TO S_entity !! ***/
1.18 timbl 462: 
1.42 frystyk 463:            /*
 464:            ** Handle Entities
 465:            */
 466:          case S_entity:
1.43 ! frystyk 467:            if (isalnum((int) c))
1.42 frystyk 468:                HTChunk_putc(string, c);
 469:            else
 470:              {
 471:                HTChunk_terminate(string);
 472:                handle_entity(context);
 473:                text = b;
 474:                count = 0;
 475:                if (c != ';')
 476:                  {
 477:                    --text;
 478:                    goto S_text;
 479:                  }
 480:                context->state = S_text;
 481:              }
 482:            break;
1.2 timbl 483: 
1.42 frystyk 484:            /*   Character reference
 485:             */
 486:          case S_cro:
1.43 ! frystyk 487:            if (isalnum((int)c))
1.42 frystyk 488:                /* accumulate a character NUMBER */
 489:                HTChunk_putc(string, c);
 490:            else
 491:              {
 492:                int value;
 493:                HTChunk_terminate(string);
 494:                if (sscanf(string->data, "%d", &value)==1)
 495:                    PUTC((char)value);
 496:                else
 497:                  {
 498:                    PUTB("&#", 2);
 499:                    PUTB(string->data, string->size-1);
 500:                  }
 501:                text = b;
 502:                count = 0;
 503:                if (c != ';')
 504:                  {
 505:                    --text;
 506:                    goto S_text;
 507:                  }
 508:                context->state = S_text;
 509:              }
 510:            break;
1.1 timbl 511: 
1.42 frystyk 512:          case S_tag:     /* new tag */
 513:          handle_S_tag:
1.43 ! frystyk 514:            if (isalnum((int)c))
1.42 frystyk 515:                HTChunk_putc(string, c);
 516:            else
 517:              { /* End of tag name */
 518:                int i;
1.1 timbl 519: 
1.42 frystyk 520:                if (c == '/')
 521:                  {
 522:                    if (string->size != 0)
 523:                        TRACE1("`<%s/' found!\n",
 524:                            string->data);
 525:                    context->state = S_end;
 526:                    break;
 527:                  }
 528:                else if (c == '!')
 529:                  {
 530:                    if (string->size != 0)
 531:                        TRACE1(" `<%s!' found!\n",
 532:                            string->data);
 533:                    context->state = S_md;
 534:                    break;
 535:                  }
 536:                HTChunk_terminate(string);
 537:                context->current_tag = SGMLFindTag(dtd, string->data);
 538:                if (context->current_tag == NULL)
 539:                    TRACE1("*** Unknown element %s\n",
 540:                        string->data);
 541:                else for (i=0;
 542:                     i < context->current_tag->number_of_attributes; i++)
 543:                  {
 544:                    context->present[i] = NO;
 545:                    context->value[i] = -1;
 546:                  }
 547:                context->token = string->size = 0;
 548:                context->current_attribute_number = INVALID;
 549:                goto S_tag_gap;
 550:              }
 551:            break;
 552: 
 553:          S_tag_gap:
 554:            context->state = S_tag_gap;
 555:          case S_tag_gap:       /* Expecting attribute or > */
1.43 ! frystyk 556:            if (isspace((int) c))
1.42 frystyk 557:                break; /* Gap between attributes */
 558: 
 559:            if (c == '>')
 560:                goto got_element_open;
 561:            else
 562:                goto S_attr;
 563: 
 564:          S_attr:
 565:            /*
 566:            ** Start collecting the attribute name and collect
 567:            ** it in S_attr.
 568:            */
 569:            context->state = S_attr;
 570:            string->size = context->token;
 571:          case S_attr:
1.43 ! frystyk 572:            if (isspace((int) c) || c == '>' || c == '=')
1.42 frystyk 573:                goto got_attribute_name;
 574:            else
 575:                HTChunk_putc(string, c);
 576:            break;
 577: 
 578:          got_attribute_name:
 579:            /*
 580:            ** This label is entered when attribute name has been
 581:            ** collected. Process it and enter S_attr_gap for
 582:            ** potential value or start of the next attribute.
 583:            */
 584:            HTChunk_terminate(string) ;
 585:            handle_attribute_name
 586:                (context, string->data + context->token);
 587:            string->size = context->token;
 588:            context->state = S_attr_gap;
 589:          case S_attr_gap:  /* Expecting attribute or = or > */
1.43 ! frystyk 590:            if (isspace((int) c))
1.42 frystyk 591:                break; /* Gap after attribute */
 592: 
 593:            if (c == '>')
 594:                goto got_element_open;
 595:            else if (c == '=')
 596:                context->state = S_equals;
 597:            else
 598:                goto S_attr; /* Get next attribute */
 599:            break;
 600: 
 601:          case S_equals:   /* After attr = */ 
1.43 ! frystyk 602:            if (isspace((int) c))
1.42 frystyk 603:                break; /* Before attribute value */
 604: 
 605:            if (c == '>')
 606:              {      /* End of tag */
 607:                TRACE1("found = but no value\n", NULL);
 608:                goto got_element_open;
 609:              }
 610:            else if (c == '\'')
 611:                context->state = S_squoted;
 612:            else if (c == '"')
 613:                context->state = S_dquoted;
 614:            else
 615:                goto S_value;
 616:            break;
 617: 
 618:          S_value:
 619:            context->state = S_value;
 620:            string->size = context->token;
 621:          case S_value:
1.43 ! frystyk 622:            if (isspace((int) c) || c == '>')
1.42 frystyk 623:              {
 624:                HTChunk_terminate(string);
 625:                handle_attribute_value(context);
 626:                context->token = string->size;
 627:                goto S_tag_gap;
 628:              }
 629:            else
 630:                HTChunk_putc(string, c);
 631:            break;
1.1 timbl 632:        
1.42 frystyk 633:          case S_squoted:   /* Quoted attribute value */
 634:            if (c == '\'')
 635:              {
 636:                HTChunk_terminate(string);
 637:                handle_attribute_value(context);
 638:                context->token = string->size;
 639:                context->state = S_tag_gap;
 640:              }
 641:            else if (c && c != '\n' && c != '\r')
 642:                HTChunk_putc(string, c);
 643:            break;
1.1 timbl 644:    
1.42 frystyk 645:          case S_dquoted:   /* Quoted attribute value */
 646:            if (c == '"')
 647:              {
 648:                HTChunk_terminate(string);
 649:                handle_attribute_value(context);
 650:                context->token = string->size;
 651:                context->state = S_tag_gap;
 652:              }
 653:            else if (c && c != '\n' && c != '\r')
 654:                HTChunk_putc(string, c);
 655:            break;
1.2 timbl 656: 
1.42 frystyk 657:          case S_end: /* </ */
1.43 ! frystyk 658:            if (isalnum((int) c))
1.42 frystyk 659:                HTChunk_putc(string, c);
 660:            else
 661:              {      /* End of end tag name */
 662:                HTTag *t;
 663: 
 664:                HTChunk_terminate(string);
 665:                if (*string->data)
 666:                    t = SGMLFindTag(dtd, string->data);
 667:                else
 668:                    /* Empty end tag */
 669:                    /* Original code popped here one
 670:                      from the stack. If this feature
 671:                      is required, I have to put the
 672:                      stack back... -- msa */
 673:                    t = NULL;
 674:                if (!t)
 675:                    TRACE1("Unknown end tag </%s>\n",
 676:                        string->data);
 677:                else
 678:                  {
 679:                    context->current_tag = NULL;
 680:                    end_element(context, t);
 681:                  }
 682:                string->size = 0;
 683:                context->current_attribute_number = INVALID;
 684:                if (c != '>')
 685:                  {
1.43 ! frystyk 686:                    if (!isspace((int) c))
1.42 frystyk 687:                        TRACE2("`</%s%c' found!\n",
 688:                            string->data, c);
 689:                    context->state = S_junk_tag;
 690:                  }
 691:                else
 692:                  {
 693:                    text = b;
 694:                    count = 0;
 695:                    context->state = S_text;
 696:                  }
 697:              }
 698:            break;
 699: 
 700:          S_junk_tag:
 701:            context->state = S_junk_tag;
 702:          case S_junk_tag:
 703:            if (c == '>')
 704:              {
 705:                text = b;
 706:                count = 0;
 707:                context->state = S_text;
 708:              }
 709:            break;
 710: 
 711:            /*
 712:            ** Scanning (actually skipping) declarations
 713:            */
 714:          case S_md:
 715:            if (c == '-')
 716:                context->state = S_com_1;
 717:            else if (c == '"')
 718:                context->state = S_md_dqs;
 719:            else if (c == '\'')
 720:                context->state = S_md_sqs;
 721:            else if (c == '>')
 722:              {
 723:                text = b;
 724:                count = 0;
 725:                context->state = S_text;
 726:              }
 727:            break;
 728: 
 729:          case S_md_dqs: /* Skip double quoted string */
 730:            if (c == '"')
 731:                context->state = S_md;
 732:            break;
 733: 
 734:          case S_md_sqs: /* Skip single quoted string */
 735:            if (c == '\'')
 736:                context->state = S_md;
 737:            break;
 738: 
 739:          case S_com_1: /* Starting a comment? */
 740:            context->state = (c == '-') ? S_com : S_md;
 741:            break;
 742: 
 743:          case S_com: /* ..within comment */
 744:            if (c == '-')
 745:                context->state = S_com_2;
 746:            break;
 747: 
 748:          case S_com_2: /* Ending a comment ? */
 749:            context->state = (c == '-') ? S_md : S_com;
 750:            break;
 751:          }
1.7 timbl 752:      }
1.42 frystyk 753:    if (count > 0)
 754:        PUTB(text, count);
 755:    return HT_OK;
 756:   }
1.1 timbl 757: 
1.2 timbl 758: 
1.40 frystyk 759: PRIVATE int SGML_string (HTStream * context, const char* s)
1.42 frystyk 760:   {
 761:    return SGML_write(context, s, (int) strlen(s));
 762:   }
1.2 timbl 763: 
 764: 
1.41 frystyk 765: PRIVATE int SGML_character (HTStream * context, char c)
1.42 frystyk 766:   {
 767:    return SGML_write(context, &c, 1);
 768:   }
1.2 timbl 769: 
 770: /*_______________________________________________________________________
 771: */
 772: 
 773: /*   Structured Object Class
 774: **   -----------------------
 775: */
1.38 frystyk 776: PRIVATE const HTStreamClass SGMLParser = 
1.42 frystyk 777:   {     
 778:    "SGMLParser",
 779:    SGML_flush,
 780:    SGML_free,
 781:    SGML_abort,
 782:    SGML_character, 
 783:    SGML_string,
 784:    SGML_write,
 785:   }; 
1.2 timbl 786: 
 787: /*   Create SGML Engine
 788: **   ------------------
 789: **
 790: ** On entry,
 791: **   dtd       represents the DTD, along with
 792: **   actions     is the sink for the data as a set of routines.
 793: **
 794: */
1.42 frystyk 795: PUBLIC HTStream *SGML_new(const SGML_dtd * dtd, HTStructured * target)
 796:   {
 797:    int i;
 798:    HTStream* context;
 799:    if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
 800:        HT_OUTOFMEM("SGML_begin");
1.2 timbl 801: 
1.42 frystyk 802:    context->isa = &SGMLParser;
 803:    context->string = HTChunk_new(128);   /* Grow by this much */
 804:    context->dtd = dtd;
 805:    context->target = target;
 806:    context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 807:                      /* Ugh: no OO */
 808:    context->state = S_text;
 809:    for(i=0; i<MAX_ATTRIBUTES; i++)
 810:        context->value[i] = 0;
 811:    return context;
 812:   }

Webmaster

AltStyle によって変換されたページ (->オリジナル) /