[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.48

/*                                                                     SGML.c
**      GENERAL SGML PARSER CODE
**
**      (c) COPYRIGHT MIT 1995.
**      Please first read the full copyright statement in the file COPYRIGH.
**      @(#) $Id: SGML.c,v 1.47 1999/01/06 15:38:48 frystyk Exp $
**
**      This module implements an HTStream object. To parse an
**      SGML file, create this object which is a parser. The object
**      is (currently) created by being passed a DTD structure,
**      and a target HTStructured object at which to throw the parsed stuff.
**
**       6 Feb 93       Binary searches used. Interface modified.
**       8 Jul 94  FM   Insulate free() from _free structure element.
**      Nov 1996   msa  Strip down the parser to minimal HTML tokenizer,
**                      Stop allocating space for the attribute values,
**                      use pointers to the string chunk instead.
*/
 19: 
1.25 frystyk 20: /* Library include files */
1.45 frystyk 21: #include "wwwsys.h"
1.1 timbl 22: #include "HTUtils.h"
1.25 frystyk 23: #include "HTString.h"
1.1 timbl 24: #include "HTChunk.h"
1.20 frystyk 25: #include "SGML.h"
1.1 timbl 26: 
/* Marks "no current attribute" in current_attribute_number */
#define INVALID (-1)

/*
**      Parser states
**      -------------
**      The current state is stored in the stream context, so the tokenizer
**      can pick up where it stopped when the next block of data arrives.
*/
typedef enum _sgml_state {
    S_text,             /* plain text */
    S_literal,          /* literal content, waiting for the matching end tag */
    S_tag,              /* collecting a tag name after '<' */
    S_tag_gap,          /* expecting an attribute or '>' */
    S_attr,             /* collecting an attribute name */
    S_attr_gap,         /* expecting an attribute, '=' or '>' */
    S_equals,           /* after "attr =" */
    S_value,            /* collecting an unquoted attribute value */
    S_after_open,       /* the single character after an opening tag */
    S_nl,               /* had a newline in text */
    S_nl_tago,          /* had a newline and a tag opener */
    S_ero,              /* after '&' */
    S_cro,              /* after "&#" */
#ifdef ISO_2022_JP
    S_esc,              /* after ESC */
    S_dollar,           /* after ESC '$' */
    S_paren,            /* after ESC '(' */
    S_nonascii_text,    /* text that is passed through until the next ESC */
#endif
    S_squoted,          /* inside a single quoted attribute value */
    S_dquoted,          /* inside a double quoted attribute value */
    S_end,              /* after "</" */
    S_entity,           /* collecting an entity name */
    S_junk_tag,         /* skipping up to the next '>' */
    S_md,               /* inside a declaration ("<!") */
    S_md_sqs,           /* single quoted string in a declaration */
    S_md_dqs,           /* double quoted string in a declaration */
    S_com_1,            /* had '-' in a declaration: starting a comment? */
    S_com,              /* within a comment */
    S_com_2,            /* had '-' within a comment: ending it? */
    S_com_2a            /* had "--" within a comment, '>' ends it */
} sgml_state;
1.21 frystyk 46: 
 47: 
1.2 timbl 48: /*   Internal Context Data Structure
 49: **   -------------------------------
 50: */
1.42 frystyk 51: struct _HTStream
 52:   {
 53:    const HTStreamClass *isa;    /* inherited from HTStream */
 54:    const SGML_dtd *dtd;
 55:    HTStructuredClass *actions;   /* target class */
 56:    HTStructured *target;      /* target object */
1.2 timbl 57: 
1.42 frystyk 58:    HTTag *current_tag;
 59:    int current_attribute_number;
 60:    SGMLContent contents;      /* current content mode */
 61:    HTChunk *string;
 62:    int token;           /* ptr into string buffer */
 63:    sgml_state state;
 64:    BOOL present[MAX_ATTRIBUTES];  /* Flags: attribute is present? */
 65:    int value[MAX_ATTRIBUTES];   /* Offset pointers to the string */
 66:   };
1.2 timbl 67: 
 68: 
 69: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
1.42 frystyk 70: #define PUTB(b,l) ((*context->actions->put_block)(context->target, b, l))
1.2 timbl 71: 
1.42 frystyk 72: #define TRACE1(f,a) \
1.44 frystyk 73:    do {if (SGML_TRACE) HTTrace((f),(a)); } while(0)
1.42 frystyk 74: #define TRACE2(f,a,b) \
1.44 frystyk 75:    do {if (SGML_TRACE) HTTrace((f),(a),(b)); } while(0)
1.1 timbl 76: 
1.17 timbl 77: /*   Find Attribute Number
 78: **   ---------------------
 79: */
1.40 frystyk 80: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.42 frystyk 81:   {
1.47 frystyk 82:    HTAttr* attributes = tag->attributes;
1.17 timbl 83: 
1.42 frystyk 84:    int high, low, i, diff;     /* Binary search for attribute name */
 85:    for(low=0, high=tag->number_of_attributes;
 86:      high > low ;
 87:      diff < 0 ? (low = i+1) : (high = i) )
 88:      {
 89:        i = (low + (high-low)/2);
 90:        diff = strcasecomp(attributes[i].name, s);
 91:        if (diff==0)
 92:            return i;    /* success: found it */
 93:      }
 94:    return -1;
 95:   }
1.17 timbl 96: 
1.1 timbl 97: 
 98: /*   Handle Attribute
 99: **   ----------------
 100: */
1.38 frystyk 101: /* PUBLIC const char * SGML_default = "";  ?? */
1.1 timbl 102: 
1.38 frystyk 103: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.42 frystyk 104:   {
 105:    HTTag * tag = context->current_tag;
1.2 timbl 106: 
1.42 frystyk 107:    /* Note: if tag==NULL, we are skipping unknown tag... */
 108:    if (tag)
 109:      {
 110:        int i = SGMLFindAttribute(tag, s);
 111:        if (i >= 0)
 112:          {
 113:            context->current_attribute_number = i;
 114:            context->present[i] = YES;
 115:            return;
 116:          }
 117:        TRACE2("Unknown attribute %s for tag %s\n",
 118:            s, context->current_tag->name);
 119:      }
 120:    context->current_attribute_number = INVALID;  /* Invalid */
 121:   }
1.2 timbl 122: 
1.1 timbl 123: 
 124: /*   Handle attribute value
 125: **   ----------------------
 126: */
1.42 frystyk 127: PRIVATE void handle_attribute_value (HTStream * context)
 128:   {
 129:    /* Deal with attributes only if tag is known,
 130:      ignore silently otherwise */
 131: 
 132:    if (context->current_tag)
 133:      {
 134:        if (context->current_attribute_number != INVALID)
 135:            context->value[context->current_attribute_number] =
 136:                context->token;
1.48 ! frystyk 137:        else {
 ! 138:          char * data = HTChunk_data(context->string);
 ! 139:          TRACE1("Attribute value %s ignored\n",
 ! 140:              data ? data+context->token : "<null>");
 ! 141:        }
1.42 frystyk 142:      }
 143:    context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 144:   }
 145: 
 146: /*   Handle entity
 147: **   -------------
 148: **
 149: ** On entry,
 150: **   s    contains the entity name zero terminated
 151: */
1.42 frystyk 152: PRIVATE void handle_entity (HTStream * context)
1.1 timbl 153:   {
1.42 frystyk 154:    const char ** entities = context->dtd->entity_names;
1.48 ! frystyk 155:    const char *s = HTChunk_data(context->string);
1.1 timbl 156: 
1.42 frystyk 157:    int high, low, i, diff;
 158:    for(low=0, high = context->dtd->number_of_entities;
 159:      high > low ;
 160:      diff < 0 ? (low = i+1) : (high = i))
 161:      {
 162:        i = (low + (high-low)/2);
 163:        diff = strcmp(entities[i], s); /* Case sensitive! */
 164:        if (diff==0)
 165:          {  /* success: found it */
 166:            (*context->actions->put_entity)(context->target, i);
 167:            return;
 168:          }
 169:      }
1.47 frystyk 170: 
 171:    /* If entity string not found */
1.42 frystyk 172:    TRACE1("Unknown entity %s\n", s);
1.47 frystyk 173:    (*context->actions->unparsed_entity)
1.48 ! frystyk 174:      (context->target, HTChunk_data(context->string), HTChunk_size(context->string));
1.35 frystyk 175:   }
1.2 timbl 176: 
1.1 timbl 177: /*   End element
1.2 timbl 178: **   -----------
1.1 timbl 179: */
1.42 frystyk 180: PRIVATE void end_element (HTStream * context, HTTag *tag)
 181:   {
 182:    TRACE1("End  </%s>\n", tag->name);
 183:    (*context->actions->end_element)
 184:        (context->target, tag - context->dtd->tags);
1.1 timbl 185:   }
 186: 
1.17 timbl 187: /*   Start an element
 188: **   ----------------
1.1 timbl 189: */
1.31 frystyk 190: PRIVATE void start_element (HTStream * context)
1.42 frystyk 191:   {
 192:    int i;
 193:    char *value[MAX_ATTRIBUTES];
 194:    HTTag *tag = context->current_tag;
 195: 
 196:    TRACE1("Start <%s>\n", tag->name);
 197:    context->contents = tag->contents;
 198: 
 199:    /*
 200:    ** Build the actual pointers to the value strings stored in the
 201:    ** chunk buffer. (Must use offsets while collecting the values,
 202:    ** because the string chunk may get resized during the collection
 203:    ** and potentially relocated).
 204:    */
 205:    for (i = 0; i < MAX_ATTRIBUTES; ++i)
 206:        value[i] = context->value[i] < 0 ? NULL :
1.48 ! frystyk 207:            HTChunk_data(context->string) + context->value[i];
1.42 frystyk 208:    (*context->actions->start_element)
 209:        (context->target,
 210:         tag - context->dtd->tags,
 211:         context->present,
 212:         (const char**)value); /* coerce type for think c */
1.1 timbl 213:   }
 214: 
 215: 
1.2 timbl 216: /*       Find Tag in DTD tag list
 217: **       ------------------------
1.1 timbl 218: **
 219: ** On entry,
1.2 timbl 220: **   dtd   points to dtd structire including valid tag list
 221: **   string points to name of tag in question
1.1 timbl 222: **
1.2 timbl 223: ** On exit,
 224: **   returns:
1.7 timbl 225: **       NULL      tag not found
 226: **       else      address of tag structure in dtd
1.2 timbl 227: */
1.40 frystyk 228: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.42 frystyk 229:   {
 230:    int high, low, i, diff;
 231:    for(low=0, high=dtd->number_of_tags;
 232:      high > low ;
 233:      diff < 0 ? (low = i+1) : (high = i))
 234:      { /* Binary serach */
 235:        i = (low + (high-low)/2);
 236:        diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
 237:        if (diff==0)
 238:            /* success: found it */
 239:            return &dtd->tags[i];
 240:      }
 241:    return NULL;
1.2 timbl 242:   }
 243: 
 244: /*________________________________________________________________________
 245: **           Public Methods
1.1 timbl 246: */
 247: 
1.2 timbl 248: 
 249: /*   Could check that we are back to bottom of stack! @@ */
1.40 frystyk 250: PRIVATE int SGML_flush (HTStream * context)
1.42 frystyk 251:   {
 252:    return (*context->actions->flush)(context->target);
1.26 frystyk 253:   }
1.1 timbl 254: 
1.40 frystyk 255: PRIVATE int SGML_free (HTStream * context)
1.42 frystyk 256:   {
 257:    int status;
1.15 frystyk 258: 
1.42 frystyk 259:    if ((status = (*context->actions->_free)(context->target)) != HT_OK)
 260:        return status;
 261:    HTChunk_delete(context->string);
 262:    HT_FREE(context);
 263:    return HT_OK;
1.15 frystyk 264:   }
1.1 timbl 265: 
1.40 frystyk 266: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.42 frystyk 267:   {
 268:    (*context->actions->abort)(context->target, e);
 269:    HTChunk_delete(context->string);
 270:    HT_FREE(context);
 271:    return HT_ERROR;
1.15 frystyk 272:   }
1.1 timbl 273: 
1.41 frystyk 274: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.42 frystyk 275:   {
 276:    const SGML_dtd *dtd = context->dtd;
 277:    HTChunk *string = context->string;
 278:    const char *text = b;
 279:    int count = 0;
1.18 timbl 280:    
1.42 frystyk 281:    while (l-- > 0)
 282:      {
 283:        char c = *b++;
 284:        switch(context->state)
 285:          {
 286:          got_element_open:
 287:            /*
 288:            ** The label is jumped when the '>' of a the element
 289:            ** start tag has been detected. This DOES NOT FALL TO
 290:            ** THE CODE S_after_open, only processes the tag and
 291:            ** sets the state (c should still contain the
 292:            ** terminating character of the tag ('>'))
 293:            */
 294:            if (context->current_tag && context->current_tag->name)
 295:                start_element(context);
 296:            context->state = S_after_open;
 297:            break;
1.18 timbl 298: 
1.42 frystyk 299:          case S_after_open:
 300:            /*
 301:            ** State S_after_open is entered only for single
 302:            ** character after the element opening tag to test
 303:            ** against newline. Strip one trainling newline only
 304:            ** after opening nonempty element. - SGML: Ugh!
 305:            */
 306:            text = b;
 307:            count = 0;
 308:            if (c == '\n' && (context->contents != SGML_EMPTY))
 309:              {
 310:                context->state = S_text;
 311:                break;
 312:              }
 313:            --text;
 314:            goto S_text;
 315: 
 316:          S_text:
 317:            context->state = S_text;
 318:          case S_text:
1.13 timbl 319: #ifdef ISO_2022_JP
1.42 frystyk 320:            if (c == '033円')
 321:              {
 322:                context->state = S_esc;
 323:                ++count;
 324:                break;
 325:              }
1.13 timbl 326: #endif /* ISO_2022_JP */
1.42 frystyk 327:            if (c == '&')
 328:              {
 329:                if (count > 0)
 330:                    PUTB(text, count);
 331:                count = 0;
1.48 ! frystyk 332:                HTChunk_clear(string);
1.42 frystyk 333:                context->state = S_ero;
 334:              }
 335:            else if (c == '<')
 336:              {
 337:                if (count > 0)
 338:                    PUTB(text, count);
 339:                count = 0;
1.48 ! frystyk 340:                HTChunk_clear(string);
1.42 frystyk 341:                /* should scrap LITERAL, and use CDATA and
 342:                  RCDATA -- msa */
 343:                context->state =
 344:                    (context->contents == SGML_LITERAL) ?
 345:                        S_literal : S_tag;
 346:              }
 347:            else if (c == '\n')
 348:                /* Newline - ignore if before end tag! */
 349:                context->state = S_nl;
 350:            else
 351:                ++count;
 352:            break;
1.13 timbl 353: 
1.42 frystyk 354:          case S_nl:
 355:            if (c == '<')
 356:              {
 357:                if (count > 0)
 358:                    PUTB(text, count);
 359:                count = 0;
1.48 ! frystyk 360:                HTChunk_clear(string);
1.42 frystyk 361:                context->state =
 362:                    (context->contents == SGML_LITERAL) ?
 363:                        S_literal : S_nl_tago;
 364:              }
 365:            else
 366:              {
 367:                ++count;
 368:                goto S_text;
 369:              }
 370:            break;
1.18 timbl 371: 
1.42 frystyk 372:          case S_nl_tago:   /* Had newline and tag opener */
 373:            if (c != '/')
 374:                PUTC('\n'); /* Only ignore newline before </ */
 375:            context->state = S_tag;
 376:            goto handle_S_tag;
1.18 timbl 377: 
1.13 timbl 378: #ifdef ISO_2022_JP
1.42 frystyk 379:          case S_esc:
 380:            if (c=='$')
 381:                context->state = S_dollar;
 382:            else if (c=='(')
 383:                context->state = S_paren;
 384:            else
 385:                context->state = S_text;
 386:            ++count;
 387:            break;
 388: 
 389:          case S_dollar:
 390:            if (c=='@' || c=='B')
 391:                context->state = S_nonascii_text;
 392:            else
 393:                context->state = S_text;
 394:            ++count;
 395:            break;
 396: 
 397:          case S_paren:
 398:            if (c=='B' || c=='J')
 399:                context->state = S_text;
 400:            else
 401:                context->state = S_text;
 402:            ++count;
 403:            break;
 404: 
 405:          case S_nonascii_text:
 406:            if (c == '033円')
 407:                context->state = S_esc;
 408:            ++count;
 409:            break;
1.13 timbl 410: #endif /* ISO_2022_JP */
1.1 timbl 411: 
1.42 frystyk 412:            /* In literal mode, waits only for specific end tag!
 413:            ** Only foir compatibility with old servers.
 414:            */
 415:          case S_literal:
 416:            HTChunk_putc(string, c);
 417:            if ( TOUPPER(c) !=
1.48 ! frystyk 418:              ((HTChunk_size(string) == 1) ? '/'
 ! 419:               : context->current_tag->name[HTChunk_size(string)-2]))
1.42 frystyk 420:              {
1.1 timbl 421: 
1.42 frystyk 422:                /* If complete match, end literal */
 423:                if ((c == '>') &&
1.48 ! frystyk 424:                  (!context->current_tag->name[HTChunk_size(string)-2]))
1.42 frystyk 425:                  {
 426:                    end_element
 427:                        (context,context->current_tag);
 428:                    /*
 429:                     ...setting SGML_MIXED below is a
 430:                     bit of kludge, but a good guess that
 431:                     currently works, anything other than
 432:                     SGML_LITERAL would work... -- msa */
 433:                    context->contents = SGML_MIXED;
 434:                  }
 435:                else
 436:                  {
 437:                    /* If Mismatch: recover string. */
 438:                    PUTC( '<');
1.48 ! frystyk 439:                    PUTB(HTChunk_data(string), HTChunk_size(string));
1.42 frystyk 440:                  }
 441:                context->state = S_text;
 442:                text = b;
 443:                count = 0;
 444:              }
 445:            break;
1.1 timbl 446: 
1.42 frystyk 447:            /*
 448:            ** Character reference or Entity
 449:            */
 450:          case S_ero:
 451:            if (c == '#')
 452:              {
 453:                /*  &# is Char Ref Open */ 
 454:                context->state = S_cro;
 455:                break;
 456:              }
 457:            context->state = S_entity;
1.1 timbl 458: 
1.42 frystyk 459:            /** FALL THROUGH TO S_entity !! ***/
1.18 timbl 460: 
1.42 frystyk 461:            /*
 462:            ** Handle Entities
 463:            */
 464:          case S_entity:
1.43 frystyk 465:            if (isalnum((int) c))
1.42 frystyk 466:                HTChunk_putc(string, c);
 467:            else
 468:              {
 469:                HTChunk_terminate(string);
 470:                handle_entity(context);
 471:                text = b;
 472:                count = 0;
 473:                if (c != ';')
 474:                  {
 475:                    --text;
 476:                    goto S_text;
 477:                  }
 478:                context->state = S_text;
 479:              }
 480:            break;
1.2 timbl 481: 
1.42 frystyk 482:            /*   Character reference
 483:             */
 484:          case S_cro:
1.43 frystyk 485:            if (isalnum((int)c))
1.42 frystyk 486:                /* accumulate a character NUMBER */
 487:                HTChunk_putc(string, c);
 488:            else
 489:              {
 490:                int value;
 491:                HTChunk_terminate(string);
1.48 ! frystyk 492:                if (sscanf(HTChunk_data(string), "%d", &value)==1)
1.42 frystyk 493:                    PUTC((char)value);
 494:                else
 495:                  {
 496:                    PUTB("&#", 2);
1.48 ! frystyk 497:                    PUTB(HTChunk_data(string), HTChunk_size(string)-1);
1.42 frystyk 498:                  }
 499:                text = b;
 500:                count = 0;
 501:                if (c != ';')
 502:                  {
 503:                    --text;
 504:                    goto S_text;
 505:                  }
 506:                context->state = S_text;
 507:              }
 508:            break;
1.1 timbl 509: 
1.42 frystyk 510:          case S_tag:     /* new tag */
 511:          handle_S_tag:
1.43 frystyk 512:            if (isalnum((int)c))
1.42 frystyk 513:                HTChunk_putc(string, c);
1.48 ! frystyk 514:            else { /* End of tag name */
 ! 515:              int i;
 ! 516:              if (c == '/') {
 ! 517:                if (HTChunk_size(string) > 0)
 ! 518:                  TRACE1("`<%s/' found!\n", HTChunk_data(string));
 ! 519:                context->state = S_end;
 ! 520:                break;
 ! 521:              } else if (c == '!') {
 ! 522:                if (HTChunk_size(string) > 0)
 ! 523:                  TRACE1(" `<%s!' found!\n", HTChunk_data(string));
 ! 524:                context->state = S_md;
 ! 525:                break;
 ! 526:              }
 ! 527:              HTChunk_terminate(string);
 ! 528:              context->current_tag = SGMLFindTag(dtd, HTChunk_data(string));
 ! 529:              if (context->current_tag == NULL) {
 ! 530:                TRACE1("*** Unknown element %s\n", HTChunk_data(string));
 ! 531:                (*context->actions->unparsed_begin_element)
 ! 532:                  (context->target, HTChunk_data(string), HTChunk_size(string));
 ! 533:              } else {
 ! 534:                for (i=0; i<context->current_tag->number_of_attributes; i++) {
 ! 535:                  context->present[i] = NO;
 ! 536:                  context->value[i] = -1;
1.47 frystyk 537:                }
1.42 frystyk 538:              }
1.48 ! frystyk 539:              context->token = 0;
 ! 540:              HTChunk_clear(string);
 ! 541:              context->current_attribute_number = INVALID;
 ! 542:              goto S_tag_gap;
 ! 543:            }
1.42 frystyk 544:            break;
 545: 
 546:          S_tag_gap:
 547:            context->state = S_tag_gap;
 548:          case S_tag_gap:       /* Expecting attribute or > */
1.43 frystyk 549:            if (isspace((int) c))
1.42 frystyk 550:                break; /* Gap between attributes */
 551: 
 552:            if (c == '>')
 553:                goto got_element_open;
 554:            else
 555:                goto S_attr;
 556: 
 557:          S_attr:
 558:            /*
 559:            ** Start collecting the attribute name and collect
 560:            ** it in S_attr.
 561:            */
 562:            context->state = S_attr;
1.48 ! frystyk 563:            HTChunk_truncate(string, context->token);
1.42 frystyk 564:          case S_attr:
1.43 frystyk 565:            if (isspace((int) c) || c == '>' || c == '=')
1.42 frystyk 566:                goto got_attribute_name;
 567:            else
 568:                HTChunk_putc(string, c);
 569:            break;
 570: 
 571:          got_attribute_name:
 572:            /*
 573:            ** This label is entered when attribute name has been
 574:            ** collected. Process it and enter S_attr_gap for
 575:            ** potential value or start of the next attribute.
 576:            */
 577:            HTChunk_terminate(string) ;
 578:            handle_attribute_name
1.48 ! frystyk 579:                (context, HTChunk_data(string) + context->token);
 ! 580:            HTChunk_truncate(string, context->token);
1.42 frystyk 581:            context->state = S_attr_gap;
 582:          case S_attr_gap:  /* Expecting attribute or = or > */
1.43 frystyk 583:            if (isspace((int) c))
1.42 frystyk 584:                break; /* Gap after attribute */
 585: 
 586:            if (c == '>')
 587:                goto got_element_open;
 588:            else if (c == '=')
 589:                context->state = S_equals;
 590:            else
 591:                goto S_attr; /* Get next attribute */
 592:            break;
 593: 
 594:          case S_equals:   /* After attr = */ 
1.43 frystyk 595:            if (isspace((int) c))
1.42 frystyk 596:                break; /* Before attribute value */
 597: 
 598:            if (c == '>')
 599:              {      /* End of tag */
 600:                TRACE1("found = but no value\n", NULL);
 601:                goto got_element_open;
 602:              }
 603:            else if (c == '\'')
 604:                context->state = S_squoted;
 605:            else if (c == '"')
 606:                context->state = S_dquoted;
 607:            else
 608:                goto S_value;
 609:            break;
 610: 
 611:          S_value:
 612:            context->state = S_value;
1.48 ! frystyk 613:            HTChunk_truncate(string, context->token);
1.42 frystyk 614:          case S_value:
1.43 frystyk 615:            if (isspace((int) c) || c == '>')
1.42 frystyk 616:              {
 617:                HTChunk_terminate(string);
 618:                handle_attribute_value(context);
1.48 ! frystyk 619:                context->token = HTChunk_size(string);
1.42 frystyk 620:                goto S_tag_gap;
 621:              }
 622:            else
 623:                HTChunk_putc(string, c);
 624:            break;
1.1 timbl 625:        
1.42 frystyk 626:          case S_squoted:   /* Quoted attribute value */
 627:            if (c == '\'')
 628:              {
 629:                HTChunk_terminate(string);
 630:                handle_attribute_value(context);
1.48 ! frystyk 631:                context->token = HTChunk_size(string);
1.42 frystyk 632:                context->state = S_tag_gap;
 633:              }
 634:            else if (c && c != '\n' && c != '\r')
 635:                HTChunk_putc(string, c);
 636:            break;
1.1 timbl 637:    
1.42 frystyk 638:          case S_dquoted:   /* Quoted attribute value */
 639:            if (c == '"')
 640:              {
 641:                HTChunk_terminate(string);
 642:                handle_attribute_value(context);
1.48 ! frystyk 643:                context->token = HTChunk_size(string);
1.42 frystyk 644:                context->state = S_tag_gap;
 645:              }
 646:            else if (c && c != '\n' && c != '\r')
 647:                HTChunk_putc(string, c);
 648:            break;
1.2 timbl 649: 
1.42 frystyk 650:          case S_end: /* </ */
1.43 frystyk 651:            if (isalnum((int) c))
1.42 frystyk 652:                HTChunk_putc(string, c);
 653:            else
 654:              {      /* End of end tag name */
 655:                HTTag *t;
1.48 ! frystyk 656:                char * first;
1.42 frystyk 657:                HTChunk_terminate(string);
1.48 ! frystyk 658:                if ((first=HTChunk_data(string))!=NULL && *first != '0円')
 ! 659:                    t = SGMLFindTag(dtd, HTChunk_data(string));
1.42 frystyk 660:                else
 661:                    /* Empty end tag */
 662:                    /* Original code popped here one
 663:                      from the stack. If this feature
 664:                      is required, I have to put the
 665:                      stack back... -- msa */
 666:                    t = NULL;
1.47 frystyk 667:                if (!t) {
1.48 ! frystyk 668:                  TRACE1("Unknown end tag </%s>\n", HTChunk_data(string));
1.47 frystyk 669:                  (*context->actions->unparsed_end_element)
1.48 ! frystyk 670:                    (context->target, HTChunk_data(string), HTChunk_size(string));
1.47 frystyk 671:                } else {
 672:                  context->current_tag = NULL;
 673:                  end_element(context, t);
 674:                }
1.48 ! frystyk 675:                HTChunk_clear(string);
1.42 frystyk 676:                context->current_attribute_number = INVALID;
 677:                if (c != '>')
 678:                  {
1.43 frystyk 679:                    if (!isspace((int) c))
1.42 frystyk 680:                        TRACE2("`</%s%c' found!\n",
1.48 ! frystyk 681:                            HTChunk_data(string), c);
1.42 frystyk 682:                    context->state = S_junk_tag;
 683:                  }
 684:                else
 685:                  {
 686:                    text = b;
 687:                    count = 0;
 688:                    context->state = S_text;
 689:                  }
 690:              }
 691:            break;
 692: 
 693:          case S_junk_tag:
 694:            if (c == '>')
 695:              {
 696:                text = b;
 697:                count = 0;
 698:                context->state = S_text;
 699:              }
 700:            break;
 701: 
 702:            /*
 703:            ** Scanning (actually skipping) declarations
 704:            */
 705:          case S_md:
 706:            if (c == '-')
 707:                context->state = S_com_1;
 708:            else if (c == '"')
 709:                context->state = S_md_dqs;
 710:            else if (c == '\'')
 711:                context->state = S_md_sqs;
 712:            else if (c == '>')
 713:              {
 714:                text = b;
 715:                count = 0;
 716:                context->state = S_text;
 717:              }
 718:            break;
 719: 
 720:          case S_md_dqs: /* Skip double quoted string */
 721:            if (c == '"')
 722:                context->state = S_md;
1.46 frystyk 723:            else if (c == '>')
 724:              {
 725:                text = b;
 726:                count = 0;
 727:                context->state = S_text;
 728:              }
1.42 frystyk 729:            break;
 730: 
 731:          case S_md_sqs: /* Skip single quoted string */
 732:            if (c == '\'')
 733:                context->state = S_md;
1.46 frystyk 734:            else if (c == '>')
 735:              {
 736:                text = b;
 737:                count = 0;
 738:                context->state = S_text;
 739:              }
1.42 frystyk 740:            break;
 741: 
 742:          case S_com_1: /* Starting a comment? */
 743:            context->state = (c == '-') ? S_com : S_md;
1.46 frystyk 744:            if (c == '>')
 745:              {
 746:                text = b;
 747:                count = 0;
 748:                context->state = S_text;
 749:              }
1.42 frystyk 750:            break;
 751: 
 752:          case S_com: /* ..within comment */
 753:            if (c == '-')
 754:                context->state = S_com_2;
 755:            break;
 756: 
 757:          case S_com_2: /* Ending a comment ? */
1.44 frystyk 758:            context->state = (c == '-') ? S_com_2a : S_com;
 759:            break;
 760:          
 761:          case S_com_2a:
 762:            if (c == '>') {
 763:              text = b;
 764:              count = 0;
 765:              context->state = S_text;
 766:            } else
 767:              context->state = S_com;
1.42 frystyk 768:            break;
 769:          }
1.7 timbl 770:      }
1.42 frystyk 771:    if (count > 0)
 772:        PUTB(text, count);
 773:    return HT_OK;
 774:   }
1.1 timbl 775: 
1.2 timbl 776: 
1.40 frystyk 777: PRIVATE int SGML_string (HTStream * context, const char* s)
1.42 frystyk 778:   {
 779:    return SGML_write(context, s, (int) strlen(s));
 780:   }
1.2 timbl 781: 
 782: 
1.41 frystyk 783: PRIVATE int SGML_character (HTStream * context, char c)
1.42 frystyk 784:   {
 785:    return SGML_write(context, &c, 1);
 786:   }
1.2 timbl 787: 
 788: /*_______________________________________________________________________
 789: */
 790: 
 791: /*   Structured Object Class
 792: **   -----------------------
 793: */
1.38 frystyk 794: PRIVATE const HTStreamClass SGMLParser = 
1.47 frystyk 795: {
 796:   "SGML",
 797:   SGML_flush,
 798:   SGML_free,
 799:   SGML_abort,
 800:   SGML_character, 
 801:   SGML_string,
 802:   SGML_write
 803: }; 
1.2 timbl 804: 
 805: /*   Create SGML Engine
 806: **   ------------------
 807: **
 808: ** On entry,
 809: **   dtd       represents the DTD, along with
 810: **   actions     is the sink for the data as a set of routines.
 811: **
 812: */
1.42 frystyk 813: PUBLIC HTStream *SGML_new(const SGML_dtd * dtd, HTStructured * target)
1.47 frystyk 814: {
 815:   int i;
 816:   HTStream* context;
 817:   if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
 818:    HT_OUTOFMEM("SGML_begin");
 819: 
 820:   context->isa = &SGMLParser;
 821:   context->string = HTChunk_new(128);    /* Grow by this much */
 822:   context->dtd = dtd;
 823:   context->target = target;
 824:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 825:   /* Ugh: no OO */
 826:   context->state = S_text;
 827:   for(i=0; i<MAX_ATTRIBUTES; i++)
 828:    context->value[i] = 0;
 829:   return context;
 830: }
 831: 
 832: PUBLIC HTTag * SGML_findTag (SGML_dtd * dtd, int element_number)
 833: {
 834:   return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
 835:    (dtd->tags+element_number) : NULL;
 836: }
 837: 
 838: PUBLIC char * SGML_findTagName (SGML_dtd * dtd, int element_number)
 839: {
 840:   return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
 841:    (dtd->tags+element_number)->name : NULL;
 842: }
 843: 
 844: PUBLIC SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number)
 845: {
 846:   return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
 847:    (dtd->tags+element_number)->contents : SGML_ELEMENT;
 848: }
 849: 
 850: PUBLIC char * HTTag_name (HTTag * tag)
 851: {
 852:   return tag ? tag->name : NULL;
 853: }
 854: 
 855: PUBLIC int HTTag_attributes (HTTag * tag)
 856: {
 857:   return tag ? tag->number_of_attributes : -1;
 858: }
 859: 
 860: PUBLIC char * HTTag_attributeName (HTTag * tag, int attribute_number)
 861: {
 862:   return (tag && attribute_number>=0 && attribute_number<tag->number_of_attributes) ?
 863:    (tag->attributes+attribute_number)->name : NULL;
 864: }
 865: 
 866: PUBLIC char * HTAttr_name (HTAttr * attr)
 867: {
 868:   return attr ? attr->name : NULL;
 869: }

Webmaster

AltStyle によって変換されたページ (->オリジナル) /