[BACK] Return to SGML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/SGML.c, revision 1.51

1.23 frystyk 1: /*                                   SGML.c
 2: **   GENERAL SGML PARSER CODE
 3: **
1.27 frystyk 4: **   (c) COPYRIGHT MIT 1995.
1.23 frystyk 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.51 ! frystyk 6: **   @(#) $Id: SGML.c,v 1.50 1999/02/22 01:04:24 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: **   This module implements an HTStream object. To parse an
1.1 timbl 9: **   SGML file, create this object which is a parser. The object
1.2 timbl 10: **   is (currently) created by being passed a DTD structure,
 11: **   and a target HTStructured object at which to throw the parsed stuff.
1.1 timbl 12: **   
1.19 duns 13: **   6 Feb 93    Binary searches used. Interface modified.
 14: **   8 Jul 94 FM  Insulate free() from _free structure element.
1.42 frystyk 15: **   Nov 1996  msa Strip down the parser to minimal HTML tokenizer,
 16: **           Stop allocating space for the attribute values,
 17: **           use pointers to the string chunk instead.
1.1 timbl 18: */
 19: 
1.25 frystyk 20: /* Library include files */
1.45 frystyk 21: #include "wwwsys.h"
1.1 timbl 22: #include "HTUtils.h"
1.25 frystyk 23: #include "HTString.h"
1.1 timbl 24: #include "HTChunk.h"
1.20 frystyk 25: #include "SGML.h"
1.1 timbl 26: 
1.2 timbl 27: #define INVALID (-1)
 28: 
/*	The State (context) of the parser
**
**	The current state lives in the stream object and is consulted on
**	every byte, so parsing can resume where the previous write stopped.
**	The numeric order of the enumerators is kept as it always was.
*/
typedef enum _sgml_state {
    S_text,		/* ordinary character data */
    S_literal,		/* literal content: waiting for the matching end tag */
    S_tag,		/* after '<': collecting an element name */
    S_tag_gap,		/* inside a tag: expecting an attribute or '>' */
    S_attr,		/* collecting an attribute name */
    S_attr_gap,		/* after an attribute name: expecting attribute, '=' or '>' */
    S_equals,		/* after '=': expecting a value */
    S_value,		/* collecting an unquoted attribute value */
    S_after_open,	/* the single character following a start tag */
    S_nl,		/* newline seen in text */
    S_nl_tago,		/* newline followed by '<' */
    S_ero,		/* after '&' */
    S_cro,		/* after "&#": collecting a character number */
#ifdef ISO_2022_JP
    S_esc,		/* after ESC */
    S_dollar,		/* after ESC '$' */
    S_paren,		/* after ESC '(' */
    S_nonascii_text,	/* text after ESC '$' '@' or ESC '$' 'B' */
#endif
    S_squoted,		/* inside a single-quoted attribute value */
    S_dquoted,		/* inside a double-quoted attribute value */
    S_end,		/* after "</": collecting an end tag name */
    S_entity,		/* collecting an entity name */
    S_junk_tag,		/* skipping up to '>' after an end tag name */
    S_md,		/* inside a "<!" declaration */
    S_md_sqs,		/* single-quoted string inside a declaration */
    S_md_dqs,		/* double-quoted string inside a declaration */
    S_com_1,		/* '-' seen inside a declaration: comment starting? */
    S_com,		/* inside a comment */
    S_com_2,		/* '-' seen inside a comment */
    S_com_2a		/* "--" seen inside a comment: '>' ends it */
} sgml_state;
1.21 frystyk 46: 
 47: 
1.2 timbl 48: /*   Internal Context Data Structure
 49: **   -------------------------------
 50: */
1.42 frystyk 51: struct _HTStream
 52:   {
 53:    const HTStreamClass *isa;    /* inherited from HTStream */
 54:    const SGML_dtd *dtd;
 55:    HTStructuredClass *actions;   /* target class */
 56:    HTStructured *target;      /* target object */
1.2 timbl 57: 
1.42 frystyk 58:    HTTag *current_tag;
 59:    int current_attribute_number;
 60:    SGMLContent contents;      /* current content mode */
 61:    HTChunk *string;
 62:    int token;           /* ptr into string buffer */
 63:    sgml_state state;
 64:    BOOL present[MAX_ATTRIBUTES];  /* Flags: attribute is present? */
 65:    int value[MAX_ATTRIBUTES];   /* Offset pointers to the string */
 66:   };
1.2 timbl 67: 
 68: 
 69: #define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
1.42 frystyk 70: #define PUTB(b,l) ((*context->actions->put_block)(context->target, b, l))
1.2 timbl 71: 
1.17 timbl 72: /*   Find Attribute Number
 73: **   ---------------------
 74: */
1.40 frystyk 75: PRIVATE int SGMLFindAttribute (HTTag* tag, const char * s)
1.42 frystyk 76:   {
1.47 frystyk 77:    HTAttr* attributes = tag->attributes;
1.17 timbl 78: 
1.42 frystyk 79:    int high, low, i, diff;     /* Binary search for attribute name */
 80:    for(low=0, high=tag->number_of_attributes;
 81:      high > low ;
 82:      diff < 0 ? (low = i+1) : (high = i) )
 83:      {
 84:        i = (low + (high-low)/2);
 85:        diff = strcasecomp(attributes[i].name, s);
 86:        if (diff==0)
 87:            return i;    /* success: found it */
 88:      }
 89:    return -1;
 90:   }
1.17 timbl 91: 
1.1 timbl 92: 
 93: /*   Handle Attribute
 94: **   ----------------
 95: */
1.38 frystyk 96: /* PUBLIC const char * SGML_default = "";  ?? */
1.1 timbl 97: 
1.38 frystyk 98: PRIVATE void handle_attribute_name (HTStream * context, const char * s)
1.42 frystyk 99:   {
 100:    HTTag * tag = context->current_tag;
1.2 timbl 101: 
1.42 frystyk 102:    /* Note: if tag==NULL, we are skipping unknown tag... */
 103:    if (tag)
 104:      {
 105:        int i = SGMLFindAttribute(tag, s);
 106:        if (i >= 0)
 107:          {
 108:            context->current_attribute_number = i;
 109:            context->present[i] = YES;
 110:            return;
 111:          }
1.51 ! frystyk 112:        HTTRACE(SGML_TRACE, "Unknown attribute %s for tag %s\n" _
 ! 113:            s _ context->current_tag->name);
1.42 frystyk 114:      }
 115:    context->current_attribute_number = INVALID;  /* Invalid */
 116:   }
1.2 timbl 117: 
1.1 timbl 118: 
 119: /*   Handle attribute value
 120: **   ----------------------
 121: */
1.42 frystyk 122: PRIVATE void handle_attribute_value (HTStream * context)
 123:   {
 124:    /* Deal with attributes only if tag is known,
 125:      ignore silently otherwise */
 126: 
 127:    if (context->current_tag)
 128:      {
 129:        if (context->current_attribute_number != INVALID)
 130:            context->value[context->current_attribute_number] =
 131:                context->token;
1.48 frystyk 132:        else {
 133:          char * data = HTChunk_data(context->string);
1.51 ! frystyk 134:          HTTRACE(SGML_TRACE, "Attribute value %s ignored\n" _
1.48 frystyk 135:              data ? data+context->token : "<null>");
 136:        }
1.42 frystyk 137:      }
 138:    context->current_attribute_number = INVALID; /* can't have two assignments! */
1.1 timbl 139:   }
 140: 
 141: /*   Handle entity
 142: **   -------------
 143: **
 144: ** On entry,
 145: **   s    contains the entity name zero terminated
 146: */
1.42 frystyk 147: PRIVATE void handle_entity (HTStream * context)
1.1 timbl 148:   {
1.42 frystyk 149:    const char ** entities = context->dtd->entity_names;
1.48 frystyk 150:    const char *s = HTChunk_data(context->string);
1.1 timbl 151: 
1.42 frystyk 152:    int high, low, i, diff;
 153:    for(low=0, high = context->dtd->number_of_entities;
 154:      high > low ;
 155:      diff < 0 ? (low = i+1) : (high = i))
 156:      {
 157:        i = (low + (high-low)/2);
 158:        diff = strcmp(entities[i], s); /* Case sensitive! */
 159:        if (diff==0)
 160:          {  /* success: found it */
 161:            (*context->actions->put_entity)(context->target, i);
 162:            return;
 163:          }
 164:      }
1.47 frystyk 165: 
 166:    /* If entity string not found */
1.51 ! frystyk 167:    HTTRACE(SGML_TRACE, "Unknown entity %s\n" _ s);
1.47 frystyk 168:    (*context->actions->unparsed_entity)
1.48 frystyk 169:      (context->target, HTChunk_data(context->string), HTChunk_size(context->string));
1.35 frystyk 170:   }
1.2 timbl 171: 
1.1 timbl 172: /*   End element
1.2 timbl 173: **   -----------
1.1 timbl 174: */
1.42 frystyk 175: PRIVATE void end_element (HTStream * context, HTTag *tag)
 176:   {
1.51 ! frystyk 177:    HTTRACE(SGML_TRACE, "End  </%s>\n" _ tag->name);
1.42 frystyk 178:    (*context->actions->end_element)
 179:        (context->target, tag - context->dtd->tags);
1.1 timbl 180:   }
 181: 
1.17 timbl 182: /*   Start an element
 183: **   ----------------
1.1 timbl 184: */
1.31 frystyk 185: PRIVATE void start_element (HTStream * context)
1.42 frystyk 186:   {
 187:    int i;
 188:    char *value[MAX_ATTRIBUTES];
 189:    HTTag *tag = context->current_tag;
 190: 
1.51 ! frystyk 191:    HTTRACE(SGML_TRACE, "Start <%s>\n" _ tag->name);
1.42 frystyk 192:    context->contents = tag->contents;
 193: 
 194:    /*
 195:    ** Build the actual pointers to the value strings stored in the
 196:    ** chunk buffer. (Must use offsets while collecting the values,
 197:    ** because the string chunk may get resized during the collection
 198:    ** and potentially relocated).
 199:    */
 200:    for (i = 0; i < MAX_ATTRIBUTES; ++i)
 201:        value[i] = context->value[i] < 0 ? NULL :
1.48 frystyk 202:            HTChunk_data(context->string) + context->value[i];
1.42 frystyk 203:    (*context->actions->start_element)
 204:        (context->target,
 205:         tag - context->dtd->tags,
 206:         context->present,
 207:         (const char**)value); /* coerce type for think c */
1.1 timbl 208:   }
 209: 
 210: 
1.2 timbl 211: /*       Find Tag in DTD tag list
 212: **       ------------------------
1.1 timbl 213: **
 214: ** On entry,
1.2 timbl 215: **   dtd   points to dtd structire including valid tag list
 216: **   string points to name of tag in question
1.1 timbl 217: **
1.2 timbl 218: ** On exit,
 219: **   returns:
1.7 timbl 220: **       NULL      tag not found
 221: **       else      address of tag structure in dtd
1.2 timbl 222: */
1.40 frystyk 223: PRIVATE HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string)
1.42 frystyk 224:   {
 225:    int high, low, i, diff;
 226:    for(low=0, high=dtd->number_of_tags;
 227:      high > low ;
 228:      diff < 0 ? (low = i+1) : (high = i))
 229:      { /* Binary serach */
 230:        i = (low + (high-low)/2);
 231:        diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
 232:        if (diff==0)
 233:            /* success: found it */
 234:            return &dtd->tags[i];
 235:      }
 236:    return NULL;
1.2 timbl 237:   }
 238: 
 239: /*________________________________________________________________________
 240: **           Public Methods
1.1 timbl 241: */
 242: 
1.2 timbl 243: 
 244: /*   Could check that we are back to bottom of stack! @@ */
1.40 frystyk 245: PRIVATE int SGML_flush (HTStream * context)
1.42 frystyk 246:   {
 247:    return (*context->actions->flush)(context->target);
1.26 frystyk 248:   }
1.1 timbl 249: 
1.40 frystyk 250: PRIVATE int SGML_free (HTStream * context)
1.42 frystyk 251:   {
 252:    int status;
1.15 frystyk 253: 
1.42 frystyk 254:    if ((status = (*context->actions->_free)(context->target)) != HT_OK)
 255:        return status;
 256:    HTChunk_delete(context->string);
 257:    HT_FREE(context);
 258:    return HT_OK;
1.15 frystyk 259:   }
1.1 timbl 260: 
1.40 frystyk 261: PRIVATE int SGML_abort (HTStream * context, HTList * e)
1.42 frystyk 262:   {
 263:    (*context->actions->abort)(context->target, e);
 264:    HTChunk_delete(context->string);
 265:    HT_FREE(context);
 266:    return HT_ERROR;
1.15 frystyk 267:   }
1.1 timbl 268: 
1.41 frystyk 269: PRIVATE int SGML_write (HTStream * context, const char * b, int l)
1.42 frystyk 270:   {
 271:    const SGML_dtd *dtd = context->dtd;
 272:    HTChunk *string = context->string;
 273:    const char *text = b;
 274:    int count = 0;
1.18 timbl 275:    
1.42 frystyk 276:    while (l-- > 0)
 277:      {
 278:        char c = *b++;
 279:        switch(context->state)
 280:          {
 281:          got_element_open:
 282:            /*
 283:            ** The label is jumped when the '>' of a the element
 284:            ** start tag has been detected. This DOES NOT FALL TO
 285:            ** THE CODE S_after_open, only processes the tag and
 286:            ** sets the state (c should still contain the
 287:            ** terminating character of the tag ('>'))
 288:            */
 289:            if (context->current_tag && context->current_tag->name)
 290:                start_element(context);
 291:            context->state = S_after_open;
 292:            break;
1.18 timbl 293: 
1.42 frystyk 294:          case S_after_open:
 295:            /*
 296:            ** State S_after_open is entered only for single
 297:            ** character after the element opening tag to test
 298:            ** against newline. Strip one trainling newline only
 299:            ** after opening nonempty element. - SGML: Ugh!
 300:            */
 301:            text = b;
 302:            count = 0;
 303:            if (c == '\n' && (context->contents != SGML_EMPTY))
 304:              {
 305:                context->state = S_text;
 306:                break;
 307:              }
 308:            --text;
 309:            goto S_text;
 310: 
 311:          S_text:
 312:            context->state = S_text;
 313:          case S_text:
1.13 timbl 314: #ifdef ISO_2022_JP
1.42 frystyk 315:            if (c == '033円')
 316:              {
 317:                context->state = S_esc;
 318:                ++count;
 319:                break;
 320:              }
1.13 timbl 321: #endif /* ISO_2022_JP */
1.42 frystyk 322:            if (c == '&')
 323:              {
 324:                if (count > 0)
 325:                    PUTB(text, count);
 326:                count = 0;
1.48 frystyk 327:                HTChunk_clear(string);
1.42 frystyk 328:                context->state = S_ero;
 329:              }
 330:            else if (c == '<')
 331:              {
 332:                if (count > 0)
 333:                    PUTB(text, count);
 334:                count = 0;
1.48 frystyk 335:                HTChunk_clear(string);
1.42 frystyk 336:                /* should scrap LITERAL, and use CDATA and
 337:                  RCDATA -- msa */
 338:                context->state =
 339:                    (context->contents == SGML_LITERAL) ?
 340:                        S_literal : S_tag;
 341:              }
 342:            else if (c == '\n')
 343:                /* Newline - ignore if before end tag! */
 344:                context->state = S_nl;
 345:            else
 346:                ++count;
 347:            break;
1.13 timbl 348: 
1.42 frystyk 349:          case S_nl:
 350:            if (c == '<')
 351:              {
 352:                if (count > 0)
 353:                    PUTB(text, count);
 354:                count = 0;
1.48 frystyk 355:                HTChunk_clear(string);
1.42 frystyk 356:                context->state =
 357:                    (context->contents == SGML_LITERAL) ?
 358:                        S_literal : S_nl_tago;
 359:              }
 360:            else
 361:              {
 362:                ++count;
 363:                goto S_text;
 364:              }
 365:            break;
1.18 timbl 366: 
1.42 frystyk 367:          case S_nl_tago:   /* Had newline and tag opener */
 368:            if (c != '/')
 369:                PUTC('\n'); /* Only ignore newline before </ */
 370:            context->state = S_tag;
 371:            goto handle_S_tag;
1.18 timbl 372: 
1.13 timbl 373: #ifdef ISO_2022_JP
1.42 frystyk 374:          case S_esc:
 375:            if (c=='$')
 376:                context->state = S_dollar;
 377:            else if (c=='(')
 378:                context->state = S_paren;
 379:            else
 380:                context->state = S_text;
 381:            ++count;
 382:            break;
 383: 
 384:          case S_dollar:
 385:            if (c=='@' || c=='B')
 386:                context->state = S_nonascii_text;
 387:            else
 388:                context->state = S_text;
 389:            ++count;
 390:            break;
 391: 
 392:          case S_paren:
 393:            if (c=='B' || c=='J')
 394:                context->state = S_text;
 395:            else
 396:                context->state = S_text;
 397:            ++count;
 398:            break;
 399: 
 400:          case S_nonascii_text:
 401:            if (c == '033円')
 402:                context->state = S_esc;
 403:            ++count;
 404:            break;
1.13 timbl 405: #endif /* ISO_2022_JP */
1.1 timbl 406: 
1.42 frystyk 407:            /* In literal mode, waits only for specific end tag!
 408:            ** Only foir compatibility with old servers.
 409:            */
 410:          case S_literal:
 411:            HTChunk_putc(string, c);
 412:            if ( TOUPPER(c) !=
1.48 frystyk 413:              ((HTChunk_size(string) == 1) ? '/'
 414:               : context->current_tag->name[HTChunk_size(string)-2]))
1.42 frystyk 415:              {
1.1 timbl 416: 
1.42 frystyk 417:                /* If complete match, end literal */
 418:                if ((c == '>') &&
1.48 frystyk 419:                  (!context->current_tag->name[HTChunk_size(string)-2]))
1.42 frystyk 420:                  {
 421:                    end_element
 422:                        (context,context->current_tag);
 423:                    /*
 424:                     ...setting SGML_MIXED below is a
 425:                     bit of kludge, but a good guess that
 426:                     currently works, anything other than
 427:                     SGML_LITERAL would work... -- msa */
 428:                    context->contents = SGML_MIXED;
 429:                  }
 430:                else
 431:                  {
 432:                    /* If Mismatch: recover string. */
 433:                    PUTC( '<');
1.48 frystyk 434:                    PUTB(HTChunk_data(string), HTChunk_size(string));
1.42 frystyk 435:                  }
 436:                context->state = S_text;
 437:                text = b;
 438:                count = 0;
 439:              }
 440:            break;
1.1 timbl 441: 
1.42 frystyk 442:            /*
 443:            ** Character reference or Entity
 444:            */
 445:          case S_ero:
 446:            if (c == '#')
 447:              {
 448:                /*  &# is Char Ref Open */ 
 449:                context->state = S_cro;
 450:                break;
 451:              }
 452:            context->state = S_entity;
1.1 timbl 453: 
1.42 frystyk 454:            /** FALL THROUGH TO S_entity !! ***/
1.18 timbl 455: 
1.42 frystyk 456:            /*
 457:            ** Handle Entities
 458:            */
 459:          case S_entity:
1.43 frystyk 460:            if (isalnum((int) c))
1.42 frystyk 461:                HTChunk_putc(string, c);
 462:            else
 463:              {
 464:                HTChunk_terminate(string);
 465:                handle_entity(context);
 466:                text = b;
 467:                count = 0;
 468:                if (c != ';')
 469:                  {
 470:                    --text;
 471:                    goto S_text;
 472:                  }
 473:                context->state = S_text;
 474:              }
 475:            break;
1.2 timbl 476: 
1.42 frystyk 477:            /*   Character reference
 478:             */
 479:          case S_cro:
1.43 frystyk 480:            if (isalnum((int)c))
1.42 frystyk 481:                /* accumulate a character NUMBER */
 482:                HTChunk_putc(string, c);
 483:            else
 484:              {
 485:                int value;
 486:                HTChunk_terminate(string);
1.48 frystyk 487:                if (sscanf(HTChunk_data(string), "%d", &value)==1)
1.42 frystyk 488:                    PUTC((char)value);
 489:                else
 490:                  {
 491:                    PUTB("&#", 2);
1.48 frystyk 492:                    PUTB(HTChunk_data(string), HTChunk_size(string)-1);
1.42 frystyk 493:                  }
 494:                text = b;
 495:                count = 0;
 496:                if (c != ';')
 497:                  {
 498:                    --text;
 499:                    goto S_text;
 500:                  }
 501:                context->state = S_text;
 502:              }
 503:            break;
1.1 timbl 504: 
1.42 frystyk 505:          case S_tag:     /* new tag */
 506:          handle_S_tag:
1.43 frystyk 507:            if (isalnum((int)c))
1.42 frystyk 508:                HTChunk_putc(string, c);
1.48 frystyk 509:            else { /* End of tag name */
 510:              int i;
 511:              if (c == '/') {
 512:                if (HTChunk_size(string) > 0)
1.51 ! frystyk 513:                  HTTRACE(SGML_TRACE, "`<%s/' found!\n" _ HTChunk_data(string));
1.48 frystyk 514:                context->state = S_end;
 515:                break;
 516:              } else if (c == '!') {
 517:                if (HTChunk_size(string) > 0)
1.51 ! frystyk 518:                  HTTRACE(SGML_TRACE, " `<%s!' found!\n" _ HTChunk_data(string));
1.48 frystyk 519:                context->state = S_md;
 520:                break;
 521:              }
 522:              HTChunk_terminate(string);
 523:              context->current_tag = SGMLFindTag(dtd, HTChunk_data(string));
 524:              if (context->current_tag == NULL) {
1.51 ! frystyk 525:                HTTRACE(SGML_TRACE, "*** Unknown element %s\n" _ HTChunk_data(string));
1.48 frystyk 526:                (*context->actions->unparsed_begin_element)
 527:                  (context->target, HTChunk_data(string), HTChunk_size(string));
 528:              } else {
 529:                for (i=0; i<context->current_tag->number_of_attributes; i++) {
 530:                  context->present[i] = NO;
 531:                  context->value[i] = -1;
1.47 frystyk 532:                }
1.42 frystyk 533:              }
1.48 frystyk 534:              context->token = 0;
 535:              HTChunk_clear(string);
 536:              context->current_attribute_number = INVALID;
 537:              goto S_tag_gap;
 538:            }
1.42 frystyk 539:            break;
 540: 
 541:          S_tag_gap:
 542:            context->state = S_tag_gap;
 543:          case S_tag_gap:       /* Expecting attribute or > */
1.43 frystyk 544:            if (isspace((int) c))
1.42 frystyk 545:                break; /* Gap between attributes */
 546: 
 547:            if (c == '>')
 548:                goto got_element_open;
 549:            else
 550:                goto S_attr;
 551: 
 552:          S_attr:
 553:            /*
 554:            ** Start collecting the attribute name and collect
 555:            ** it in S_attr.
 556:            */
 557:            context->state = S_attr;
1.48 frystyk 558:            HTChunk_truncate(string, context->token);
1.42 frystyk 559:          case S_attr:
1.43 frystyk 560:            if (isspace((int) c) || c == '>' || c == '=')
1.42 frystyk 561:                goto got_attribute_name;
 562:            else
 563:                HTChunk_putc(string, c);
 564:            break;
 565: 
 566:          got_attribute_name:
 567:            /*
 568:            ** This label is entered when attribute name has been
 569:            ** collected. Process it and enter S_attr_gap for
 570:            ** potential value or start of the next attribute.
 571:            */
 572:            HTChunk_terminate(string) ;
 573:            handle_attribute_name
1.48 frystyk 574:                (context, HTChunk_data(string) + context->token);
 575:            HTChunk_truncate(string, context->token);
1.42 frystyk 576:            context->state = S_attr_gap;
 577:          case S_attr_gap:  /* Expecting attribute or = or > */
1.43 frystyk 578:            if (isspace((int) c))
1.42 frystyk 579:                break; /* Gap after attribute */
 580: 
 581:            if (c == '>')
 582:                goto got_element_open;
 583:            else if (c == '=')
 584:                context->state = S_equals;
 585:            else
 586:                goto S_attr; /* Get next attribute */
 587:            break;
 588: 
 589:          case S_equals:   /* After attr = */ 
1.43 frystyk 590:            if (isspace((int) c))
1.42 frystyk 591:                break; /* Before attribute value */
 592: 
 593:            if (c == '>')
 594:              {      /* End of tag */
1.51 ! frystyk 595:                HTTRACE(SGML_TRACE, "found = but no value\n");
1.42 frystyk 596:                goto got_element_open;
 597:              }
 598:            else if (c == '\'')
 599:                context->state = S_squoted;
 600:            else if (c == '"')
 601:                context->state = S_dquoted;
 602:            else
 603:                goto S_value;
 604:            break;
 605: 
 606:          S_value:
 607:            context->state = S_value;
1.48 frystyk 608:            HTChunk_truncate(string, context->token);
1.42 frystyk 609:          case S_value:
1.43 frystyk 610:            if (isspace((int) c) || c == '>')
1.42 frystyk 611:              {
 612:                HTChunk_terminate(string);
 613:                handle_attribute_value(context);
1.48 frystyk 614:                context->token = HTChunk_size(string);
1.42 frystyk 615:                goto S_tag_gap;
 616:              }
 617:            else
 618:                HTChunk_putc(string, c);
 619:            break;
1.1 timbl 620:        
1.42 frystyk 621:          case S_squoted:   /* Quoted attribute value */
 622:            if (c == '\'')
 623:              {
 624:                HTChunk_terminate(string);
 625:                handle_attribute_value(context);
1.48 frystyk 626:                context->token = HTChunk_size(string);
1.42 frystyk 627:                context->state = S_tag_gap;
 628:              }
 629:            else if (c && c != '\n' && c != '\r')
 630:                HTChunk_putc(string, c);
 631:            break;
1.1 timbl 632:    
1.42 frystyk 633:          case S_dquoted:   /* Quoted attribute value */
 634:            if (c == '"')
 635:              {
 636:                HTChunk_terminate(string);
 637:                handle_attribute_value(context);
1.48 frystyk 638:                context->token = HTChunk_size(string);
1.42 frystyk 639:                context->state = S_tag_gap;
 640:              }
 641:            else if (c && c != '\n' && c != '\r')
 642:                HTChunk_putc(string, c);
 643:            break;
1.2 timbl 644: 
1.42 frystyk 645:          case S_end: /* </ */
1.43 frystyk 646:            if (isalnum((int) c))
1.42 frystyk 647:                HTChunk_putc(string, c);
 648:            else
 649:              {      /* End of end tag name */
 650:                HTTag *t;
1.48 frystyk 651:                char * first;
1.42 frystyk 652:                HTChunk_terminate(string);
1.48 frystyk 653:                if ((first=HTChunk_data(string))!=NULL && *first != '0円')
 654:                    t = SGMLFindTag(dtd, HTChunk_data(string));
1.42 frystyk 655:                else
 656:                    /* Empty end tag */
 657:                    /* Original code popped here one
 658:                      from the stack. If this feature
 659:                      is required, I have to put the
 660:                      stack back... -- msa */
 661:                    t = NULL;
1.47 frystyk 662:                if (!t) {
1.51 ! frystyk 663:                  HTTRACE(SGML_TRACE, "Unknown end tag </%s>\n" _ HTChunk_data(string));
1.47 frystyk 664:                  (*context->actions->unparsed_end_element)
1.48 frystyk 665:                    (context->target, HTChunk_data(string), HTChunk_size(string));
1.47 frystyk 666:                } else {
 667:                  context->current_tag = NULL;
 668:                  end_element(context, t);
 669:                }
1.48 frystyk 670:                HTChunk_clear(string);
1.42 frystyk 671:                context->current_attribute_number = INVALID;
 672:                if (c != '>')
 673:                  {
1.43 frystyk 674:                    if (!isspace((int) c))
1.51 ! frystyk 675:                      HTTRACE(SGML_TRACE, "`</%s%c' found!\n" _ HTChunk_data(string) _ c);
1.42 frystyk 676:                    context->state = S_junk_tag;
 677:                  }
 678:                else
 679:                  {
 680:                    text = b;
 681:                    count = 0;
 682:                    context->state = S_text;
 683:                  }
 684:              }
 685:            break;
 686: 
 687:          case S_junk_tag:
 688:            if (c == '>')
 689:              {
 690:                text = b;
 691:                count = 0;
 692:                context->state = S_text;
 693:              }
 694:            break;
 695: 
 696:            /*
 697:            ** Scanning (actually skipping) declarations
 698:            */
 699:          case S_md:
 700:            if (c == '-')
 701:                context->state = S_com_1;
 702:            else if (c == '"')
 703:                context->state = S_md_dqs;
 704:            else if (c == '\'')
 705:                context->state = S_md_sqs;
 706:            else if (c == '>')
 707:              {
 708:                text = b;
 709:                count = 0;
 710:                context->state = S_text;
 711:              }
 712:            break;
 713: 
 714:          case S_md_dqs: /* Skip double quoted string */
 715:            if (c == '"')
 716:                context->state = S_md;
1.46 frystyk 717:            else if (c == '>')
 718:              {
 719:                text = b;
 720:                count = 0;
 721:                context->state = S_text;
 722:              }
1.42 frystyk 723:            break;
 724: 
 725:          case S_md_sqs: /* Skip single quoted string */
 726:            if (c == '\'')
 727:                context->state = S_md;
1.46 frystyk 728:            else if (c == '>')
 729:              {
 730:                text = b;
 731:                count = 0;
 732:                context->state = S_text;
 733:              }
1.42 frystyk 734:            break;
 735: 
 736:          case S_com_1: /* Starting a comment? */
 737:            context->state = (c == '-') ? S_com : S_md;
1.46 frystyk 738:            if (c == '>')
 739:              {
 740:                text = b;
 741:                count = 0;
 742:                context->state = S_text;
 743:              }
1.42 frystyk 744:            break;
 745: 
 746:          case S_com: /* ..within comment */
 747:            if (c == '-')
 748:                context->state = S_com_2;
 749:            break;
 750: 
 751:          case S_com_2: /* Ending a comment ? */
1.44 frystyk 752:            context->state = (c == '-') ? S_com_2a : S_com;
 753:            break;
 754:          
 755:          case S_com_2a:
 756:            if (c == '>') {
 757:              text = b;
 758:              count = 0;
 759:              context->state = S_text;
 760:            } else
 761:              context->state = S_com;
1.42 frystyk 762:            break;
 763:          }
1.7 timbl 764:      }
1.42 frystyk 765:    if (count > 0)
 766:        PUTB(text, count);
 767:    return HT_OK;
 768:   }
1.1 timbl 769: 
1.2 timbl 770: 
1.40 frystyk 771: PRIVATE int SGML_string (HTStream * context, const char* s)
1.42 frystyk 772:   {
 773:    return SGML_write(context, s, (int) strlen(s));
 774:   }
1.2 timbl 775: 
 776: 
1.41 frystyk 777: PRIVATE int SGML_character (HTStream * context, char c)
1.42 frystyk 778:   {
 779:    return SGML_write(context, &c, 1);
 780:   }
1.2 timbl 781: 
 782: /*_______________________________________________________________________
 783: */
 784: 
 785: /*   Structured Object Class
 786: **   -----------------------
 787: */
1.38 frystyk 788: PRIVATE const HTStreamClass SGMLParser = 
1.47 frystyk 789: {
 790:   "SGML",
 791:   SGML_flush,
 792:   SGML_free,
 793:   SGML_abort,
 794:   SGML_character, 
 795:   SGML_string,
 796:   SGML_write
 797: }; 
1.2 timbl 798: 
 799: /*   Create SGML Engine
 800: **   ------------------
 801: **
 802: ** On entry,
 803: **   dtd       represents the DTD, along with
 804: **   actions     is the sink for the data as a set of routines.
 805: **
 806: */
1.42 frystyk 807: PUBLIC HTStream *SGML_new(const SGML_dtd * dtd, HTStructured * target)
1.47 frystyk 808: {
 809:   int i;
 810:   HTStream* context;
 811:   if ((context = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
 812:    HT_OUTOFMEM("SGML_begin");
 813: 
 814:   context->isa = &SGMLParser;
 815:   context->string = HTChunk_new(128);    /* Grow by this much */
 816:   context->dtd = dtd;
 817:   context->target = target;
 818:   context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
 819:   /* Ugh: no OO */
 820:   context->state = S_text;
 821:   for(i=0; i<MAX_ATTRIBUTES; i++)
 822:    context->value[i] = 0;
 823:   return context;
 824: }
 825: 
 826: PUBLIC HTTag * SGML_findTag (SGML_dtd * dtd, int element_number)
 827: {
 828:   return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
 829:    (dtd->tags+element_number) : NULL;
 830: }
 831: 
 832: PUBLIC char * SGML_findTagName (SGML_dtd * dtd, int element_number)
 833: {
 834:   return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
 835:    (dtd->tags+element_number)->name : NULL;
 836: }
 837: 
 838: PUBLIC SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number)
 839: {
 840:   return (dtd && element_number>=0 && element_number<dtd->number_of_tags) ?
 841:    (dtd->tags+element_number)->contents : SGML_ELEMENT;
 842: }
 843: 
1.50 frystyk 844: PUBLIC int SGML_findElementNumber (SGML_dtd * dtd, char * name_element)
 845: {
 846:   if (dtd && name_element) {
 847:    int i;
 848:    HTTag *ct;
 849:    for (i = 0; i< dtd->number_of_tags; i++) {
 850:      ct = &(dtd->tags[i]);
 851:      if (!strcasecomp(ct->name,name_element))
 852:        return i;
 853:    }
 854:   }
 855:   return -1;
 856: }
 857: 
1.47 frystyk 858: PUBLIC char * HTTag_name (HTTag * tag)
 859: {
 860:   return tag ? tag->name : NULL;
1.49 frystyk 861: }
 862: 
 863: PUBLIC SGMLContent HTTag_content (HTTag * tag)
 864: {
 865:   return tag ? tag->contents : SGML_EMPTY;
1.47 frystyk 866: }
 867: 
 868: PUBLIC int HTTag_attributes (HTTag * tag)
 869: {
 870:   return tag ? tag->number_of_attributes : -1;
 871: }
 872: 
 873: PUBLIC char * HTTag_attributeName (HTTag * tag, int attribute_number)
 874: {
 875:   return (tag && attribute_number>=0 && attribute_number<tag->number_of_attributes) ?
 876:    (tag->attributes+attribute_number)->name : NULL;
 877: }
 878: 
 879: PUBLIC char * HTAttr_name (HTAttr * attr)
 880: {
 881:   return attr ? attr->name : NULL;
 882: }
1.50 frystyk 883: 
 884: 
 885: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /