[BACK] Return to HTML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTML.c, revision 1.69

/*								       HTML.c
**	STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
**
**	(c) COPYRIGHT MIT 1995.
**	Please first read the full copyright statement in the file COPYRIGH.
**	@(#) $Id: HTML.c,v 1.68 1998/02/01 19:04:14 frystyk Exp $
**
**	This generates a hypertext object. It converts from the
**	structured stream interface for HTML events into the style-
**	oriented interface of the HText interface. This module is
**	only used in clients and should not be linked into servers.
**
**	Override this module if making a new GUI browser.
**
** HISTORY:
**	8 Jul 94  FM	Insulate free() from _free structure element.
**
*/
1.16 timbl 19: 
1.41 frystyk 20: /* Library include files */
1.60 frystyk 21: #include "sysdep.h"
1.63 frystyk 22: #include "WWWUtil.h"
 23: #include "WWWCore.h"
 24: #include "WWWHTML.h"
1.1 timbl 25: #include "HText.h"
 26: #include "HTStyle.h"
1.41 frystyk 27: #include "HTML.h"
1.1 timbl 28: 
 29: extern HTStyleSheet * styleSheet;   /* Application-wide */
 30: 
 31: /*   Module-wide style cache
 32: */
 33: PRIVATE int      got_styles = 0;
1.16 timbl 34: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 35: PRIVATE HTStyle *default_style;
1.1 timbl 36: 
/*
**  Internal marker character.  HTML_put_character() turns it into a real
**  '\t' in free-format text, whereas ordinary white space is collapsed.
*/
#define TAB     '\0'
1.1 timbl 38: 
 39: /*       HTML Object
 40: **       -----------
 41: */
1.2 timbl 42: #define MAX_NESTING 20     /* Should be checked by parser */
 43: 
 44: typedef struct _stack_element {
 45:     HTStyle *   style;
 46:    int       tag_number;
 47: } stack_element;
 48: 
 49: struct _HTStructured {
1.60 frystyk 50:   const HTStructuredClass * isa;
1.54 frystyk 51:   HTRequest *            request;
1.2 timbl 52:   HTParentAnchor *      node_anchor;
 53:   HText *          text;
 54: 
 55:   HTStream*         target;         /* Output stream */
 56:   HTStreamClass       targetClass;      /* Output routines */
 57: 
1.56 frystyk 58:   HTChunk *         title;     /* Grow by 128 */
1.2 timbl 59:   
 60:   char *           comment_start; /* for literate programming */
 61:   char *           comment_end;
1.16 timbl 62:   
1.60 frystyk 63:   const SGML_dtd*      dtd;
1.16 timbl 64:   
1.2 timbl 65:   HTTag *          current_tag;
 66:   BOOL            style_change;
 67:   HTStyle *         new_style;
 68:   HTStyle *         old_style;
 69:   BOOL            in_word; /* Have just had a non-white char */
1.44 frystyk 70: 
 71:   stack_element       stack[MAX_NESTING];
 72:   stack_element       *sp;         /* Style stack pointer */
 73:   int                overflow; /* Keep track of overflow nesting */
1.1 timbl 74: };
 75: 
1.2 timbl 76: struct _HTStream {
1.60 frystyk 77:   const HTStreamClass *   isa;
1.2 timbl 78:   /* .... */
 79: };
1.1 timbl 80: 
 81: /*       Forward declarations of routines
 82: */
1.52 frystyk 83: PRIVATE void get_styles (void);
1.1 timbl 84: 
 85: 
1.52 frystyk 86: PRIVATE void actually_set_style (HTStructured * me);
 87: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle * style);
1.1 timbl 88: 
 89: /*   Style buffering avoids dummy paragraph begin/ends.
 90: */
/*
**  Apply a pending style change, if there is one.  Expects a variable
**  named `me' in scope.  The do/while(0) wrapper makes the macro a single
**  statement so that `if (x) UPDATE_STYLE; else ...' parses as intended.
*/
#define UPDATE_STYLE \
    do { if (me->style_change) { actually_set_style(me); } } while (0)
1.1 timbl 92: 
/*	Entity values -- for ISO Latin 1 local representation
**
**	This MUST match exactly the table referred to in the DTD!
**
**	Each entry is a one-character string written as an octal escape.
*/
static char * ISO_Latin1[] = {
    "\306",	/* capital AE diphthong (ligature) */
    "\301",	/* capital A, acute accent */
    "\302",	/* capital A, circumflex accent */
    "\300",	/* capital A, grave accent */
    "\305",	/* capital A, ring */
    "\303",	/* capital A, tilde */
    "\304",	/* capital A, dieresis or umlaut mark */
    "\307",	/* capital C, cedilla */
    "\320",	/* capital Eth, Icelandic */
    "\311",	/* capital E, acute accent */
    "\312",	/* capital E, circumflex accent */
    "\310",	/* capital E, grave accent */
    "\313",	/* capital E, dieresis or umlaut mark */
    "\315",	/* capital I, acute accent */
    "\316",	/* capital I, circumflex accent */
    "\314",	/* capital I, grave accent */
    "\317",	/* capital I, dieresis or umlaut mark */
    "\321",	/* capital N, tilde */
    "\323",	/* capital O, acute accent */
    "\324",	/* capital O, circumflex accent */
    "\322",	/* capital O, grave accent */
    "\330",	/* capital O, slash */
    "\325",	/* capital O, tilde */
    "\326",	/* capital O, dieresis or umlaut mark */
    "\336",	/* capital THORN, Icelandic */
    "\332",	/* capital U, acute accent */
    "\333",	/* capital U, circumflex accent */
    "\331",	/* capital U, grave accent */
    "\334",	/* capital U, dieresis or umlaut mark */
    "\335",	/* capital Y, acute accent */
    "\341",	/* small a, acute accent */
    "\342",	/* small a, circumflex accent */
    "\346",	/* small ae diphthong (ligature) */
    "\340",	/* small a, grave accent */
    "\046",	/* ampersand */
    "\345",	/* small a, ring */
    "\343",	/* small a, tilde */
    "\344",	/* small a, dieresis or umlaut mark */
    "\347",	/* small c, cedilla */
    "\351",	/* small e, acute accent */
    "\352",	/* small e, circumflex accent */
    "\350",	/* small e, grave accent */
    "\360",	/* small eth, Icelandic */
    "\353",	/* small e, dieresis or umlaut mark */
    "\076",	/* greater than */
    "\355",	/* small i, acute accent */
    "\356",	/* small i, circumflex accent */
    "\354",	/* small i, grave accent */
    "\357",	/* small i, dieresis or umlaut mark */
    "\074",	/* less than */
    "\040",	/* non-breaking space */
    "\361",	/* small n, tilde */
    "\363",	/* small o, acute accent */
    "\364",	/* small o, circumflex accent */
    "\362",	/* small o, grave accent */
    "\370",	/* small o, slash */
    "\365",	/* small o, tilde */
    "\366",	/* small o, dieresis or umlaut mark */
    "\042",	/* double quote sign - June 94 */
    "\337",	/* small sharp s, German (sz ligature) */
    "\376",	/* small thorn, Icelandic */
    "\372",	/* small u, acute accent */
    "\373",	/* small u, circumflex accent */
    "\371",	/* small u, grave accent */
    "\374",	/* small u, dieresis or umlaut mark */
    "\375",	/* small y, acute accent */
    "\377",	/* small y, dieresis or umlaut mark */
};
 166: 
/*	Entity values -- for NeXT local representation
**
**	This MUST match exactly the table referred to in the DTD!
**
**	Each entry is a one-character string written as an octal escape.
*/
static char * NeXTCharacters[] = {
    "\341",	/* capital AE diphthong (ligature)	*/
    "\202",	/* capital A, acute accent		*/
    "\203",	/* capital A, circumflex accent		*/
    "\201",	/* capital A, grave accent		*/
    "\206",	/* capital A, ring			*/
    "\204",	/* capital A, tilde			*/
    "\205",	/* capital A, dieresis or umlaut mark	*/
    "\207",	/* capital C, cedilla			*/
    "\220",	/* capital Eth, Icelandic		*/
    "\211",	/* capital E, acute accent				*/
    "\212",	/* capital E, circumflex accent				*/
    "\210",	/* capital E, grave accent				*/
    "\213",	/* capital E, dieresis or umlaut mark			*/
    "\215",	/* capital I, acute accent				*/
    "\216",	/* capital I, circumflex accent		these are	*/
    "\214",	/* capital I, grave accent		ISO -100 hex	*/
    "\217",	/* capital I, dieresis or umlaut mark			*/
    "\221",	/* capital N, tilde					*/
    "\223",	/* capital O, acute accent				*/
    "\224",	/* capital O, circumflex accent				*/
    "\222",	/* capital O, grave accent				*/
    "\351",	/* capital O, slash			'cept this	*/
    "\225",	/* capital O, tilde					*/
    "\226",	/* capital O, dieresis or umlaut mark			*/
    "\234",	/* capital THORN, Icelandic */
    "\230",	/* capital U, acute accent */
    "\231",	/* capital U, circumflex accent */
    "\227",	/* capital U, grave accent */
    "\232",	/* capital U, dieresis or umlaut mark */
    "\233",	/* capital Y, acute accent */
    "\326",	/* small a, acute accent */
    "\327",	/* small a, circumflex accent */
    "\361",	/* small ae diphthong (ligature) */
    "\325",	/* small a, grave accent */
    "\046",	/* ampersand */
    "\332",	/* small a, ring */
    "\330",	/* small a, tilde */
    "\331",	/* small a, dieresis or umlaut mark */
    "\333",	/* small c, cedilla */
    "\335",	/* small e, acute accent */
    "\336",	/* small e, circumflex accent */
    "\334",	/* small e, grave accent */
    "\346",	/* small eth, Icelandic */
    "\337",	/* small e, dieresis or umlaut mark */
    "\076",	/* greater than */
    "\342",	/* small i, acute accent */
    "\344",	/* small i, circumflex accent */
    "\340",	/* small i, grave accent */
    "\345",	/* small i, dieresis or umlaut mark */
    "\074",	/* less than */
    "\040",	/* non-breaking space */
    "\347",	/* small n, tilde */
    "\355",	/* small o, acute accent */
    "\356",	/* small o, circumflex accent */
    "\354",	/* small o, grave accent */
    "\371",	/* small o, slash */
    "\357",	/* small o, tilde */
    "\360",	/* small o, dieresis or umlaut mark */
    "\042",	/* double quote sign - June 94 */
    "\373",	/* small sharp s, German (sz ligature) */
    "\374",	/* small thorn, Icelandic */
    "\363",	/* small u, acute accent */
    "\364",	/* small u, circumflex accent */
    "\362",	/* small u, grave accent */
    "\366",	/* small u, dieresis or umlaut mark */
    "\367",	/* small y, acute accent */
    "\375",	/* small y, dieresis or umlaut mark */
};
 242: 
1.2 timbl 243: /*   Entity values -- for IBM/PC Code Page 850 (International)
 244: **
 245: **   This MUST match exactly the table referred to in the DTD!
 246: **
 247: */
 248: /* @@@@@@@@@@@@@@@@@ TBD */
 249: 
 250: 
 251: 
 252: /*       Set character set
 253: **       ----------------
 254: */
 255: 
 256: PRIVATE char** p_entity_values = ISO_Latin1;  /* Pointer to translation */
1.1 timbl 257: 
1.53 frystyk 258: PUBLIC void HTMLUseCharacterSet (HTMLCharacterSet i)
1.2 timbl 259: {
 260:   p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
 261:                       : ISO_Latin1;
 262: }
1.1 timbl 263: 
 264: 
 265: /*       Flattening the style structure
 266: **       ------------------------------
 267: **
 268: On the NeXT, and on any read-only browser, it is simpler for the text to have
 269: a sequence of styles, rather than a nested tree of styles. In this
 270: case we have to flatten the structure as it arrives from SGML tags into
 271: a sequence of styles.
 272: */
 273: 
 274: /*       If style really needs to be set, call this
 275: */
1.53 frystyk 276: PRIVATE void actually_set_style (HTStructured * me)
1.1 timbl 277: {
1.4 timbl 278:   if (!me->text) {          /* First time through */
1.54 frystyk 279:      me->text = HText_new2(me->request, me->node_anchor, me->target);
1.4 timbl 280:      HText_beginAppend(me->text);
 281:      HText_setStyle(me->text, me->new_style);
 282:      me->in_word = NO;
1.1 timbl 283:   } else {
1.4 timbl 284:      HText_setStyle(me->text, me->new_style);
1.1 timbl 285:   }
1.4 timbl 286:   me->old_style = me->new_style;
 287:   me->style_change = NO;
1.1 timbl 288: }
 289: 
 290: /*   If you THINK you need to change style, call this
 291: */
 292: 
1.53 frystyk 293: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle *style)
1.1 timbl 294: {
1.4 timbl 295:   if (me->new_style!=style) {
 296:    me->style_change = YES;
 297:    me->new_style = style;
1.1 timbl 298:   }
1.11 timbl 299:   me->in_word = NO;
1.1 timbl 300: }
 301: 
1.2 timbl 302: /*_________________________________________________________________________
 303: **
 304: **           A C T I O N   R O U T I N E S
 305: */
 306: 
1.64 frystyk 307: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1 timbl 308: {
1.4 timbl 309:   switch (me->sp[0].tag_number) {
1.2 timbl 310:   case HTML_COMMENT:
 311:    break;                 /* Do Nothing */
 312:    
 313:   case HTML_TITLE:  
1.56 frystyk 314:    HTChunk_putb(me->title, &c, 1);
1.2 timbl 315:    break;
 316: 
 317:    
 318:   case HTML_LISTING:             /* Litteral text */
 319:   case HTML_XMP:
 320:   case HTML_PLAINTEXT:
 321:   case HTML_PRE:
 322: /*   We guarrantee that the style is up-to-date in begin_litteral
 323: */
1.4 timbl 324:    HText_appendCharacter(me->text, c);
1.2 timbl 325:    break;
 326:    
 327:   default:                  /* Free format text */
1.4 timbl 328:    if (me->style_change) {
1.42 frystyk 329:      if ((c=='\n') || (c==' ')) return HT_OK;  /* Ignore it */
1.2 timbl 330:      UPDATE_STYLE;
 331:    }
1.62 frystyk 332:    if (c == TAB)
 333:      HText_appendCharacter(me->text, '\t');
1.68 frystyk 334:    else if (isspace((int) c)) {
1.4 timbl 335:      if (me->in_word) {
 336:        HText_appendCharacter(me->text, ' ');
 337:        me->in_word = NO;
1.2 timbl 338:      }
 339:    } else {
1.4 timbl 340:      HText_appendCharacter(me->text, c);
 341:      me->in_word = YES;
1.2 timbl 342:    }
 343:   } /* end switch */
1.42 frystyk 344:   return HT_OK;
1.1 timbl 345: }
 346: 
1.64 frystyk 347: 
 348: PRIVATE int HTML_write (HTStructured * me, const char * b, int l)
1.1 timbl 349: {
1.64 frystyk 350:   while (l-- > 0) HTML_put_character(me, *b++);
 351:   return HT_OK;
1.1 timbl 352: }
 353: 
1.64 frystyk 354: PRIVATE int HTML_put_string (HTStructured * me, const char* s)
1.1 timbl 355: {
1.64 frystyk 356:   while (*s) HTML_put_character(me, *s++);
 357:   return HT_OK;
1.1 timbl 358: }
1.2 timbl 359: 
 360: /*   Start Element
 361: **   -------------
 362: */
1.53 frystyk 363: PRIVATE void HTML_start_element (
 364:    HTStructured * me,
 365:    int           element_number,
1.60 frystyk 366:    const BOOL*       present,
 367:    const char **      value)
1.2 timbl 368: {
 369:   switch (element_number) {
 370:   case HTML_A:
1.69 ! frystyk 371:   {
 ! 372:    HTChildAnchor * source = HTAnchor_findChildAndLink(
 ! 373:      me->node_anchor,                  /* parent */
 ! 374:      present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL,  /* Tag */
 ! 375:      present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL,  /* Addresss */
 ! 376:      present[HTML_A_REL] && value[HTML_A_REL] ? 
 ! 377:      (HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL);
1.2 timbl 378:      
1.69 ! frystyk 379:    if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
 ! 380:      HTLink * link = HTAnchor_mainLink((HTAnchor *) source);
 ! 381:      HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link));
 ! 382:      if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
1.2 timbl 383:    }
1.69 ! frystyk 384:    UPDATE_STYLE;
 ! 385:    HText_beginAnchor(me->text, source);
 ! 386:   }
 ! 387:   break;
1.2 timbl 388:    
1.63 frystyk 389:   case HTML_LINK:
1.69 ! frystyk 390:   {
 ! 391:    if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) {
 ! 392:      char * relative_to = HTAnchor_expandedAddress((HTAnchor *) me->node_anchor);
 ! 393:      char * dest_addr = HTParse(value[HTML_LINK_HREF], relative_to, PARSE_ALL);
 ! 394:      HTParentAnchor * dest = HTAnchor_parent(HTAnchor_findAddress(dest_addr));
 ! 395: 
 ! 396:      /* If forward reference */
 ! 397:      if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) {
 ! 398:        char * strval = NULL;
 ! 399:        char * ptr = NULL;
 ! 400:        char * relation = NULL;
 ! 401:        StrAllocCopy(strval, value[HTML_LINK_REL]);
 ! 402:        ptr = strval;
 ! 403:        while ((relation = HTNextLWSToken(&ptr)) != NULL) {
 ! 404:          HTLink_add((HTAnchor *) me->node_anchor, (HTAnchor *) dest,
 ! 405:                (HTLinkType) HTAtom_caseFor(relation),
 ! 406:                METHOD_INVALID);
 ! 407:        }
 ! 408:        HT_FREE(strval);
 ! 409:      }
 ! 410: 
 ! 411:      /* If reverse reference */
 ! 412:      if ((present[HTML_LINK_REV] && value[HTML_LINK_REV])) {
 ! 413:        char * strval = NULL;
 ! 414:        char * ptr = NULL;
 ! 415:        char * relation = NULL;
 ! 416:        StrAllocCopy(strval, value[HTML_LINK_REV]);
 ! 417:        ptr = strval;
 ! 418:        while ((relation = HTNextLWSToken(&ptr)) != NULL) {
 ! 419:          HTLink_add((HTAnchor *) dest, (HTAnchor *) me->node_anchor,
 ! 420:                (HTLinkType) HTAtom_caseFor(relation),
 ! 421:                METHOD_INVALID);
 ! 422:        }
 ! 423:        HT_FREE(strval);
 ! 424:      }
1.63 frystyk 425: 
1.69 ! frystyk 426:      /* If we got any type information as well */
 ! 427:      if (present[HTML_LINK_TYPE] && value[HTML_LINK_TYPE]) {
 ! 428:        if (HTAnchor_format(dest) == WWW_UNKNOWN)
 ! 429:          HTAnchor_setFormat(dest,
 ! 430:                    (HTFormat) HTAtom_caseFor(value[HTML_LINK_TYPE]));
 ! 431:      }
1.63 frystyk 432: 
1.69 ! frystyk 433:      HT_FREE(dest_addr);
 ! 434:      HT_FREE(relative_to);
 ! 435:    }
 ! 436:   }
 ! 437:   break;
1.63 frystyk 438: 
1.2 timbl 439:   case HTML_TITLE:
1.56 frystyk 440:     HTChunk_clear(me->title);
1.2 timbl 441:    break;
 442:    
 443:   case HTML_NEXTID:
 444:    /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 445:        HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 446:    break;
 447:    
 448:   case HTML_ISINDEX:
1.4 timbl 449:    HTAnchor_setIndex(me->node_anchor);
1.2 timbl 450:    break;
 451:    
1.15 timbl 452:   case HTML_BR: 
 453:    UPDATE_STYLE;
 454:    HText_appendCharacter(me->text, '\n');
 455:    me->in_word = NO;
 456:    break;
 457:    
 458:   case HTML_HR: 
 459:    UPDATE_STYLE;
 460:    HText_appendCharacter(me->text, '\n');
1.16 timbl 461:    HText_appendText(me->text, "___________________________________");
1.15 timbl 462:    HText_appendCharacter(me->text, '\n');
 463:    me->in_word = NO;
 464:    break;
 465:    
1.2 timbl 466:   case HTML_P:
 467:    UPDATE_STYLE;
1.4 timbl 468:    HText_appendParagraph(me->text);
 469:    me->in_word = NO;
1.2 timbl 470:    break;
 471: 
 472:   case HTML_DL:
1.11 timbl 473:     change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 474:        ? styles[HTML_DL]
1.2 timbl 475:        : styles[HTML_DL]);
 476:    break;
 477:    
 478:   case HTML_DT:
1.4 timbl 479:     if (!me->style_change) {
 480:      HText_appendParagraph(me->text);
 481:      me->in_word = NO;
1.2 timbl 482:    }
 483:    break;
 484:    
 485:   case HTML_DD:
 486:     UPDATE_STYLE;
1.62 frystyk 487:    HTML_put_character(me, TAB);  /* Just tab out one stop */
1.4 timbl 488:    me->in_word = NO;
 489:    break;
1.2 timbl 490: 
 491:   case HTML_UL:
 492:   case HTML_OL:
 493:   case HTML_MENU:
 494:   case HTML_DIR:
1.11 timbl 495:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 496:    break;
 497:    
 498:   case HTML_LI:
 499:     UPDATE_STYLE;
1.7 timbl 500:    if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 501:      HText_appendParagraph(me->text);
1.2 timbl 502:    else
1.62 frystyk 503:      HText_appendCharacter(me->text, TAB);
1.4 timbl 504:    me->in_word = NO;
1.2 timbl 505:    break;
 506:    
 507:   case HTML_LISTING:             /* Litteral text */
 508:   case HTML_XMP:
 509:   case HTML_PLAINTEXT:
 510:   case HTML_PRE:
1.11 timbl 511:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 512:    UPDATE_STYLE;
1.4 timbl 513:    if (me->comment_end)
 514:      HText_appendText(me->text, me->comment_end);
1.2 timbl 515:    break;
1.11 timbl 516: 
1.23 frystyk 517:   case HTML_IMG:           /* Images */
 518:    {
 519:      HTChildAnchor *source;
 520:      char *src = NULL;
1.49 frystyk 521:      if (present[HTML_IMG_SRC])
1.23 frystyk 522:        StrAllocCopy(src, value[HTML_IMG_SRC]);
 523:      source = HTAnchor_findChildAndLink(
 524:                        me->node_anchor,  /* parent */
 525:                        0,           /* Tag */
 526:                        src ? src : 0,  /* Addresss */
 527:                        0);
 528:      UPDATE_STYLE;
 529:      HText_appendImage(me->text, source,
1.24 frystyk 530:           present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
 531:           present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
 532:           present[HTML_IMG_ISMAP] ? YES : NO);
1.58 frystyk 533:      HT_FREE(src);
1.24 frystyk 534:    }    
 535:    break;
 536: 
1.63 frystyk 537:   case HTML_BASE:          /* Base header */
 538:    if (present[HTML_BASE_HREF]) {
 539:     char * base = (char *) value[HTML_BASE_HREF];
 540:     if (base) {
 541:       HTAnchor_setBase(me->node_anchor, base);
 542:       if (SGML_TRACE) HTTrace("HTML Parser. New base `%s\'\n", base);
 543:     } else {
 544:       if (SGML_TRACE) HTTrace("HTML Parser. No base found\n");
 545:     }
 546:    }
 547:    break;
 548: 
1.24 frystyk 549:   case HTML_HTML:          /* Ignore these altogether */
 550:   case HTML_HEAD:
 551:   case HTML_BODY:
1.62 frystyk 552:    break;
1.24 frystyk 553:   
1.10 timbl 554:   case HTML_TT:           /* Physical character highlighting */
 555:   case HTML_B:            /* Currently ignored */
 556:   case HTML_I:
 557:   case HTML_U:
1.62 frystyk 558:    UPDATE_STYLE;
1.65 frystyk 559: #if 0
1.62 frystyk 560:    HText_appendCharacter(me->text, '_');
1.65 frystyk 561: #endif
1.62 frystyk 562:    me->in_word = NO;
 563:    break;
1.10 timbl 564:   
 565:   case HTML_EM:           /* Logical character highlighting */
 566:   case HTML_STRONG:         /* Currently ignored */
 567:   case HTML_CODE:
 568:   case HTML_SAMP:
 569:   case HTML_KBD:
 570:   case HTML_VAR:
 571:   case HTML_DFN:
 572:   case HTML_CITE:
 573:    break;
 574:    
1.11 timbl 575:   case HTML_H1:           /* paragraph styles */
 576:   case HTML_H2:
 577:   case HTML_H3:
 578:   case HTML_H4:
 579:   case HTML_H5:
 580:   case HTML_H6:
 581:   case HTML_H7:
 582:   case HTML_ADDRESS:
 583:   case HTML_BLOCKQUOTE:
 584:    change_paragraph_style(me, styles[element_number]);   /* May be postponed */
1.2 timbl 585:    break;
 586: 
 587:   } /* end switch */
 588: 
1.16 timbl 589:   if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 590:     if (me->sp == me->stack) {
1.44 frystyk 591:      if (SGML_TRACE)
1.63 frystyk 592:        HTTrace("HTML Parser. Maximum nesting of %d exceded!\n",
1.44 frystyk 593:            MAX_NESTING); 
 594:      me->overflow++;
1.12 timbl 595:      return;
 596:    }
1.4 timbl 597:    --(me->sp);
 598:    me->sp[0].style = me->new_style;    /* Stack new style */
 599:    me->sp[0].tag_number = element_number;
1.10 timbl 600:   } 
1.1 timbl 601: }
1.10 timbl 602: 
1.2 timbl 603: 
1.1 timbl 604: /*       End Element
1.2 timbl 605: **       -----------
1.1 timbl 606: **
1.2 timbl 607: */
 608: /*   When we end an element, the style must be returned to that
1.1 timbl 609: **   in effect before that element. Note that anchors (etc?)
 610: **   don't have an associated style, so that we must scan down the
 611: **   stack for an element with a defined style. (In fact, the styles
 612: **   should be linked to the whole stack not just the top one.)
 613: **   TBL 921119
1.6 timbl 614: **
 615: **   We don't turn on "CAREFUL" check because the parser produces
 616: **   (internal code errors apart) good nesting. The parser checks
 617: **   incoming code errors, not this module.
1.1 timbl 618: */
1.53 frystyk 619: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1 timbl 620: {
1.2 timbl 621: #ifdef CAREFUL         /* parser assumed to produce good nesting */
1.4 timbl 622:   if (element_number != me->sp[0].tag_number) {
1.59 eric 623:     HTTrace("HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 624:        me->dtd->tags[element_number].name,
 625:        me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 626:        /* panic */
1.1 timbl 627:   }
1.2 timbl 628: #endif
1.44 frystyk 629: 
 630:   /* HFN, If overflow of nestings, we need to get back to reality */
 631:   if (me->overflow > 0) {
 632:    me->overflow--;
 633:    return;
 634:   }
 635: 
1.4 timbl 636:   me->sp++;             /* Pop state off stack */
1.67 frystyk 637:   if (me->sp > me->stack + MAX_NESTING - 1) {
 638:    if (SGML_TRACE) HTTrace("HTML Parser. Bottom of style stack reached\n");
 639:    me->sp = me->stack + MAX_NESTING - 1;
 640:   }
1.44 frystyk 641: 
1.2 timbl 642:   switch(element_number) {
 643: 
 644:   case HTML_A:
 645:    UPDATE_STYLE;
1.4 timbl 646:    HText_endAnchor(me->text);
1.2 timbl 647:    break;
 648: 
 649:   case HTML_TITLE:
1.56 frystyk 650:    HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2 timbl 651:    break;
 652:    
1.62 frystyk 653:   case HTML_TT:           /* Physical character highlighting */
 654:   case HTML_B:            /* Currently ignored */
 655:   case HTML_I:
 656:   case HTML_U:
 657:    UPDATE_STYLE;
1.66 frystyk 658: #if 0
1.62 frystyk 659:    HText_appendCharacter(me->text, '_');
1.66 frystyk 660: #endif
1.62 frystyk 661:    break;
1.66 frystyk 662: 
 663:   case HTML_EM:           /* Logical character highlighting */
 664:   case HTML_STRONG:         /* Currently ignored */
 665:   case HTML_CODE:
 666:   case HTML_SAMP:
 667:   case HTML_KBD:
 668:   case HTML_VAR:
 669:   case HTML_DFN:
 670:   case HTML_CITE:
 671:    break;
1.62 frystyk 672:   
1.2 timbl 673:   case HTML_LISTING:             /* Litteral text */
 674:   case HTML_XMP:
 675:   case HTML_PLAINTEXT:
 676:   case HTML_PRE:
1.4 timbl 677:    if (me->comment_start)
 678:      HText_appendText(me->text, me->comment_start);
1.2 timbl 679:    /* Fall through */
 680:    
 681:   default:
1.44 frystyk 682: 
 683:    /* Often won't really change */
 684:    change_paragraph_style(me, me->sp->style);
1.2 timbl 685:    break;
 686:    
 687:   } /* switch */
1.1 timbl 688: }
 689: 
1.2 timbl 690: 
 691: /*       Expanding entities
 692: **       ------------------
 693: */
 694: /*   (In fact, they all shrink!)
1.1 timbl 695: */
1.2 timbl 696: 
1.53 frystyk 697: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1 timbl 698: {
1.4 timbl 699:   HTML_put_string(me, ISO_Latin1[entity_number]);  /* @@ Other representations */
1.1 timbl 700: }
1.2 timbl 701: 
1.42 frystyk 702: /*   Flush an HTML object
 703: **   --------------------
 704: */
1.53 frystyk 705: PUBLIC int HTML_flush (HTStructured * me)
1.42 frystyk 706: {
 707:   UPDATE_STYLE;              /* Creates empty document here! */
1.57 frystyk 708:   if (me->comment_end) HTML_put_string(me,me->comment_end);
 709:   return me->target ? (*me->targetClass.flush)(me->target) : HT_OK;
1.42 frystyk 710: }
1.2 timbl 711: 
 712: /*   Free an HTML object
 713: **   -------------------
 714: **
1.4 timbl 715: ** If the document is empty, the text object will not yet exist.
 716:  So we could in fact abandon creating the document and return
 717:  an error code. In fact an empty document is an important type
 718:  of document, so we don't.
 719: **
 720: **   If non-interactive, everything is freed off.  No: crashes -listrefs
1.2 timbl 721: **   Otherwise, the interactive object is left.   
 722: */
1.53 frystyk 723: PUBLIC int HTML_free (HTStructured * me)
1.1 timbl 724: {
1.4 timbl 725:   UPDATE_STYLE;       /* Creates empty document here! */
 726:   if (me->comment_end)
 727:        HTML_put_string(me,me->comment_end);
 728:   HText_endAppend(me->text);
 729: 
 730:   if (me->target) {
1.35 duns 731:     (*me->targetClass._free)(me->target);
1.2 timbl 732:   }
1.56 frystyk 733:   HTChunk_delete(me->title);
1.58 frystyk 734:   HT_FREE(me);
1.42 frystyk 735:   return HT_OK;
1.1 timbl 736: }
 737: 
 738: 
1.53 frystyk 739: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.1 timbl 740: 
1.14 timbl 741: {
 742:   if (me->target) {
 743:     (*me->targetClass.abort)(me->target, e);
 744:   }
1.56 frystyk 745:   HTChunk_delete(me->title);
1.58 frystyk 746:   HT_FREE(me);
1.42 frystyk 747:   return HT_ERROR;
1.1 timbl 748: }
 749: 
1.2 timbl 750: 
 751: /*   Get Styles from style sheet
 752: **   ---------------------------
 753: */
1.53 frystyk 754: PRIVATE void get_styles (void)
1.1 timbl 755: {
1.2 timbl 756:   got_styles = YES;
 757:   
 758:   default_style =      HTStyleNamed(styleSheet, "Normal");
1.1 timbl 759: 
1.2 timbl 760:   styles[HTML_H1] =     HTStyleNamed(styleSheet, "Heading1");
 761:   styles[HTML_H2] =     HTStyleNamed(styleSheet, "Heading2");
 762:   styles[HTML_H3] =     HTStyleNamed(styleSheet, "Heading3");
 763:   styles[HTML_H4] =     HTStyleNamed(styleSheet, "Heading4");
 764:   styles[HTML_H5] =     HTStyleNamed(styleSheet, "Heading5");
 765:   styles[HTML_H6] =     HTStyleNamed(styleSheet, "Heading6");
 766:   styles[HTML_H7] =     HTStyleNamed(styleSheet, "Heading7");
 767: 
 768:   styles[HTML_DL] =     HTStyleNamed(styleSheet, "Glossary");
 769:   styles[HTML_UL] =
 770:   styles[HTML_OL] =     HTStyleNamed(styleSheet, "List");
 771:   styles[HTML_MENU] =        HTStyleNamed(styleSheet, "Menu");
 772:   styles[HTML_DIR] =     HTStyleNamed(styleSheet, "Dir");  
1.16 timbl 773: /* styles[HTML_DLC] =     HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 774:   styles[HTML_ADDRESS]=   HTStyleNamed(styleSheet, "Address");
 775:   styles[HTML_BLOCKQUOTE]=  HTStyleNamed(styleSheet, "BlockQuote");
 776:   styles[HTML_PLAINTEXT] =
 777:   styles[HTML_XMP] =     HTStyleNamed(styleSheet, "Example");
 778:   styles[HTML_PRE] =     HTStyleNamed(styleSheet, "Preformatted");
 779:   styles[HTML_LISTING] =   HTStyleNamed(styleSheet, "Listing");
 780: }
 781: /*               P U B L I C
 782: */
 783: 
 784: /*   Structured Object Class
 785: **   -----------------------
 786: */
1.60 frystyk 787: PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2 timbl 788: {       
 789:    "text/html",
1.42 frystyk 790:    HTML_flush,
1.2 timbl 791:    HTML_free,
1.14 timbl 792:    HTML_abort,
1.2 timbl 793:    HTML_put_character,   HTML_put_string, HTML_write,
 794:    HTML_start_element,   HTML_end_element,
 795:    HTML_put_entity
 796: }; 
1.1 timbl 797: 
1.4 timbl 798: 
1.2 timbl 799: /*       New Structured Text object
 800: **       --------------------------
 801: **
1.16 timbl 802: **   The structured stream can generate either presentation,
1.4 timbl 803: **   or plain text, or HTML.
1.1 timbl 804: */
1.53 frystyk 805: PRIVATE HTStructured* HTML_new (HTRequest *  request,
 806:                   void *       param,
 807:                   HTFormat      input_format,
 808:                   HTFormat      output_format,
 809:                   HTStream * output_stream)
1.1 timbl 810: {
 811: 
1.4 timbl 812:   HTStructured * me;
 813:   
1.47 frystyk 814: #if 0
1.16 timbl 815:   if (output_format != WWW_PLAINTEXT
 816:    && output_format != WWW_PRESENT
 817:    && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 818:     HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
 819:                        output_stream, request, NO);
1.6 timbl 820:    if (intermediate) return HTMLGenerator(intermediate);
1.44 frystyk 821:    if (SGML_TRACE)
1.63 frystyk 822:      HTTrace("HTML Parser. Can't parse HTML to %s\n",
1.44 frystyk 823:          HTAtom_name(output_format));
1.4 timbl 824:    exit (-99);
 825:   }
1.47 frystyk 826: #endif
1.4 timbl 827: 
1.58 frystyk 828:   if ((me = (HTStructured *) HT_CALLOC(1, sizeof(*me))) == NULL)
 829:     HT_OUTOFMEM("HTML_new");
1.1 timbl 830: 
 831:   if (!got_styles) get_styles();
 832: 
1.4 timbl 833:   me->isa = &HTMLPresentation;
1.47 frystyk 834:   me->dtd = &HTMLP_dtd;
1.54 frystyk 835:   me->request = request;
1.48 frystyk 836:   me->node_anchor = HTRequest_anchor(request);
1.56 frystyk 837:   me->title = HTChunk_new(128);
1.4 timbl 838:   me->text = 0;
 839:   me->style_change = YES; /* Force check leading to text creation */
 840:   me->new_style = default_style;
 841:   me->old_style = 0;
 842:   me->sp = me->stack + MAX_NESTING - 1;
 843:   me->sp->tag_number = -1;              /* INVALID */
 844:   me->sp->style = default_style;           /* INVALID */
1.1 timbl 845:   
1.4 timbl 846:   me->comment_start = NULL;
 847:   me->comment_end = NULL;
1.16 timbl 848:   me->target = output_stream;
 849:   if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 850:   
1.4 timbl 851:   return (HTStructured*) me;
1.1 timbl 852: }
 853: 
 854: 
1.2 timbl 855: /*   HTConverter for HTML to plain text
 856: **   ----------------------------------
1.1 timbl 857: **
1.2 timbl 858: **   This will convert from HTML to presentation or plain text.
1.1 timbl 859: */
1.53 frystyk 860: PUBLIC HTStream* HTMLToPlain (
 861:    HTRequest *       request,
 862:    void *         param,
 863:    HTFormat        input_format,
 864:    HTFormat        output_format,
 865:    HTStream *       output_stream)
1.1 timbl 866: {
1.47 frystyk 867:   return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 868:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 869: }
 870: 
 871: 
1.2 timbl 872: /*   HTConverter for HTML to C code
 873: **   ------------------------------
 874: **
1.36 frystyk 875: **   C code is like plain text but all non-preformatted code
1.2 timbl 876: **   is commented out.
 877: **   This will convert from HTML to presentation or plain text.
 878: */
1.53 frystyk 879: PUBLIC HTStream* HTMLToC (
 880:    HTRequest *       request,
 881:    void *         param,
 882:    HTFormat        input_format,
 883:    HTFormat        output_format,
 884:    HTStream *       output_stream)
1.1 timbl 885: {
1.4 timbl 886:   
 887:   HTStructured * html;
 888:   
1.36 frystyk 889:   (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 890:   html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45 frystyk 891:   html->comment_start = "\n/* ";
1.47 frystyk 892:   html->dtd = &HTMLP_dtd;
1.2 timbl 893:   html->comment_end = " */\n";    /* Must start in col 1 for cpp */
1.47 frystyk 894:   return SGML_new(&HTMLP_dtd, html);
1.1 timbl 895: }
 896: 
 897: 
1.2 timbl 898: /*   Presenter for HTML
 899: **   ------------------
 900: **
 901: **   This will convert from HTML to presentation or plain text.
 902: **
 903: **   Override this if you have a windows version
1.1 timbl 904: */
1.2 timbl 905: #ifndef GUI
1.53 frystyk 906: PUBLIC HTStream* HTMLPresent (
 907:    HTRequest *       request,
 908:    void *         param,
 909:    HTFormat        input_format,
 910:    HTFormat        output_format,
 911:    HTStream *       output_stream)
1.1 timbl 912: {
1.47 frystyk 913:   return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 914:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 915: }
1.2 timbl 916: #endif
1.29 frystyk 917: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /