[BACK] Return to HTML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTML.c, revision 1.12

1.2 timbl 1: /*       Structured stream to Rich hypertext converter
 2: **       ============================================
1.1 timbl 3: **
1.2 timbl 4: **   This generates of a hypertext object. It converts from the
 5: **   structured stream interface fro HTMl events into the style-
 6: **   oriented iunterface of the HText.h interface. This module is
 7: **   only used in clients and shouldnot be linked into servers.
1.1 timbl 8: **
1.6 timbl 9: **   Override this module if making a new GUI browser.
1.1 timbl 10: **
 11: */
 12: #include "HTML.h"
 13: 
/* #define CAREFUL		Check nesting here -- not really necessary */
1.2 timbl 15: 
1.1 timbl 16: #include <ctype.h>
 17: #include <stdio.h>
 18: 
 19: #include "HTAtom.h"
 20: #include "HTChunk.h"
 21: #include "HText.h"
 22: #include "HTStyle.h"
 23: 
1.3 timbl 24: #include "HTAlert.h"
1.4 timbl 25: #include "HTMLGen.h"
1.8 timbl 26: #include "HTParse.h"
1.1 timbl 27: 
 28: extern HTStyleSheet * styleSheet;   /* Application-wide */
 29: 
 30: /*   Module-wide style cache
 31: */
 32: PRIVATE int      got_styles = 0;
1.2 timbl 33: PRIVATE HTStyle *styles[HTML_ELEMENTS];
 34: PRIVATE HTStyle *default_style;
1.1 timbl 35: 
 36: 
 37: /*       HTML Object
 38: **       -----------
 39: */
1.2 timbl 40: #define MAX_NESTING 20     /* Should be checked by parser */
 41: 
 42: typedef struct _stack_element {
 43:     HTStyle *   style;
 44:    int       tag_number;
 45: } stack_element;
 46: 
 47: struct _HTStructured {
 48:   CONST HTStructuredClass * isa;
 49:   HTParentAnchor *      node_anchor;
 50:   HText *          text;
 51: 
 52:   HTStream*         target;         /* Output stream */
 53:   HTStreamClass       targetClass;      /* Output routines */
 54: 
 55:   HTChunk          title;     /* Grow by 128 */
 56:   
 57:   char *           comment_start; /* for literate programming */
 58:   char *           comment_end;
 59: 
 60:   HTTag *          current_tag;
 61:   BOOL            style_change;
 62:   HTStyle *         new_style;
 63:   HTStyle *         old_style;
 64:   BOOL            in_word; /* Have just had a non-white char */
 65:   stack_element   stack[MAX_NESTING];
 66:   stack_element   *sp;      /* Style stack pointer */
1.1 timbl 67: };
 68: 
1.2 timbl 69: struct _HTStream {
 70:   CONST HTStreamClass *   isa;
 71:   /* .... */
 72: };
1.1 timbl 73: 
 74: /*       Forward declarations of routines
 75: */
 76: PRIVATE void get_styles NOPARAMS;
 77: 
 78: 
1.4 timbl 79: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 80: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 81: 
 82: /*   Style buffering avoids dummy paragraph begin/ends.
 83: */
1.4 timbl 84: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 85: 
 86: 
#ifdef OLD_CODE
/* The following accented characters are from Peter Flynn, CURIA project.
**
** NOTE(review): this is a retired fragment -- the declaration that opened
** the table is no longer here, so it would not compile if OLD_CODE were
** defined.  It is kept for reference only.  The octal escapes below have
** been restored to their "\NNN" form.
*/

/* these ifdefs don't solve the problem of a simple terminal emulator
** with a different character set to the client machine. But nothing does,
** except looking at the TERM setting */


    { "ocus" , "&" },		/* for CURIA */
#ifdef IBMPC
    { "aacute" , "\240" },	/* For PC display */
    { "eacute" , "\202" },
    { "iacute" , "\241" },
    { "oacute" , "\242" },
    { "uacute" , "\243" },
    { "Aacute" , "\101" },
    { "Eacute" , "\220" },
    { "Iacute" , "\111" },
    { "Oacute" , "\117" },
    { "Uacute" , "\125" },
#else
    { "aacute" , "\341" },	/* Works for openwindows -- Peter Flynn */
    { "eacute" , "\351" },
    { "iacute" , "\355" },
    { "oacute" , "\363" },
    { "uacute" , "\372" },
    { "Aacute" , "\301" },
    { "Eacute" , "\310" },
    { "Iacute" , "\315" },
    { "Oacute" , "\323" },
    { "Uacute" , "\332" },
#endif
    { 0,	0 }  /* Terminate list */
};
#endif
1.1 timbl 122: 
 123: 
/*	Entity values -- for ISO Latin 1 local representation
**
**	This MUST match exactly the table referred to in the DTD!
**
**	Each entry is a one-character string written as an octal escape.
**	The table is indexed by the entity number passed to
**	HTML_put_entity().
*/
static char * ISO_Latin1[] = {
    "\306",	/* capital AE diphthong (ligature) */
    "\301",	/* capital A, acute accent */
    "\302",	/* capital A, circumflex accent */
    "\300",	/* capital A, grave accent */
    "\305",	/* capital A, ring */
    "\303",	/* capital A, tilde */
    "\304",	/* capital A, dieresis or umlaut mark */
    "\307",	/* capital C, cedilla */
    "\320",	/* capital Eth, Icelandic */
    "\311",	/* capital E, acute accent */
    "\312",	/* capital E, circumflex accent */
    "\310",	/* capital E, grave accent */
    "\313",	/* capital E, dieresis or umlaut mark */
    "\315",	/* capital I, acute accent */
    "\316",	/* capital I, circumflex accent */
    "\314",	/* capital I, grave accent */
    "\317",	/* capital I, dieresis or umlaut mark */
    "\321",	/* capital N, tilde */
    "\323",	/* capital O, acute accent */
    "\324",	/* capital O, circumflex accent */
    "\322",	/* capital O, grave accent */
    "\330",	/* capital O, slash */
    "\325",	/* capital O, tilde */
    "\326",	/* capital O, dieresis or umlaut mark */
    "\336",	/* capital THORN, Icelandic */
    "\332",	/* capital U, acute accent */
    "\333",	/* capital U, circumflex accent */
    "\331",	/* capital U, grave accent */
    "\334",	/* capital U, dieresis or umlaut mark */
    "\335",	/* capital Y, acute accent */
    "\341",	/* small a, acute accent */
    "\342",	/* small a, circumflex accent */
    "\346",	/* small ae diphthong (ligature) */
    "\340",	/* small a, grave accent */
    "\046",	/* ampersand */
    "\345",	/* small a, ring */
    "\343",	/* small a, tilde */
    "\344",	/* small a, dieresis or umlaut mark */
    "\347",	/* small c, cedilla */
    "\351",	/* small e, acute accent */
    "\352",	/* small e, circumflex accent */
    "\350",	/* small e, grave accent */
    "\360",	/* small eth, Icelandic */
    "\353",	/* small e, dieresis or umlaut mark */
    "\076",	/* greater than */
    "\355",	/* small i, acute accent */
    "\356",	/* small i, circumflex accent */
    "\354",	/* small i, grave accent */
    "\357",	/* small i, dieresis or umlaut mark */
    "\074",	/* less than */
    "\361",	/* small n, tilde */
    "\363",	/* small o, acute accent */
    "\364",	/* small o, circumflex accent */
    "\362",	/* small o, grave accent */
    "\370",	/* small o, slash */
    "\365",	/* small o, tilde */
    "\366",	/* small o, dieresis or umlaut mark */
    "\337",	/* small sharp s, German (sz ligature) */
    "\376",	/* small thorn, Icelandic */
    "\372",	/* small u, acute accent */
    "\373",	/* small u, circumflex accent */
    "\371",	/* small u, grave accent */
    "\374",	/* small u, dieresis or umlaut mark */
    "\375",	/* small y, acute accent */
    "\377",	/* small y, dieresis or umlaut mark */
};
 195: 
1.2 timbl 196: 
 197: /*   Entity values -- for NeXT local representation
 198: **
 199: **   This MUST match exactly the table referred to in the DTD!
 200: **
 201: */
 202: static char * NeXTCharacters[] = {
 203:    "341円", /* capital AE diphthong (ligature)   */ 
 204:    "202円", /* capital A, acute accent       */ 
 205:    "203円", /* capital A, circumflex accent     */ 
 206:    "201円", /* capital A, grave accent       */ 
 207:    "206円", /* capital A, ring           */ 
 208:    "204円", /* capital A, tilde           */ 
 209:    "205円", /* capital A, dieresis or umlaut mark  */ 
 210:    "207円", /* capital C, cedilla          */ 
 211:    "220円", /* capital Eth, Icelandic        */ 
 212:    "211円", /* capital E, acute accent               */ 
 213:    "212円", /* capital E, circumflex accent             */ 
 214:    "210円", /* capital E, grave accent               */ 
 215:    "213円", /* capital E, dieresis or umlaut mark          */ 
 216:    "215円", /* capital I, acute accent               */ 
 217:    "216円", /* capital I, circumflex accent     these are    */ 
 218:    "214円", /* capital I, grave accent       ISO -100 hex  */ 
 219:    "217円", /* capital I, dieresis or umlaut mark          */ 
 220:    "221円", /* capital N, tilde                   */ 
 221:    "223円", /* capital O, acute accent               */ 
 222:    "224円", /* capital O, circumflex accent             */ 
 223:    "222円", /* capital O, grave accent               */ 
 224:    "351円", /* capital O, slash       'cept this */ 
 225:    "225円", /* capital O, tilde                   */ 
 226:    "226円", /* capital O, dieresis or umlaut mark          */ 
 227:    "234円", /* capital THORN, Icelandic */ 
 228:    "230円", /* capital U, acute accent */ 
 229:    "231円", /* capital U, circumflex accent */ 
 230:    "227円", /* capital U, grave accent */ 
 231:    "232円", /* capital U, dieresis or umlaut mark */ 
 232:    "233円", /* capital Y, acute accent */ 
 233:    "326円", /* small a, acute accent */ 
 234:    "327円", /* small a, circumflex accent */ 
 235:    "361円", /* small ae diphthong (ligature) */ 
 236:    "325円", /* small a, grave accent */ 
 237:    "046円", /* ampersand */ 
 238:    "332円", /* small a, ring */ 
 239:    "330円", /* small a, tilde */ 
 240:    "331円", /* small a, dieresis or umlaut mark */ 
 241:    "333円", /* small c, cedilla */ 
 242:    "335円", /* small e, acute accent */ 
 243:    "336円", /* small e, circumflex accent */ 
 244:    "334円", /* small e, grave accent */ 
 245:    "346円", /* small eth, Icelandic     */ 
 246:    "337円", /* small e, dieresis or umlaut mark */ 
 247:    "076円", /* greater than */ 
 248:    "342円", /* small i, acute accent */ 
 249:    "344円", /* small i, circumflex accent */ 
 250:    "340円", /* small i, grave accent */ 
 251:    "345円", /* small i, dieresis or umlaut mark */ 
 252:    "074円", /* less than */ 
 253:    "347円", /* small n, tilde */ 
 254:    "355円", /* small o, acute accent */ 
 255:    "356円", /* small o, circumflex accent */ 
 256:    "354円", /* small o, grave accent */ 
 257:    "371円", /* small o, slash */ 
 258:    "357円", /* small o, tilde */ 
 259:    "360円", /* small o, dieresis or umlaut mark */ 
 260:    "373円", /* small sharp s, German (sz ligature) */ 
 261:    "374円", /* small thorn, Icelandic */ 
 262:    "363円", /* small u, acute accent */ 
 263:    "364円", /* small u, circumflex accent */ 
 264:    "362円", /* small u, grave accent */ 
 265:    "366円", /* small u, dieresis or umlaut mark */ 
 266:    "367円", /* small y, acute accent */ 
 267:    "375円", /* small y, dieresis or umlaut mark */ 
1.1 timbl 268: };
 269: 
1.2 timbl 270: /*   Entity values -- for IBM/PC Code Page 850 (International)
 271: **
 272: **   This MUST match exactly the table referred to in the DTD!
 273: **
 274: */
 275: /* @@@@@@@@@@@@@@@@@ TBD */
 276: 
 277: 
 278: 
 279: /*       Set character set
 280: **       ----------------
 281: */
 282: 
 283: PRIVATE char** p_entity_values = ISO_Latin1;  /* Pointer to translation */
1.1 timbl 284: 
1.2 timbl 285: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
 286: {
 287:   p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
 288:                       : ISO_Latin1;
 289: }
1.1 timbl 290: 
 291: 
 292: /*       Flattening the style structure
 293: **       ------------------------------
 294: **
 295: On the NeXT, and on any read-only browser, it is simpler for the text to have
 296: a sequence of styles, rather than a nested tree of styles. In this
 297: case we have to flatten the structure as it arrives from SGML tags into
 298: a sequence of styles.
 299: */
 300: 
 301: /*       If style really needs to be set, call this
 302: */
1.4 timbl 303: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 304: {
1.4 timbl 305:   if (!me->text) {          /* First time through */
 306:      me->text = HText_new2(me->node_anchor, me->target);
 307:      HText_beginAppend(me->text);
 308:      HText_setStyle(me->text, me->new_style);
 309:      me->in_word = NO;
1.1 timbl 310:   } else {
1.4 timbl 311:      HText_setStyle(me->text, me->new_style);
1.1 timbl 312:   }
1.4 timbl 313:   me->old_style = me->new_style;
 314:   me->style_change = NO;
1.1 timbl 315: }
 316: 
 317: /*   If you THINK you need to change style, call this
 318: */
 319: 
1.11 timbl 320: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 321: {
1.4 timbl 322:   if (me->new_style!=style) {
 323:    me->style_change = YES;
 324:    me->new_style = style;
1.1 timbl 325:   }
1.11 timbl 326:   me->in_word = NO;
1.1 timbl 327: }
 328: 
1.2 timbl 329: /*_________________________________________________________________________
 330: **
 331: **           A C T I O N   R O U T I N E S
 332: */
 333: 
 334: /*   Character handling
 335: **   ------------------
1.1 timbl 336: */
1.4 timbl 337: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 338: {
1.2 timbl 339: 
1.4 timbl 340:   switch (me->sp[0].tag_number) {
1.2 timbl 341:   case HTML_COMMENT:
 342:    break;                 /* Do Nothing */
 343:    
 344:   case HTML_TITLE:  
1.4 timbl 345:    HTChunkPutc(&me->title, c);
1.2 timbl 346:    break;
 347: 
 348:    
 349:   case HTML_LISTING:             /* Litteral text */
 350:   case HTML_XMP:
 351:   case HTML_PLAINTEXT:
 352:   case HTML_PRE:
 353: /*   We guarrantee that the style is up-to-date in begin_litteral
 354: */
1.4 timbl 355:    HText_appendCharacter(me->text, c);
1.2 timbl 356:    break;
 357:    
 358:   default:                  /* Free format text */
1.4 timbl 359:    if (me->style_change) {
1.2 timbl 360:      if ((c=='\n') || (c==' ')) return; /* Ignore it */
 361:      UPDATE_STYLE;
 362:    }
 363:    if (c=='\n') {
1.4 timbl 364:      if (me->in_word) {
 365:        HText_appendCharacter(me->text, ' ');
 366:        me->in_word = NO;
1.2 timbl 367:      }
 368:    } else {
1.4 timbl 369:      HText_appendCharacter(me->text, c);
 370:      me->in_word = YES;
1.2 timbl 371:    }
 372:   } /* end switch */
1.1 timbl 373: }
 374: 
1.2 timbl 375: 
 376: 
 377: /*   String handling
 378: **   ---------------
 379: **
 380: **   This is written separately from put_character becuase the loop can
1.11 timbl 381: **   in some cases be promoted to a higher function call level for speed.
1.2 timbl 382: */
1.4 timbl 383: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 384: {
1.2 timbl 385: 
1.4 timbl 386:   switch (me->sp[0].tag_number) {
1.2 timbl 387:   case HTML_COMMENT:
 388:    break;                 /* Do Nothing */
 389:    
 390:   case HTML_TITLE:  
1.4 timbl 391:    HTChunkPuts(&me->title, s);
1.2 timbl 392:    break;
 393: 
 394:    
 395:   case HTML_LISTING:             /* Litteral text */
 396:   case HTML_XMP:
 397:   case HTML_PLAINTEXT:
 398:   case HTML_PRE:
 399: 
 400: /*   We guarrantee that the style is up-to-date in begin_litteral
 401: */
1.4 timbl 402:    HText_appendText(me->text, s);
1.2 timbl 403:    break;
 404:    
 405:   default:                  /* Free format text */
 406:     {
 407:      CONST char *p = s;
1.4 timbl 408:      if (me->style_change) {
1.2 timbl 409:        for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
 410:        if (!*p) return;
 411:        UPDATE_STYLE;
 412:      }
 413:      for(; *p; p++) {
1.4 timbl 414:        if (me->style_change) {
1.2 timbl 415:          if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
 416:          UPDATE_STYLE;
 417:        }
 418:        if (*p=='\n') {
1.4 timbl 419:          if (me->in_word) {
 420:            HText_appendCharacter(me->text, ' ');
 421:            me->in_word = NO;
1.2 timbl 422:          }
 423:        } else {
1.4 timbl 424:          HText_appendCharacter(me->text, *p);
 425:          me->in_word = YES;
1.2 timbl 426:        }
 427:      } /* for */
 428:    }
 429:   } /* end switch */
1.1 timbl 430: }
 431: 
 432: 
1.2 timbl 433: /*   Buffer write
1.3 timbl 434: **   ------------
1.1 timbl 435: */
1.4 timbl 436: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 437: {
1.2 timbl 438:   CONST char* p;
 439:   CONST char* e = s+l;
1.4 timbl 440:   for (p=s; s<e; p++) HTML_put_character(me, *p);
1.1 timbl 441: }
1.2 timbl 442: 
 443: 
 444: /*   Start Element
 445: **   -------------
 446: */
 447: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 448:    HTStructured *,     me,
1.2 timbl 449:    int,      element_number,
1.3 timbl 450:    CONST BOOL*,      present,
 451:    CONST char **, value)
1.2 timbl 452: {
 453:   switch (element_number) {
 454:   case HTML_A:
 455:    {
1.8 timbl 456:      HTChildAnchor * source;
1.9 timbl 457:      char * href = NULL;
 458:      if (present[HTML_A_HREF]) {
 459:        StrAllocCopy(href, value[HTML_A_HREF]);
 460:        HTSimplify(href);
 461:      }
1.8 timbl 462:      source = HTAnchor_findChildAndLink(
1.4 timbl 463:        me->node_anchor,                /* parent */
1.2 timbl 464:        present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 465:        present[HTML_A_HREF] ? href : 0,        /* Addresss */
1.2 timbl 466:        present[HTML_A_TYPE] && value[HTML_A_TYPE] ? 
 467:            (HTLinkType*)HTAtom_for(value[HTML_A_TYPE])
 468:                        : 0);
 469:      
 470:      if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
 471:        HTParentAnchor * dest = 
 472:          HTAnchor_parent(
 473:            HTAnchor_followMainLink((HTAnchor*)source)
 474:                  );
 475:        if (!HTAnchor_title(dest))
 476:            HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
 477:      }
 478:      UPDATE_STYLE;
1.4 timbl 479:      HText_beginAnchor(me->text, source);
1.2 timbl 480:    }
 481:    break;
 482:    
 483:   case HTML_TITLE:
1.4 timbl 484:     HTChunkClear(&me->title);
1.2 timbl 485:    break;
 486:    
 487:   case HTML_NEXTID:
 488:    /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 489:        HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 490:    break;
 491:    
 492:   case HTML_ISINDEX:
1.4 timbl 493:    HTAnchor_setIndex(me->node_anchor);
1.2 timbl 494:    break;
 495:    
 496:   case HTML_P:
 497:    UPDATE_STYLE;
1.4 timbl 498:    HText_appendParagraph(me->text);
 499:    me->in_word = NO;
1.2 timbl 500:    break;
 501: 
 502:   case HTML_DL:
1.11 timbl 503:     change_paragraph_style(me, present && present[DL_COMPACT]
1.2 timbl 504:        ? styles[HTML_DLC]
 505:        : styles[HTML_DL]);
 506:    break;
 507:    
 508:   case HTML_DT:
1.4 timbl 509:     if (!me->style_change) {
 510:      HText_appendParagraph(me->text);
 511:      me->in_word = NO;
1.2 timbl 512:    }
 513:    break;
 514:    
 515:   case HTML_DD:
 516:     UPDATE_STYLE;
1.4 timbl 517:    HTML_put_character(me, '\t');  /* Just tab out one stop */
 518:    me->in_word = NO;
 519:    break;
1.2 timbl 520: 
 521:   case HTML_UL:
 522:   case HTML_OL:
 523:   case HTML_MENU:
 524:   case HTML_DIR:
1.11 timbl 525:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 526:    break;
 527:    
 528:   case HTML_LI:
 529:     UPDATE_STYLE;
1.7 timbl 530:    if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 531:      HText_appendParagraph(me->text);
1.2 timbl 532:    else
1.4 timbl 533:      HText_appendCharacter(me->text, '\t');   /* Tab @@ nl for UL? */
 534:    me->in_word = NO;
1.2 timbl 535:    break;
 536:    
 537:   case HTML_LISTING:             /* Litteral text */
 538:   case HTML_XMP:
 539:   case HTML_PLAINTEXT:
 540:   case HTML_PRE:
1.11 timbl 541:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 542:    UPDATE_STYLE;
1.4 timbl 543:    if (me->comment_end)
 544:      HText_appendText(me->text, me->comment_end);
1.2 timbl 545:    break;
1.11 timbl 546: 
 547:   case HTML_HTML:          /* Ignore these altogether */
 548:   case HTML_HEAD:
 549:   case HTML_BODY:
 550:   
1.10 timbl 551:   case HTML_IMG:           /* Images -- ignore */
 552:   
 553:   case HTML_TT:           /* Physical character highlighting */
 554:   case HTML_B:            /* Currently ignored */
 555:   case HTML_I:
 556:   case HTML_U:
 557:   
 558:   case HTML_EM:           /* Logical character highlighting */
 559:   case HTML_STRONG:         /* Currently ignored */
 560:   case HTML_CODE:
 561:   case HTML_SAMP:
 562:   case HTML_KBD:
 563:   case HTML_VAR:
 564:   case HTML_DFN:
 565:   case HTML_CITE:
 566:    break;
 567:    
1.11 timbl 568:   case HTML_H1:           /* paragraph styles */
 569:   case HTML_H2:
 570:   case HTML_H3:
 571:   case HTML_H4:
 572:   case HTML_H5:
 573:   case HTML_H6:
 574:   case HTML_H7:
 575:   case HTML_ADDRESS:
 576:   case HTML_BLOCKQUOTE:
 577:    change_paragraph_style(me, styles[element_number]);   /* May be postponed */
1.2 timbl 578:    break;
 579: 
 580:   } /* end switch */
 581: 
 582:   if (HTML_dtd.tags[element_number].contents!= SGML_EMPTY) {
1.12 ! timbl 583:     if (me->sp == &me->stack) {
 ! 584:      fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
 ! 585:      MAX_NESTING); 
 ! 586:      return;
 ! 587:    }
1.4 timbl 588:    --(me->sp);
 589:    me->sp[0].style = me->new_style;    /* Stack new style */
 590:    me->sp[0].tag_number = element_number;
1.10 timbl 591:   } 
1.1 timbl 592: }
1.10 timbl 593: 
1.2 timbl 594: 
1.1 timbl 595: /*       End Element
1.2 timbl 596: **       -----------
1.1 timbl 597: **
1.2 timbl 598: */
 599: /*   When we end an element, the style must be returned to that
1.1 timbl 600: **   in effect before that element. Note that anchors (etc?)
 601: **   don't have an associated style, so that we must scan down the
 602: **   stack for an element with a defined style. (In fact, the styles
 603: **   should be linked to the whole stack not just the top one.)
 604: **   TBL 921119
1.6 timbl 605: **
 606: **   We don't turn on "CAREFUL" check because the parser produces
 607: **   (internal code errors apart) good nesting. The parser checks
 608: **   incoming code errors, not this module.
1.1 timbl 609: */
1.4 timbl 610: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 611: {
1.2 timbl 612: #ifdef CAREFUL         /* parser assumed to produce good nesting */
1.4 timbl 613:   if (element_number != me->sp[0].tag_number) {
1.2 timbl 614:     fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
 615:        HTML_dtd.tags[element_number].name,
1.4 timbl 616:        HTML_dtd.tags[me->sp->tag_number].name);
1.6 timbl 617:        /* panic */
1.1 timbl 618:   }
1.2 timbl 619: #endif
 620:   
1.4 timbl 621:   me->sp++;             /* Pop state off stack */
1.2 timbl 622:   
 623:   switch(element_number) {
 624: 
 625:   case HTML_A:
 626:    UPDATE_STYLE;
1.4 timbl 627:    HText_endAnchor(me->text);
1.2 timbl 628:    break;
 629: 
 630:   case HTML_TITLE:
1.4 timbl 631:     HTChunkTerminate(&me->title);
 632:    HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 633:    break;
 634:    
 635:   case HTML_LISTING:             /* Litteral text */
 636:   case HTML_XMP:
 637:   case HTML_PLAINTEXT:
 638:   case HTML_PRE:
1.4 timbl 639:    if (me->comment_start)
 640:      HText_appendText(me->text, me->comment_start);
1.2 timbl 641:    /* Fall through */
 642:    
 643:   default:
 644:   
1.11 timbl 645:    change_paragraph_style(me, me->sp->style);   /* Often won't really change */
1.2 timbl 646:    break;
 647:    
 648:   } /* switch */
1.1 timbl 649: }
 650: 
1.2 timbl 651: 
 652: /*       Expanding entities
 653: **       ------------------
 654: */
 655: /*   (In fact, they all shrink!)
1.1 timbl 656: */
1.2 timbl 657: 
1.4 timbl 658: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 659: {
1.4 timbl 660:   HTML_put_string(me, ISO_Latin1[entity_number]);  /* @@ Other representations */
1.1 timbl 661: }
 662: 
1.2 timbl 663: 
 664: 
 665: /*   Free an HTML object
 666: **   -------------------
 667: **
1.4 timbl 668: ** If the document is empty, the text object will not yet exist.
 669:  So we could in fact abandon creating the document and return
 670:  an error code. In fact an empty document is an important type
 671:  of document, so we don't.
 672: **
 673: **   If non-interactive, everything is freed off.  No: crashes -listrefs
1.2 timbl 674: **   Otherwise, the interactive object is left.   
 675: */
1.4 timbl 676: PUBLIC void HTML_free ARGS1(HTStructured *, me)
1.1 timbl 677: {
1.4 timbl 678:   UPDATE_STYLE;       /* Creates empty document here! */
 679:   if (me->comment_end)
 680:        HTML_put_string(me,me->comment_end);
 681:   HText_endAppend(me->text);
 682: 
 683:   if (me->target) {
 684:     (*me->targetClass.end_document)(me->target);
 685:     (*me->targetClass.free)(me->target);
 686: /*   HText_free(me->text);  */       /* @@@@@@@@@@@@@@@ */
1.2 timbl 687:   }
1.4 timbl 688:   free(me);
1.1 timbl 689: }
 690: 
 691: 
1.4 timbl 692: PRIVATE void HTML_end_document ARGS1(HTStructured *, me)
1.1 timbl 693: 
1.4 timbl 694: {           /* Obsolete */
1.1 timbl 695: }
 696: 
1.2 timbl 697: 
 698: /*   Get Styles from style sheet
 699: **   ---------------------------
 700: */
 701: PRIVATE void get_styles NOARGS
1.1 timbl 702: {
1.2 timbl 703:   got_styles = YES;
 704:   
 705:   default_style =      HTStyleNamed(styleSheet, "Normal");
1.1 timbl 706: 
1.2 timbl 707:   styles[HTML_H1] =     HTStyleNamed(styleSheet, "Heading1");
 708:   styles[HTML_H2] =     HTStyleNamed(styleSheet, "Heading2");
 709:   styles[HTML_H3] =     HTStyleNamed(styleSheet, "Heading3");
 710:   styles[HTML_H4] =     HTStyleNamed(styleSheet, "Heading4");
 711:   styles[HTML_H5] =     HTStyleNamed(styleSheet, "Heading5");
 712:   styles[HTML_H6] =     HTStyleNamed(styleSheet, "Heading6");
 713:   styles[HTML_H7] =     HTStyleNamed(styleSheet, "Heading7");
 714: 
 715:   styles[HTML_DL] =     HTStyleNamed(styleSheet, "Glossary");
 716:   styles[HTML_UL] =
 717:   styles[HTML_OL] =     HTStyleNamed(styleSheet, "List");
 718:   styles[HTML_MENU] =        HTStyleNamed(styleSheet, "Menu");
 719:   styles[HTML_DIR] =     HTStyleNamed(styleSheet, "Dir");  
 720:   styles[HTML_DLC] =     HTStyleNamed(styleSheet, "GlossaryCompact");
 721:   styles[HTML_ADDRESS]=   HTStyleNamed(styleSheet, "Address");
 722:   styles[HTML_BLOCKQUOTE]=  HTStyleNamed(styleSheet, "BlockQuote");
 723:   styles[HTML_PLAINTEXT] =
 724:   styles[HTML_XMP] =     HTStyleNamed(styleSheet, "Example");
 725:   styles[HTML_PRE] =     HTStyleNamed(styleSheet, "Preformatted");
 726:   styles[HTML_LISTING] =   HTStyleNamed(styleSheet, "Listing");
 727: }
 728: /*               P U B L I C
 729: */
 730: 
 731: /*   Structured Object Class
 732: **   -----------------------
 733: */
 734: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
 735: {       
 736:    "text/html",
 737:    HTML_free,
 738:    HTML_end_document,
 739:    HTML_put_character,   HTML_put_string, HTML_write,
 740:    HTML_start_element,   HTML_end_element,
 741:    HTML_put_entity
 742: }; 
1.1 timbl 743: 
1.4 timbl 744: 
1.2 timbl 745: /*       New Structured Text object
 746: **       --------------------------
 747: **
1.4 timbl 748: **   The strutcured stream can generate either presentation,
 749: **   or plain text, or HTML.
1.1 timbl 750: */
1.4 timbl 751: PUBLIC HTStructured* HTML_new ARGS3(
1.2 timbl 752:    HTParentAnchor *,    anchor,
1.4 timbl 753:    HTFormat,        format_out,
1.2 timbl 754:    HTStream*,       stream)
1.1 timbl 755: {
 756: 
1.4 timbl 757:   HTStructured * me;
 758:   
 759:   if (format_out != WWW_PLAINTEXT && format_out != WWW_PRESENT) {
1.6 timbl 760:     HTStream * intermediate = HTStreamStack(WWW_HTML, format_out,
 761:        stream, anchor);
 762:    if (intermediate) return HTMLGenerator(intermediate);
1.4 timbl 763:     fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
 764:            HTAtom_name(format_out));
 765:    exit (-99);
 766:   }
 767: 
 768:   me = (HTStructured*) malloc(sizeof(*me));
 769:   if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1 timbl 770: 
 771:   if (!got_styles) get_styles();
 772: 
1.4 timbl 773:   me->isa = &HTMLPresentation;
 774:   me->node_anchor = anchor;
 775:   me->title.size = 0;
 776:   me->title.growby = 128;
 777:   me->title.allocated = 0;
 778:   me->title.data = 0;
 779:   me->text = 0;
 780:   me->style_change = YES; /* Force check leading to text creation */
 781:   me->new_style = default_style;
 782:   me->old_style = 0;
 783:   me->sp = me->stack + MAX_NESTING - 1;
 784:   me->sp->tag_number = -1;              /* INVALID */
 785:   me->sp->style = default_style;           /* INVALID */
1.1 timbl 786:   
1.4 timbl 787:   me->comment_start = NULL;
 788:   me->comment_end = NULL;
 789:   me->target = stream;
 790:   if (stream) me->targetClass = *stream->isa;    /* Copy pointers */
1.1 timbl 791:   
1.4 timbl 792:   return (HTStructured*) me;
1.1 timbl 793: }
 794: 
 795: 
1.2 timbl 796: /*   HTConverter for HTML to plain text
 797: **   ----------------------------------
1.1 timbl 798: **
1.2 timbl 799: **   This will convert from HTML to presentation or plain text.
1.1 timbl 800: */
1.2 timbl 801: PUBLIC HTStream* HTMLToPlain ARGS3(
 802:    HTPresentation *,    pres,
 803:    HTParentAnchor *,    anchor, 
 804:    HTStream *,       sink)
1.1 timbl 805: {
1.4 timbl 806:   return SGML_new(&HTML_dtd, HTML_new(anchor, pres->rep_out, sink));
1.1 timbl 807: }
 808: 
 809: 
1.2 timbl 810: /*   HTConverter for HTML to C code
 811: **   ------------------------------
 812: **
 813: **   C copde is like plain text but all non-preformatted code
 814: **   is commented out.
 815: **   This will convert from HTML to presentation or plain text.
 816: */
 817: PUBLIC HTStream* HTMLToC ARGS3(
 818:    HTPresentation *,    pres,
 819:    HTParentAnchor *,    anchor, 
 820:    HTStream *,       sink)
1.1 timbl 821: {
1.4 timbl 822:   
 823:   HTStructured * html;
 824:   
 825:   (*sink->isa->put_string)(sink, "/* ");   /* Before even title */
 826:   html = HTML_new(anchor, WWW_PLAINTEXT, sink);
1.2 timbl 827:   html->comment_start = "/* ";
 828:   html->comment_end = " */\n";    /* Must start in col 1 for cpp */
1.4 timbl 829: /*  HTML_put_string(html,html->comment_start); */
1.2 timbl 830:   return SGML_new(&HTML_dtd, html);
1.1 timbl 831: }
 832: 
 833: 
1.2 timbl 834: /*   Presenter for HTML
 835: **   ------------------
 836: **
 837: **   This will convert from HTML to presentation or plain text.
 838: **
 839: **   Override this if you have a windows version
1.1 timbl 840: */
1.2 timbl 841: #ifndef GUI
 842: PUBLIC HTStream* HTMLPresent ARGS3(
 843:    HTPresentation *,    pres,
 844:    HTParentAnchor *,    anchor, 
 845:    HTStream *,       sink)
1.1 timbl 846: {
1.4 timbl 847:   return SGML_new(&HTML_dtd, HTML_new(anchor, WWW_PRESENT, NULL));
1.1 timbl 848: }
1.2 timbl 849: #endif
1.1 timbl 850: 
 851: 
1.2 timbl 852: /*   Record error message as a hypertext object
 853: **   ------------------------------------------
 854: **
 855: **   The error message should be marked as an error so that
 856: **   it can be reloaded later.
 857: **   This implementation just throws up an error message
 858: **   and leaves the document unloaded.
1.9 timbl 859: **   A smarter implementation would load an error document,
 860: **   marking at such so that it is retried on reload.
1.1 timbl 861: **
1.2 timbl 862: ** On entry,
 863: **   sink  is a stream to the output device if any
 864: **   number is the HTTP error number
 865: **   message is the human readable message.
1.9 timbl 866: **
 867: ** On exit,
 868: **   returns a negative number to indicate lack of success in the load.
1.1 timbl 869: */
1.2 timbl 870: 
 871: PUBLIC int HTLoadError ARGS3(
 872:    HTStream *,   sink,
 873:    int,      number,
 874:    CONST char *,  message)
 875: {
 876:   HTAlert(message);     /* @@@@@@@@@@@@@@@@@@@ */
 877:   return -number;
 878: } 
 879: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /