[BACK] Return to HTML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTML.c, revision 1.17

1.2 timbl 1: /*       Structured stream to Rich hypertext converter
 2: **       ============================================
1.1 timbl 3: **
1.2 timbl 4: **   This generates a hypertext object. It converts from the
 5: **   structured stream interface for HTML events into the style-
 6: **   oriented interface defined in HText.h. This module is
 7: **   only used in clients and should not be linked into servers.
1.1 timbl 8: **
1.6 timbl 9: **   Override this module if making a new GUI browser.
1.1 timbl 10: **
 11: */
1.16 timbl 12: 
1.1 timbl 13: #include "HTML.h"
 14: 
1.16 timbl 15: /* #define CAREFUL       Check nesting here not really necessary */
1.2 timbl 16: 
1.1 timbl 17: #include <ctype.h>
 18: #include <stdio.h>
 19: 
 20: #include "HTAtom.h"
 21: #include "HTChunk.h"
 22: #include "HText.h"
 23: #include "HTStyle.h"
 24: 
1.3 timbl 25: #include "HTAlert.h"
1.4 timbl 26: #include "HTMLGen.h"
1.8 timbl 27: #include "HTParse.h"
1.1 timbl 28: 
 29: extern HTStyleSheet * styleSheet;   /* Application-wide */
 30: 
 31: /*   Module-wide style cache
 32: */
 33: PRIVATE int      got_styles = 0;
1.16 timbl 34: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 35: PRIVATE HTStyle *default_style;
1.1 timbl 36: 
 37: 
 38: /*       HTML Object
 39: **       -----------
 40: */
1.2 timbl 41: #define MAX_NESTING 20     /* Should be checked by parser */
 42: 
 43: typedef struct _stack_element {
 44:     HTStyle *   style;
 45:    int       tag_number;
 46: } stack_element;
 47: 
 48: struct _HTStructured {
 49:   CONST HTStructuredClass * isa;
 50:   HTParentAnchor *      node_anchor;
 51:   HText *          text;
 52: 
 53:   HTStream*         target;         /* Output stream */
 54:   HTStreamClass       targetClass;      /* Output routines */
 55: 
 56:   HTChunk          title;     /* Grow by 128 */
 57:   
 58:   char *           comment_start; /* for literate programming */
 59:   char *           comment_end;
1.16 timbl 60:   
 61:   CONST SGML_dtd*      dtd;
 62:   
1.2 timbl 63:   HTTag *          current_tag;
 64:   BOOL            style_change;
 65:   HTStyle *         new_style;
 66:   HTStyle *         old_style;
 67:   BOOL            in_word; /* Have just had a non-white char */
 68:   stack_element   stack[MAX_NESTING];
 69:   stack_element   *sp;      /* Style stack pointer */
1.1 timbl 70: };
 71: 
1.2 timbl 72: struct _HTStream {
 73:   CONST HTStreamClass *   isa;
 74:   /* .... */
 75: };
1.1 timbl 76: 
 77: /*       Forward declarations of routines
 78: */
 79: PRIVATE void get_styles NOPARAMS;
 80: 
 81: 
1.4 timbl 82: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 83: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 84: 
 85: /*   Style buffering avoids dummy paragraph begin/ends.
 86: */
/*	Apply a pending style change, if there is one.
**
**	Expects a variable named "me" in scope.  The body is wrapped in
**	do/while(0) so that "UPDATE_STYLE;" is exactly one statement and
**	can be used safely as the body of an if/else.
*/
#define UPDATE_STYLE \
	do { if (me->style_change) { actually_set_style(me); } } while (0)
1.1 timbl 88: 
 89: 
#ifdef OLD_CODE
/*	Obsolete accented-character entries, kept for reference only.
**	The array declaration that these entries belonged to is not
**	present, so this fragment must stay disabled.
**
**	The following accented characters are from Peter Flynn, CURIA project.
**
**	These ifdefs don't solve the problem of a simple terminal emulator
**	with a different character set to the client machine. But nothing
**	does, except looking at the TERM setting.
*/
    { "ocus" , "&" },		/* for CURIA */
#ifdef IBMPC
    { "aacute" , "\240" },	/* For PC display */
    { "eacute" , "\202" },
    { "iacute" , "\241" },
    { "oacute" , "\242" },
    { "uacute" , "\243" },
    { "Aacute" , "\101" },
    { "Eacute" , "\220" },
    { "Iacute" , "\111" },
    { "Oacute" , "\117" },
    { "Uacute" , "\125" },
#else
    { "aacute" , "\341" },	/* Works for openwindows -- Peter Flynn */
    { "eacute" , "\351" },
    { "iacute" , "\355" },
    { "oacute" , "\363" },
    { "uacute" , "\372" },
    { "Aacute" , "\301" },
    { "Eacute" , "\310" },
    { "Iacute" , "\315" },
    { "Oacute" , "\323" },
    { "Uacute" , "\332" },
#endif
    { 0,	0 }		/* Terminate list */
};
#endif
1.1 timbl 125: 
 126: 
/*	Entity values -- for ISO Latin 1 local representation
**
**	This MUST match exactly the table referred to in the DTD!
**
**	Each entry is a one-character string written as an octal escape;
**	the index into the array is the entity number.
*/
static char * ISO_Latin1[] = {
    "\306",	/* capital AE diphthong (ligature) */
    "\301",	/* capital A, acute accent */
    "\302",	/* capital A, circumflex accent */
    "\300",	/* capital A, grave accent */
    "\305",	/* capital A, ring */
    "\303",	/* capital A, tilde */
    "\304",	/* capital A, dieresis or umlaut mark */
    "\307",	/* capital C, cedilla */
    "\320",	/* capital Eth, Icelandic */
    "\311",	/* capital E, acute accent */
    "\312",	/* capital E, circumflex accent */
    "\310",	/* capital E, grave accent */
    "\313",	/* capital E, dieresis or umlaut mark */
    "\315",	/* capital I, acute accent */
    "\316",	/* capital I, circumflex accent */
    "\314",	/* capital I, grave accent */
    "\317",	/* capital I, dieresis or umlaut mark */
    "\321",	/* capital N, tilde */
    "\323",	/* capital O, acute accent */
    "\324",	/* capital O, circumflex accent */
    "\322",	/* capital O, grave accent */
    "\330",	/* capital O, slash */
    "\325",	/* capital O, tilde */
    "\326",	/* capital O, dieresis or umlaut mark */
    "\336",	/* capital THORN, Icelandic */
    "\332",	/* capital U, acute accent */
    "\333",	/* capital U, circumflex accent */
    "\331",	/* capital U, grave accent */
    "\334",	/* capital U, dieresis or umlaut mark */
    "\335",	/* capital Y, acute accent */
    "\341",	/* small a, acute accent */
    "\342",	/* small a, circumflex accent */
    "\346",	/* small ae diphthong (ligature) */
    "\340",	/* small a, grave accent */
    "\046",	/* ampersand */
    "\345",	/* small a, ring */
    "\343",	/* small a, tilde */
    "\344",	/* small a, dieresis or umlaut mark */
    "\347",	/* small c, cedilla */
    "\351",	/* small e, acute accent */
    "\352",	/* small e, circumflex accent */
    "\350",	/* small e, grave accent */
    "\360",	/* small eth, Icelandic */
    "\353",	/* small e, dieresis or umlaut mark */
    "\076",	/* greater than */
    "\355",	/* small i, acute accent */
    "\356",	/* small i, circumflex accent */
    "\354",	/* small i, grave accent */
    "\357",	/* small i, dieresis or umlaut mark */
    "\074",	/* less than */
    "\361",	/* small n, tilde */
    "\363",	/* small o, acute accent */
    "\364",	/* small o, circumflex accent */
    "\362",	/* small o, grave accent */
    "\370",	/* small o, slash */
    "\365",	/* small o, tilde */
    "\366",	/* small o, dieresis or umlaut mark */
    "\337",	/* small sharp s, German (sz ligature) */
    "\376",	/* small thorn, Icelandic */
    "\372",	/* small u, acute accent */
    "\373",	/* small u, circumflex accent */
    "\371",	/* small u, grave accent */
    "\374",	/* small u, dieresis or umlaut mark */
    "\375",	/* small y, acute accent */
    "\377",	/* small y, dieresis or umlaut mark */
};
 198: 
1.2 timbl 199: 
 /*	Entity values -- for NeXT local representation
**
**	This MUST match exactly the table referred to in the DTD!
**
**	Each entry is a one-character string written as an octal escape;
**	the index into the array is the entity number.
*/
static char * NeXTCharacters[] = {
    "\341",	/* capital AE diphthong (ligature) */
    "\202",	/* capital A, acute accent */
    "\203",	/* capital A, circumflex accent */
    "\201",	/* capital A, grave accent */
    "\206",	/* capital A, ring */
    "\204",	/* capital A, tilde */
    "\205",	/* capital A, dieresis or umlaut mark */
    "\207",	/* capital C, cedilla */
    "\220",	/* capital Eth, Icelandic */
    "\211",	/* capital E, acute accent */
    "\212",	/* capital E, circumflex accent */
    "\210",	/* capital E, grave accent */
    "\213",	/* capital E, dieresis or umlaut mark */
    "\215",	/* capital I, acute accent */
    "\216",	/* capital I, circumflex accent      these are */
    "\214",	/* capital I, grave accent           ISO -100 hex */
    "\217",	/* capital I, dieresis or umlaut mark */
    "\221",	/* capital N, tilde */
    "\223",	/* capital O, acute accent */
    "\224",	/* capital O, circumflex accent */
    "\222",	/* capital O, grave accent */
    "\351",	/* capital O, slash                  'cept this */
    "\225",	/* capital O, tilde */
    "\226",	/* capital O, dieresis or umlaut mark */
    "\234",	/* capital THORN, Icelandic */
    "\230",	/* capital U, acute accent */
    "\231",	/* capital U, circumflex accent */
    "\227",	/* capital U, grave accent */
    "\232",	/* capital U, dieresis or umlaut mark */
    "\233",	/* capital Y, acute accent */
    "\326",	/* small a, acute accent */
    "\327",	/* small a, circumflex accent */
    "\361",	/* small ae diphthong (ligature) */
    "\325",	/* small a, grave accent */
    "\046",	/* ampersand */
    "\332",	/* small a, ring */
    "\330",	/* small a, tilde */
    "\331",	/* small a, dieresis or umlaut mark */
    "\333",	/* small c, cedilla */
    "\335",	/* small e, acute accent */
    "\336",	/* small e, circumflex accent */
    "\334",	/* small e, grave accent */
    "\346",	/* small eth, Icelandic */
    "\337",	/* small e, dieresis or umlaut mark */
    "\076",	/* greater than */
    "\342",	/* small i, acute accent */
    "\344",	/* small i, circumflex accent */
    "\340",	/* small i, grave accent */
    "\345",	/* small i, dieresis or umlaut mark */
    "\074",	/* less than */
    "\347",	/* small n, tilde */
    "\355",	/* small o, acute accent */
    "\356",	/* small o, circumflex accent */
    "\354",	/* small o, grave accent */
    "\371",	/* small o, slash */
    "\357",	/* small o, tilde */
    "\360",	/* small o, dieresis or umlaut mark */
    "\373",	/* small sharp s, German (sz ligature) */
    "\374",	/* small thorn, Icelandic */
    "\363",	/* small u, acute accent */
    "\364",	/* small u, circumflex accent */
    "\362",	/* small u, grave accent */
    "\366",	/* small u, dieresis or umlaut mark */
    "\367",	/* small y, acute accent */
    "\375",	/* small y, dieresis or umlaut mark */
};
 272: 
1.2 timbl 273: /*   Entity values -- for IBM/PC Code Page 850 (International)
 274: **
 275: **   This MUST match exactly the table referred to in the DTD!
 276: **
 277: */
 278: /* @@@@@@@@@@@@@@@@@ TBD */
 279: 
 280: 
 281: 
 282: /*       Set character set
 283: **       ----------------
 284: */
 285: 
 286: PRIVATE char** p_entity_values = ISO_Latin1;  /* Pointer to translation */
1.1 timbl 287: 
1.2 timbl 288: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
 289: {
 290:   p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
 291:                       : ISO_Latin1;
 292: }
1.1 timbl 293: 
 294: 
 295: /*       Flattening the style structure
 296: **       ------------------------------
 297: **
 298: On the NeXT, and on any read-only browser, it is simpler for the text to have
 299: a sequence of styles, rather than a nested tree of styles. In this
 300: case we have to flatten the structure as it arrives from SGML tags into
 301: a sequence of styles.
 302: */
 303: 
 304: /*       If style really needs to be set, call this
 305: */
1.4 timbl 306: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 307: {
1.4 timbl 308:   if (!me->text) {          /* First time through */
 309:      me->text = HText_new2(me->node_anchor, me->target);
 310:      HText_beginAppend(me->text);
 311:      HText_setStyle(me->text, me->new_style);
 312:      me->in_word = NO;
1.1 timbl 313:   } else {
1.4 timbl 314:      HText_setStyle(me->text, me->new_style);
1.1 timbl 315:   }
1.4 timbl 316:   me->old_style = me->new_style;
 317:   me->style_change = NO;
1.1 timbl 318: }
 319: 
 320: /*   If you THINK you need to change style, call this
 321: */
 322: 
1.11 timbl 323: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 324: {
1.4 timbl 325:   if (me->new_style!=style) {
 326:    me->style_change = YES;
 327:    me->new_style = style;
1.1 timbl 328:   }
1.11 timbl 329:   me->in_word = NO;
1.1 timbl 330: }
 331: 
1.2 timbl 332: /*_________________________________________________________________________
 333: **
 334: **           A C T I O N   R O U T I N E S
 335: */
 336: 
 337: /*   Character handling
 338: **   ------------------
1.1 timbl 339: */
1.4 timbl 340: PRIVATE void HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 341: {
1.2 timbl 342: 
1.4 timbl 343:   switch (me->sp[0].tag_number) {
1.2 timbl 344:   case HTML_COMMENT:
 345:    break;                 /* Do Nothing */
 346:    
 347:   case HTML_TITLE:  
1.4 timbl 348:    HTChunkPutc(&me->title, c);
1.2 timbl 349:    break;
 350: 
 351:    
 352:   case HTML_LISTING:             /* Litteral text */
 353:   case HTML_XMP:
 354:   case HTML_PLAINTEXT:
 355:   case HTML_PRE:
 356: /*   We guarrantee that the style is up-to-date in begin_litteral
 357: */
1.4 timbl 358:    HText_appendCharacter(me->text, c);
1.2 timbl 359:    break;
 360:    
 361:   default:                  /* Free format text */
1.4 timbl 362:    if (me->style_change) {
1.2 timbl 363:      if ((c=='\n') || (c==' ')) return; /* Ignore it */
 364:      UPDATE_STYLE;
 365:    }
 366:    if (c=='\n') {
1.4 timbl 367:      if (me->in_word) {
 368:        HText_appendCharacter(me->text, ' ');
 369:        me->in_word = NO;
1.2 timbl 370:      }
 371:    } else {
1.4 timbl 372:      HText_appendCharacter(me->text, c);
 373:      me->in_word = YES;
1.2 timbl 374:    }
 375:   } /* end switch */
1.1 timbl 376: }
 377: 
1.2 timbl 378: 
 379: 
 380: /*   String handling
 381: **   ---------------
 382: **
 383: **   This is written separately from put_character becuase the loop can
1.11 timbl 384: **   in some cases be promoted to a higher function call level for speed.
1.2 timbl 385: */
1.4 timbl 386: PRIVATE void HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 387: {
1.2 timbl 388: 
1.4 timbl 389:   switch (me->sp[0].tag_number) {
1.2 timbl 390:   case HTML_COMMENT:
 391:    break;                 /* Do Nothing */
 392:    
 393:   case HTML_TITLE:  
1.4 timbl 394:    HTChunkPuts(&me->title, s);
1.2 timbl 395:    break;
 396: 
 397:    
 398:   case HTML_LISTING:             /* Litteral text */
 399:   case HTML_XMP:
 400:   case HTML_PLAINTEXT:
 401:   case HTML_PRE:
 402: 
 403: /*   We guarrantee that the style is up-to-date in begin_litteral
 404: */
1.4 timbl 405:    HText_appendText(me->text, s);
1.2 timbl 406:    break;
 407:    
 408:   default:                  /* Free format text */
 409:     {
 410:      CONST char *p = s;
1.4 timbl 411:      if (me->style_change) {
1.2 timbl 412:        for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
 413:        if (!*p) return;
 414:        UPDATE_STYLE;
 415:      }
 416:      for(; *p; p++) {
1.4 timbl 417:        if (me->style_change) {
1.2 timbl 418:          if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
 419:          UPDATE_STYLE;
 420:        }
 421:        if (*p=='\n') {
1.4 timbl 422:          if (me->in_word) {
 423:            HText_appendCharacter(me->text, ' ');
 424:            me->in_word = NO;
1.2 timbl 425:          }
 426:        } else {
1.4 timbl 427:          HText_appendCharacter(me->text, *p);
 428:          me->in_word = YES;
1.2 timbl 429:        }
 430:      } /* for */
 431:    }
 432:   } /* end switch */
1.1 timbl 433: }
 434: 
 435: 
1.2 timbl 436: /*   Buffer write
1.3 timbl 437: **   ------------
1.1 timbl 438: */
1.4 timbl 439: PRIVATE void HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 440: {
1.2 timbl 441:   CONST char* p;
 442:   CONST char* e = s+l;
1.4 timbl 443:   for (p=s; s<e; p++) HTML_put_character(me, *p);
1.1 timbl 444: }
1.2 timbl 445: 
 446: 
 447: /*   Start Element
 448: **   -------------
 449: */
 450: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 451:    HTStructured *,     me,
1.16 timbl 452:    int,          element_number,
1.3 timbl 453:    CONST BOOL*,      present,
1.16 timbl 454:    CONST char **,     value)
1.2 timbl 455: {
 456:   switch (element_number) {
 457:   case HTML_A:
 458:    {
1.8 timbl 459:      HTChildAnchor * source;
1.9 timbl 460:      char * href = NULL;
 461:      if (present[HTML_A_HREF]) {
 462:        StrAllocCopy(href, value[HTML_A_HREF]);
 463:        HTSimplify(href);
 464:      }
1.8 timbl 465:      source = HTAnchor_findChildAndLink(
1.4 timbl 466:        me->node_anchor,                /* parent */
1.2 timbl 467:        present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 468:        present[HTML_A_HREF] ? href : 0,        /* Addresss */
1.16 timbl 469:        present[HTML_A_REL] && value[HTML_A_REL] ? 
 470:            (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2 timbl 471:                        : 0);
 472:      
 473:      if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
 474:        HTParentAnchor * dest = 
 475:          HTAnchor_parent(
 476:            HTAnchor_followMainLink((HTAnchor*)source)
 477:                  );
 478:        if (!HTAnchor_title(dest))
 479:            HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
 480:      }
 481:      UPDATE_STYLE;
1.4 timbl 482:      HText_beginAnchor(me->text, source);
1.2 timbl 483:    }
 484:    break;
 485:    
 486:   case HTML_TITLE:
1.4 timbl 487:     HTChunkClear(&me->title);
1.2 timbl 488:    break;
 489:    
 490:   case HTML_NEXTID:
 491:    /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 492:        HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 493:    break;
 494:    
 495:   case HTML_ISINDEX:
1.4 timbl 496:    HTAnchor_setIndex(me->node_anchor);
1.2 timbl 497:    break;
 498:    
1.15 timbl 499:   case HTML_BR: 
 500:    UPDATE_STYLE;
 501:    HText_appendCharacter(me->text, '\n');
 502:    me->in_word = NO;
 503:    break;
 504:    
 505:   case HTML_HR: 
 506:    UPDATE_STYLE;
 507:    HText_appendCharacter(me->text, '\n');
1.16 timbl 508:    HText_appendText(me->text, "___________________________________");
1.15 timbl 509:    HText_appendCharacter(me->text, '\n');
 510:    me->in_word = NO;
 511:    break;
 512:    
1.2 timbl 513:   case HTML_P:
 514:    UPDATE_STYLE;
1.4 timbl 515:    HText_appendParagraph(me->text);
 516:    me->in_word = NO;
1.2 timbl 517:    break;
 518: 
 519:   case HTML_DL:
1.11 timbl 520:     change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 521:        ? styles[HTML_DL]
1.2 timbl 522:        : styles[HTML_DL]);
 523:    break;
 524:    
 525:   case HTML_DT:
1.4 timbl 526:     if (!me->style_change) {
 527:      HText_appendParagraph(me->text);
 528:      me->in_word = NO;
1.2 timbl 529:    }
 530:    break;
 531:    
 532:   case HTML_DD:
 533:     UPDATE_STYLE;
1.4 timbl 534:    HTML_put_character(me, '\t');  /* Just tab out one stop */
 535:    me->in_word = NO;
 536:    break;
1.2 timbl 537: 
 538:   case HTML_UL:
 539:   case HTML_OL:
 540:   case HTML_MENU:
 541:   case HTML_DIR:
1.11 timbl 542:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 543:    break;
 544:    
 545:   case HTML_LI:
 546:     UPDATE_STYLE;
1.7 timbl 547:    if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 548:      HText_appendParagraph(me->text);
1.2 timbl 549:    else
1.4 timbl 550:      HText_appendCharacter(me->text, '\t');   /* Tab @@ nl for UL? */
 551:    me->in_word = NO;
1.2 timbl 552:    break;
 553:    
 554:   case HTML_LISTING:             /* Litteral text */
 555:   case HTML_XMP:
 556:   case HTML_PLAINTEXT:
 557:   case HTML_PRE:
1.11 timbl 558:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 559:    UPDATE_STYLE;
1.4 timbl 560:    if (me->comment_end)
 561:      HText_appendText(me->text, me->comment_end);
1.2 timbl 562:    break;
1.11 timbl 563: 
 564:   case HTML_HTML:          /* Ignore these altogether */
 565:   case HTML_HEAD:
 566:   case HTML_BODY:
 567:   
1.10 timbl 568:   case HTML_IMG:           /* Images -- ignore */
 569:   
 570:   case HTML_TT:           /* Physical character highlighting */
 571:   case HTML_B:            /* Currently ignored */
 572:   case HTML_I:
 573:   case HTML_U:
 574:   
 575:   case HTML_EM:           /* Logical character highlighting */
 576:   case HTML_STRONG:         /* Currently ignored */
 577:   case HTML_CODE:
 578:   case HTML_SAMP:
 579:   case HTML_KBD:
 580:   case HTML_VAR:
 581:   case HTML_DFN:
 582:   case HTML_CITE:
 583:    break;
 584:    
1.11 timbl 585:   case HTML_H1:           /* paragraph styles */
 586:   case HTML_H2:
 587:   case HTML_H3:
 588:   case HTML_H4:
 589:   case HTML_H5:
 590:   case HTML_H6:
 591:   case HTML_H7:
 592:   case HTML_ADDRESS:
 593:   case HTML_BLOCKQUOTE:
 594:    change_paragraph_style(me, styles[element_number]);   /* May be postponed */
1.2 timbl 595:    break;
 596: 
 597:   } /* end switch */
 598: 
1.16 timbl 599:   if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 600:     if (me->sp == me->stack) {
1.12 timbl 601:      fprintf(stderr, "HTML: ****** Maximum nesting of %d exceded!\n",
 602:      MAX_NESTING); 
 603:      return;
 604:    }
1.4 timbl 605:    --(me->sp);
 606:    me->sp[0].style = me->new_style;    /* Stack new style */
 607:    me->sp[0].tag_number = element_number;
1.10 timbl 608:   } 
1.1 timbl 609: }
1.10 timbl 610: 
1.2 timbl 611: 
1.1 timbl 612: /*       End Element
1.2 timbl 613: **       -----------
1.1 timbl 614: **
1.2 timbl 615: */
 616: /*   When we end an element, the style must be returned to that
1.1 timbl 617: **   in effect before that element. Note that anchors (etc?)
 618: **   don't have an associated style, so that we must scan down the
 619: **   stack for an element with a defined style. (In fact, the styles
 620: **   should be linked to the whole stack not just the top one.)
 621: **   TBL 921119
1.6 timbl 622: **
 623: **   We don't turn on "CAREFUL" check because the parser produces
 624: **   (internal code errors apart) good nesting. The parser checks
 625: **   incoming code errors, not this module.
1.1 timbl 626: */
1.4 timbl 627: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 628: {
1.2 timbl 629: #ifdef CAREFUL         /* parser assumed to produce good nesting */
1.4 timbl 630:   if (element_number != me->sp[0].tag_number) {
1.2 timbl 631:     fprintf(stderr, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 632:        me->dtd->tags[element_number].name,
 633:        me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 634:        /* panic */
1.1 timbl 635:   }
1.2 timbl 636: #endif
 637:   
1.4 timbl 638:   me->sp++;             /* Pop state off stack */
1.2 timbl 639:   
 640:   switch(element_number) {
 641: 
 642:   case HTML_A:
 643:    UPDATE_STYLE;
1.4 timbl 644:    HText_endAnchor(me->text);
1.2 timbl 645:    break;
 646: 
 647:   case HTML_TITLE:
1.4 timbl 648:     HTChunkTerminate(&me->title);
 649:    HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 650:    break;
 651:    
 652:   case HTML_LISTING:             /* Litteral text */
 653:   case HTML_XMP:
 654:   case HTML_PLAINTEXT:
 655:   case HTML_PRE:
1.4 timbl 656:    if (me->comment_start)
 657:      HText_appendText(me->text, me->comment_start);
1.2 timbl 658:    /* Fall through */
 659:    
 660:   default:
 661:   
1.11 timbl 662:    change_paragraph_style(me, me->sp->style);   /* Often won't really change */
1.2 timbl 663:    break;
 664:    
 665:   } /* switch */
1.1 timbl 666: }
 667: 
1.2 timbl 668: 
 669: /*       Expanding entities
 670: **       ------------------
 671: */
 672: /*   (In fact, they all shrink!)
1.1 timbl 673: */
1.2 timbl 674: 
1.4 timbl 675: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 676: {
1.4 timbl 677:   HTML_put_string(me, ISO_Latin1[entity_number]);  /* @@ Other representations */
1.1 timbl 678: }
1.2 timbl 679: 
 680: 
 681: /*   Free an HTML object
 682: **   -------------------
 683: **
1.4 timbl 684: ** If the document is empty, the text object will not yet exist.
 685:  So we could in fact abandon creating the document and return
 686:  an error code. In fact an empty document is an important type
 687:  of document, so we don't.
 688: **
 689: **   If non-interactive, everything is freed off.  No: crashes -listrefs
1.2 timbl 690: **   Otherwise, the interactive object is left.   
 691: */
1.4 timbl 692: PUBLIC void HTML_free ARGS1(HTStructured *, me)
1.1 timbl 693: {
1.4 timbl 694:   UPDATE_STYLE;       /* Creates empty document here! */
 695:   if (me->comment_end)
 696:        HTML_put_string(me,me->comment_end);
 697:   HText_endAppend(me->text);
 698: 
 699:   if (me->target) {
 700:     (*me->targetClass.free)(me->target);
1.2 timbl 701:   }
1.4 timbl 702:   free(me);
1.1 timbl 703: }
 704: 
 705: 
1.14 timbl 706: PRIVATE void HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1 timbl 707: 
1.14 timbl 708: {
 709:   if (me->target) {
 710:     (*me->targetClass.abort)(me->target, e);
 711:   }
 712:   free(me);
 713: 
1.1 timbl 714: }
 715: 
1.2 timbl 716: 
 717: /*   Get Styles from style sheet
 718: **   ---------------------------
 719: */
 720: PRIVATE void get_styles NOARGS
1.1 timbl 721: {
1.2 timbl 722:   got_styles = YES;
 723:   
 724:   default_style =      HTStyleNamed(styleSheet, "Normal");
1.1 timbl 725: 
1.2 timbl 726:   styles[HTML_H1] =     HTStyleNamed(styleSheet, "Heading1");
 727:   styles[HTML_H2] =     HTStyleNamed(styleSheet, "Heading2");
 728:   styles[HTML_H3] =     HTStyleNamed(styleSheet, "Heading3");
 729:   styles[HTML_H4] =     HTStyleNamed(styleSheet, "Heading4");
 730:   styles[HTML_H5] =     HTStyleNamed(styleSheet, "Heading5");
 731:   styles[HTML_H6] =     HTStyleNamed(styleSheet, "Heading6");
 732:   styles[HTML_H7] =     HTStyleNamed(styleSheet, "Heading7");
 733: 
 734:   styles[HTML_DL] =     HTStyleNamed(styleSheet, "Glossary");
 735:   styles[HTML_UL] =
 736:   styles[HTML_OL] =     HTStyleNamed(styleSheet, "List");
 737:   styles[HTML_MENU] =        HTStyleNamed(styleSheet, "Menu");
 738:   styles[HTML_DIR] =     HTStyleNamed(styleSheet, "Dir");  
1.16 timbl 739: /* styles[HTML_DLC] =     HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 740:   styles[HTML_ADDRESS]=   HTStyleNamed(styleSheet, "Address");
 741:   styles[HTML_BLOCKQUOTE]=  HTStyleNamed(styleSheet, "BlockQuote");
 742:   styles[HTML_PLAINTEXT] =
 743:   styles[HTML_XMP] =     HTStyleNamed(styleSheet, "Example");
 744:   styles[HTML_PRE] =     HTStyleNamed(styleSheet, "Preformatted");
 745:   styles[HTML_LISTING] =   HTStyleNamed(styleSheet, "Listing");
 746: }
 747: /*               P U B L I C
 748: */
 749: 
 750: /*   Structured Object Class
 751: **   -----------------------
 752: */
 753: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
 754: {       
 755:    "text/html",
 756:    HTML_free,
1.14 timbl 757:    HTML_abort,
1.2 timbl 758:    HTML_put_character,   HTML_put_string, HTML_write,
 759:    HTML_start_element,   HTML_end_element,
 760:    HTML_put_entity
 761: }; 
1.1 timbl 762: 
1.4 timbl 763: 
1.2 timbl 764: /*       New Structured Text object
 765: **       --------------------------
 766: **
1.16 timbl 767: **   The structured stream can generate either presentation,
1.4 timbl 768: **   or plain text, or HTML.
1.1 timbl 769: */
1.16 timbl 770: PUBLIC HTStructured* HTML_new ARGS5(
 771:    HTRequest *,      request,
 772:    void *,         param,
 773:    HTFormat,        input_format,
 774:    HTFormat,        output_format,
 775:    HTStream *,       output_stream)
1.1 timbl 776: {
 777: 
1.4 timbl 778:   HTStructured * me;
 779:   
1.16 timbl 780:   if (output_format != WWW_PLAINTEXT
 781:    && output_format != WWW_PRESENT
 782:    && output_format != HTAtom_for("text/x-c")) {
 783:     HTStream * intermediate = HTStreamStack(WWW_HTML, request);
1.6 timbl 784:    if (intermediate) return HTMLGenerator(intermediate);
1.4 timbl 785:     fprintf(stderr, "** Internal error: can't parse HTML to %s\n",
1.16 timbl 786:            HTAtom_name(output_format));
1.4 timbl 787:    exit (-99);
 788:   }
 789: 
 790:   me = (HTStructured*) malloc(sizeof(*me));
 791:   if (me == NULL) outofmem(__FILE__, "HTML_new");
1.1 timbl 792: 
 793:   if (!got_styles) get_styles();
 794: 
1.4 timbl 795:   me->isa = &HTMLPresentation;
1.16 timbl 796:   me->dtd = &DTD;
 797:   me->node_anchor = request->anchor;
1.4 timbl 798:   me->title.size = 0;
 799:   me->title.growby = 128;
 800:   me->title.allocated = 0;
 801:   me->title.data = 0;
 802:   me->text = 0;
 803:   me->style_change = YES; /* Force check leading to text creation */
 804:   me->new_style = default_style;
 805:   me->old_style = 0;
 806:   me->sp = me->stack + MAX_NESTING - 1;
 807:   me->sp->tag_number = -1;              /* INVALID */
 808:   me->sp->style = default_style;           /* INVALID */
1.1 timbl 809:   
1.4 timbl 810:   me->comment_start = NULL;
 811:   me->comment_end = NULL;
1.16 timbl 812:   me->target = output_stream;
 813:   if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 814:   
1.4 timbl 815:   return (HTStructured*) me;
1.1 timbl 816: }
 817: 
 818: 
1.2 timbl 819: /*   HTConverter for HTML to plain text
 820: **   ----------------------------------
1.1 timbl 821: **
1.2 timbl 822: **   This will convert from HTML to presentation or plain text.
1.1 timbl 823: */
1.16 timbl 824: PUBLIC HTStream* HTMLToPlain ARGS5(
 825:    HTRequest *,      request,
 826:    void *,         param,
 827:    HTFormat,        input_format,
 828:    HTFormat,        output_format,
 829:    HTStream *,       output_stream)
1.1 timbl 830: {
1.16 timbl 831:   return SGML_new(&DTD, HTML_new(
 832:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 833: }
 834: 
 835: 
1.2 timbl 836: /*   HTConverter for HTML to C code
 837: **   ------------------------------
 838: **
 839: **   C copde is like plain text but all non-preformatted code
 840: **   is commented out.
 841: **   This will convert from HTML to presentation or plain text.
 842: */
1.16 timbl 843: PUBLIC HTStream* HTMLToC ARGS5(
 844:    HTRequest *,      request,
 845:    void *,         param,
 846:    HTFormat,        input_format,
 847:    HTFormat,        output_format,
 848:    HTStream *,       output_stream)
1.1 timbl 849: {
1.4 timbl 850:   
 851:   HTStructured * html;
 852:   
1.16 timbl 853:   (*output_stream->isa->put_string)(output_stream, "/* "); /* Before even title */
 854:   html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.2 timbl 855:   html->comment_start = "/* ";
1.16 timbl 856:   html->dtd = &DTD;
1.2 timbl 857:   html->comment_end = " */\n";    /* Must start in col 1 for cpp */
1.4 timbl 858: /*  HTML_put_string(html,html->comment_start); */
1.16 timbl 859:   return SGML_new(&DTD, html);
1.1 timbl 860: }
 861: 
 862: 
1.2 timbl 863: /*   Presenter for HTML
 864: **   ------------------
 865: **
 866: **   This will convert from HTML to presentation or plain text.
 867: **
 868: **   Override this if you have a windows version
1.1 timbl 869: */
1.2 timbl 870: #ifndef GUI
1.16 timbl 871: PUBLIC HTStream* HTMLPresent ARGS5(
 872:    HTRequest *,      request,
 873:    void *,         param,
 874:    HTFormat,        input_format,
 875:    HTFormat,        output_format,
 876:    HTStream *,       output_stream)
1.1 timbl 877: {
1.16 timbl 878:   return SGML_new(&DTD, HTML_new(
 879:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 880: }
1.2 timbl 881: #endif
1.1 timbl 882: 
 883: 
1.2 timbl 884: /*   Record error message as a hypertext object
 885: **   ------------------------------------------
 886: **
 887: **   The error message should be marked as an error so that
 888: **   it can be reloaded later.
 889: **   This implementation just throws up an error message
 890: **   and leaves the document unloaded.
1.9 timbl 891: **   A smarter implementation would load an error document,
 892: **   marking at such so that it is retried on reload.
1.1 timbl 893: **
1.2 timbl 894: ** On entry,
 895: **   sink  is a stream to the output device if any
 896: **   number is the HTTP error number
 897: **   message is the human readable message.
1.9 timbl 898: **
 899: ** On exit,
 900: **   returns a negative number to indicate lack of success in the load.
1.1 timbl 901: */
1.2 timbl 902: 
 903: PUBLIC int HTLoadError ARGS3(
1.17 ! luotonen 904:    HTRequest *,  req,
1.2 timbl 905:    int,      number,
 906:    CONST char *,  message)
 907: {
 908:   HTAlert(message);     /* @@@@@@@@@@@@@@@@@@@ */
 909:   return -number;
 910: } 
 911: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /