[BACK] Return to HTML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTML.c, revision 1.46

1.39 frystyk 1: /*                                   HTML.c
 2: **   STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
 3: **
1.43 frystyk 4: **   (c) COPYRIGHT MIT 1995.
1.39 frystyk 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: **   This generates a hypertext object. It converts from the
 8: **   structured stream interface of HTML events into the style-
 9: **   oriented interface of HText.h. This module is
 10: **   only used in clients and should not be linked into servers.
1.1 timbl 11: **
1.6 timbl 12: **   Override this module if making a new GUI browser.
1.1 timbl 13: **
1.35 duns 14: ** HISTORY:
 15: **   8 Jul 94 FM  Insulate free() from _free structure element.
 16: **
1.1 timbl 17: */
1.16 timbl 18: 
1.41 frystyk 19: /* Library include files */
 20: #include "tcp.h"
 21: #include "HTUtils.h"
 22: #include "HTString.h"
1.1 timbl 23: #include "HTAtom.h"
 24: #include "HTChunk.h"
 25: #include "HText.h"
 26: #include "HTStyle.h"
1.3 timbl 27: #include "HTAlert.h"
1.4 timbl 28: #include "HTMLGen.h"
1.8 timbl 29: #include "HTParse.h"
1.41 frystyk 30: #include "HTML.h"
1.1 timbl 31: 
 32: extern HTStyleSheet * styleSheet;   /* Application-wide */
 33: 
 34: /*   Module-wide style cache
 35: */
 36: PRIVATE int      got_styles = 0;
1.16 timbl 37: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 38: PRIVATE HTStyle *default_style;
1.1 timbl 39: 
 40: 
 41: /*       HTML Object
 42: **       -----------
 43: */
1.2 timbl 44: #define MAX_NESTING 20     /* Should be checked by parser */
 45: 
 46: typedef struct _stack_element {
 47:     HTStyle *   style;
 48:    int       tag_number;
 49: } stack_element;
 50: 
 51: struct _HTStructured {
 52:   CONST HTStructuredClass * isa;
 53:   HTParentAnchor *      node_anchor;
 54:   HText *          text;
 55: 
 56:   HTStream*         target;         /* Output stream */
 57:   HTStreamClass       targetClass;      /* Output routines */
 58: 
 59:   HTChunk          title;     /* Grow by 128 */
 60:   
 61:   char *           comment_start; /* for literate programming */
 62:   char *           comment_end;
1.16 timbl 63:   
 64:   CONST SGML_dtd*      dtd;
 65:   
1.2 timbl 66:   HTTag *          current_tag;
 67:   BOOL            style_change;
 68:   HTStyle *         new_style;
 69:   HTStyle *         old_style;
 70:   BOOL            in_word; /* Have just had a non-white char */
1.44 frystyk 71: 
 72:   stack_element       stack[MAX_NESTING];
 73:   stack_element       *sp;         /* Style stack pointer */
 74:   int                overflow; /* Keep track of overflow nesting */
1.1 timbl 75: };
 76: 
1.2 timbl 77: struct _HTStream {
 78:   CONST HTStreamClass *   isa;
 79:   /* .... */
 80: };
1.1 timbl 81: 
 82: /*       Forward declarations of routines
 83: */
 84: PRIVATE void get_styles NOPARAMS;
 85: 
 86: 
1.4 timbl 87: PRIVATE void actually_set_style PARAMS((HTStructured * me));
1.11 timbl 88: PRIVATE void change_paragraph_style PARAMS((HTStructured * me, HTStyle * style));
1.1 timbl 89: 
 90: /*   Style buffering avoids dummy paragraph begin/ends.
 91: */
1.4 timbl 92: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 93: 
 94: 
#ifdef OLD_CODE
/* The following accented characters are from Peter Flynn, curia project */

/* these ifdefs don't solve the problem of a simple terminal emulator
** with a different character set to the client machine. But nothing does,
** except looking at the TERM setting */

/* NOTE: this is only the tail of an old entity table; it is never compiled
** unless OLD_CODE is defined and is kept for reference. */

    { "ocus" , "&" },       /* for CURIA */
#ifdef IBMPC
    { "aacute" , "\240" },  /* For PC display */
    { "eacute" , "\202" },
    { "iacute" , "\241" },
    { "oacute" , "\242" },
    { "uacute" , "\243" },
    { "Aacute" , "\101" },
    { "Eacute" , "\220" },
    { "Iacute" , "\111" },
    { "Oacute" , "\117" },
    { "Uacute" , "\125" },
#else
    { "aacute" , "\341" },  /* Works for openwindows -- Peter Flynn */
    { "eacute" , "\351" },
    { "iacute" , "\355" },
    { "oacute" , "\363" },
    { "uacute" , "\372" },
    { "Aacute" , "\301" },
    { "Eacute" , "\310" },
    { "Iacute" , "\315" },
    { "Oacute" , "\323" },
    { "Uacute" , "\332" },
#endif
    { 0,    0 }  /* Terminate list */
};
#endif
1.1 timbl 130: 
 131: 
/*	Entity values -- for ISO Latin 1 local representation
**
**	This MUST match exactly the table referred to in the DTD!
**
**	Each entry is a one-character string written as an octal escape.
*/
static char * ISO_Latin1[] = {
    "\306",	/* capital AE diphthong (ligature) */
    "\301",	/* capital A, acute accent */
    "\302",	/* capital A, circumflex accent */
    "\300",	/* capital A, grave accent */
    "\305",	/* capital A, ring */
    "\303",	/* capital A, tilde */
    "\304",	/* capital A, dieresis or umlaut mark */
    "\307",	/* capital C, cedilla */
    "\320",	/* capital Eth, Icelandic */
    "\311",	/* capital E, acute accent */
    "\312",	/* capital E, circumflex accent */
    "\310",	/* capital E, grave accent */
    "\313",	/* capital E, dieresis or umlaut mark */
    "\315",	/* capital I, acute accent */
    "\316",	/* capital I, circumflex accent */
    "\314",	/* capital I, grave accent */
    "\317",	/* capital I, dieresis or umlaut mark */
    "\321",	/* capital N, tilde */
    "\323",	/* capital O, acute accent */
    "\324",	/* capital O, circumflex accent */
    "\322",	/* capital O, grave accent */
    "\330",	/* capital O, slash */
    "\325",	/* capital O, tilde */
    "\326",	/* capital O, dieresis or umlaut mark */
    "\336",	/* capital THORN, Icelandic */
    "\332",	/* capital U, acute accent */
    "\333",	/* capital U, circumflex accent */
    "\331",	/* capital U, grave accent */
    "\334",	/* capital U, dieresis or umlaut mark */
    "\335",	/* capital Y, acute accent */
    "\341",	/* small a, acute accent */
    "\342",	/* small a, circumflex accent */
    "\346",	/* small ae diphthong (ligature) */
    "\340",	/* small a, grave accent */
    "\046",	/* ampersand */
    "\345",	/* small a, ring */
    "\343",	/* small a, tilde */
    "\344",	/* small a, dieresis or umlaut mark */
    "\347",	/* small c, cedilla */
    "\351",	/* small e, acute accent */
    "\352",	/* small e, circumflex accent */
    "\350",	/* small e, grave accent */
    "\360",	/* small eth, Icelandic */
    "\353",	/* small e, dieresis or umlaut mark */
    "\076",	/* greater than */
    "\355",	/* small i, acute accent */
    "\356",	/* small i, circumflex accent */
    "\354",	/* small i, grave accent */
    "\357",	/* small i, dieresis or umlaut mark */
    "\074",	/* less than */
    "\361",	/* small n, tilde */
    "\363",	/* small o, acute accent */
    "\364",	/* small o, circumflex accent */
    "\362",	/* small o, grave accent */
    "\370",	/* small o, slash */
    "\365",	/* small o, tilde */
    "\366",	/* small o, dieresis or umlaut mark */
    "\042",	/* double quote sign - June 94 */
    "\337",	/* small sharp s, German (sz ligature) */
    "\376",	/* small thorn, Icelandic */
    "\372",	/* small u, acute accent */
    "\373",	/* small u, circumflex accent */
    "\371",	/* small u, grave accent */
    "\374",	/* small u, dieresis or umlaut mark */
    "\375",	/* small y, acute accent */
    "\377",	/* small y, dieresis or umlaut mark */
};
 204: 
1.2 timbl 205: 
 206: /*   Entity values -- for NeXT local representation
 207: **
 208: **   This MUST match exactly the table referred to in the DTD!
 209: **
 210: */
 211: static char * NeXTCharacters[] = {
 212:    "341円", /* capital AE diphthong (ligature)   */ 
 213:    "202円", /* capital A, acute accent       */ 
 214:    "203円", /* capital A, circumflex accent     */ 
 215:    "201円", /* capital A, grave accent       */ 
 216:    "206円", /* capital A, ring           */ 
 217:    "204円", /* capital A, tilde           */ 
 218:    "205円", /* capital A, dieresis or umlaut mark  */ 
 219:    "207円", /* capital C, cedilla          */ 
 220:    "220円", /* capital Eth, Icelandic        */ 
 221:    "211円", /* capital E, acute accent               */ 
 222:    "212円", /* capital E, circumflex accent             */ 
 223:    "210円", /* capital E, grave accent               */ 
 224:    "213円", /* capital E, dieresis or umlaut mark          */ 
 225:    "215円", /* capital I, acute accent               */ 
 226:    "216円", /* capital I, circumflex accent     these are    */ 
 227:    "214円", /* capital I, grave accent       ISO -100 hex  */ 
 228:    "217円", /* capital I, dieresis or umlaut mark          */ 
 229:    "221円", /* capital N, tilde                   */ 
 230:    "223円", /* capital O, acute accent               */ 
 231:    "224円", /* capital O, circumflex accent             */ 
 232:    "222円", /* capital O, grave accent               */ 
 233:    "351円", /* capital O, slash       'cept this */ 
 234:    "225円", /* capital O, tilde                   */ 
 235:    "226円", /* capital O, dieresis or umlaut mark          */ 
 236:    "234円", /* capital THORN, Icelandic */ 
 237:    "230円", /* capital U, acute accent */ 
 238:    "231円", /* capital U, circumflex accent */ 
 239:    "227円", /* capital U, grave accent */ 
 240:    "232円", /* capital U, dieresis or umlaut mark */ 
 241:    "233円", /* capital Y, acute accent */ 
 242:    "326円", /* small a, acute accent */ 
 243:    "327円", /* small a, circumflex accent */ 
 244:    "361円", /* small ae diphthong (ligature) */ 
 245:    "325円", /* small a, grave accent */ 
 246:    "046円", /* ampersand */ 
 247:    "332円", /* small a, ring */ 
 248:    "330円", /* small a, tilde */ 
 249:    "331円", /* small a, dieresis or umlaut mark */ 
 250:    "333円", /* small c, cedilla */ 
 251:    "335円", /* small e, acute accent */ 
 252:    "336円", /* small e, circumflex accent */ 
 253:    "334円", /* small e, grave accent */ 
 254:    "346円", /* small eth, Icelandic     */ 
 255:    "337円", /* small e, dieresis or umlaut mark */ 
 256:    "076円", /* greater than */ 
 257:    "342円", /* small i, acute accent */ 
 258:    "344円", /* small i, circumflex accent */ 
 259:    "340円", /* small i, grave accent */ 
 260:    "345円", /* small i, dieresis or umlaut mark */ 
 261:    "074円", /* less than */ 
 262:    "347円", /* small n, tilde */ 
 263:    "355円", /* small o, acute accent */ 
 264:    "356円", /* small o, circumflex accent */ 
 265:    "354円", /* small o, grave accent */ 
 266:    "371円", /* small o, slash */ 
 267:    "357円", /* small o, tilde */ 
 268:    "360円", /* small o, dieresis or umlaut mark */ 
1.36 frystyk 269:     "042円", /* double quote sign - June 94 */
1.2 timbl 270:    "373円", /* small sharp s, German (sz ligature) */ 
 271:    "374円", /* small thorn, Icelandic */ 
 272:    "363円", /* small u, acute accent */ 
 273:    "364円", /* small u, circumflex accent */ 
 274:    "362円", /* small u, grave accent */ 
 275:    "366円", /* small u, dieresis or umlaut mark */ 
 276:    "367円", /* small y, acute accent */ 
 277:    "375円", /* small y, dieresis or umlaut mark */ 
1.1 timbl 278: };
 279: 
1.2 timbl 280: /*   Entity values -- for IBM/PC Code Page 850 (International)
 281: **
 282: **   This MUST match exactly the table referred to in the DTD!
 283: **
 284: */
 285: /* @@@@@@@@@@@@@@@@@ TBD */
 286: 
 287: 
 288: 
 289: /*       Set character set
 290: **       ----------------
 291: */
 292: 
 293: PRIVATE char** p_entity_values = ISO_Latin1;  /* Pointer to translation */
1.1 timbl 294: 
1.2 timbl 295: PUBLIC void HTMLUseCharacterSet ARGS1(HTMLCharacterSet, i)
 296: {
 297:   p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
 298:                       : ISO_Latin1;
 299: }
1.1 timbl 300: 
 301: 
 302: /*       Flattening the style structure
 303: **       ------------------------------
 304: **
 305: On the NeXT, and on any read-only browser, it is simpler for the text to have
 306: a sequence of styles, rather than a nested tree of styles. In this
 307: case we have to flatten the structure as it arrives from SGML tags into
 308: a sequence of styles.
 309: */
 310: 
 311: /*       If style really needs to be set, call this
 312: */
1.4 timbl 313: PRIVATE void actually_set_style ARGS1(HTStructured *, me)
1.1 timbl 314: {
1.4 timbl 315:   if (!me->text) {          /* First time through */
 316:      me->text = HText_new2(me->node_anchor, me->target);
 317:      HText_beginAppend(me->text);
 318:      HText_setStyle(me->text, me->new_style);
 319:      me->in_word = NO;
1.1 timbl 320:   } else {
1.4 timbl 321:      HText_setStyle(me->text, me->new_style);
1.1 timbl 322:   }
1.4 timbl 323:   me->old_style = me->new_style;
 324:   me->style_change = NO;
1.1 timbl 325: }
 326: 
 327: /*   If you THINK you need to change style, call this
 328: */
 329: 
1.11 timbl 330: PRIVATE void change_paragraph_style ARGS2(HTStructured *, me, HTStyle *,style)
1.1 timbl 331: {
1.4 timbl 332:   if (me->new_style!=style) {
 333:    me->style_change = YES;
 334:    me->new_style = style;
1.1 timbl 335:   }
1.11 timbl 336:   me->in_word = NO;
1.1 timbl 337: }
 338: 
1.2 timbl 339: /*_________________________________________________________________________
 340: **
 341: **           A C T I O N   R O U T I N E S
 342: */
 343: 
 344: /*   Character handling
 345: **   ------------------
1.1 timbl 346: */
1.42 frystyk 347: PRIVATE int HTML_put_character ARGS2(HTStructured *, me, char, c)
1.1 timbl 348: {
1.2 timbl 349: 
1.4 timbl 350:   switch (me->sp[0].tag_number) {
1.2 timbl 351:   case HTML_COMMENT:
 352:    break;                 /* Do Nothing */
 353:    
 354:   case HTML_TITLE:  
1.4 timbl 355:    HTChunkPutc(&me->title, c);
1.2 timbl 356:    break;
 357: 
 358:    
 359:   case HTML_LISTING:             /* Litteral text */
 360:   case HTML_XMP:
 361:   case HTML_PLAINTEXT:
 362:   case HTML_PRE:
 363: /*   We guarrantee that the style is up-to-date in begin_litteral
 364: */
1.4 timbl 365:    HText_appendCharacter(me->text, c);
1.2 timbl 366:    break;
 367:    
 368:   default:                  /* Free format text */
1.4 timbl 369:    if (me->style_change) {
1.42 frystyk 370:      if ((c=='\n') || (c==' ')) return HT_OK;  /* Ignore it */
1.2 timbl 371:      UPDATE_STYLE;
 372:    }
 373:    if (c=='\n') {
1.4 timbl 374:      if (me->in_word) {
 375:        HText_appendCharacter(me->text, ' ');
 376:        me->in_word = NO;
1.2 timbl 377:      }
 378:    } else {
1.4 timbl 379:      HText_appendCharacter(me->text, c);
 380:      me->in_word = YES;
1.2 timbl 381:    }
 382:   } /* end switch */
1.42 frystyk 383:   return HT_OK;
1.1 timbl 384: }
 385: 
1.2 timbl 386: 
 387: 
 388: /*   String handling
 389: **   ---------------
 390: **
 391: **   This is written separately from put_character becuase the loop can
1.11 timbl 392: **   in some cases be promoted to a higher function call level for speed.
1.2 timbl 393: */
1.42 frystyk 394: PRIVATE int HTML_put_string ARGS2(HTStructured *, me, CONST char*, s)
1.1 timbl 395: {
1.2 timbl 396: 
1.4 timbl 397:   switch (me->sp[0].tag_number) {
1.2 timbl 398:   case HTML_COMMENT:
 399:    break;                 /* Do Nothing */
 400:    
 401:   case HTML_TITLE:  
1.4 timbl 402:    HTChunkPuts(&me->title, s);
1.2 timbl 403:    break;
 404: 
 405:    
 406:   case HTML_LISTING:             /* Litteral text */
 407:   case HTML_XMP:
 408:   case HTML_PLAINTEXT:
 409:   case HTML_PRE:
 410: 
 411: /*   We guarrantee that the style is up-to-date in begin_litteral
 412: */
1.4 timbl 413:    HText_appendText(me->text, s);
1.2 timbl 414:    break;
 415:    
 416:   default:                  /* Free format text */
 417:     {
 418:      CONST char *p = s;
1.4 timbl 419:      if (me->style_change) {
1.2 timbl 420:        for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
1.42 frystyk 421:        if (!*p) return HT_OK;
1.2 timbl 422:        UPDATE_STYLE;
 423:      }
 424:      for(; *p; p++) {
1.4 timbl 425:        if (me->style_change) {
1.2 timbl 426:          if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
 427:          UPDATE_STYLE;
 428:        }
 429:        if (*p=='\n') {
1.4 timbl 430:          if (me->in_word) {
 431:            HText_appendCharacter(me->text, ' ');
 432:            me->in_word = NO;
1.2 timbl 433:          }
 434:        } else {
1.4 timbl 435:          HText_appendCharacter(me->text, *p);
 436:          me->in_word = YES;
1.2 timbl 437:        }
 438:      } /* for */
 439:    }
 440:   } /* end switch */
1.42 frystyk 441:   return HT_OK;
1.1 timbl 442: }
 443: 
 444: 
1.2 timbl 445: /*   Buffer write
1.3 timbl 446: **   ------------
1.1 timbl 447: */
1.42 frystyk 448: PRIVATE int HTML_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
1.1 timbl 449: {
1.38 frystyk 450:   while (l-- > 0)
 451:    HTML_put_character(me, *s++);
1.42 frystyk 452:   return HT_OK;
1.1 timbl 453: }
1.2 timbl 454: 
 455: 
 456: /*   Start Element
 457: **   -------------
 458: */
 459: PRIVATE void HTML_start_element ARGS4(
1.4 timbl 460:    HTStructured *,     me,
1.16 timbl 461:    int,          element_number,
1.3 timbl 462:    CONST BOOL*,      present,
1.16 timbl 463:    CONST char **,     value)
1.2 timbl 464: {
 465:   switch (element_number) {
 466:   case HTML_A:
 467:    {
1.8 timbl 468:      HTChildAnchor * source;
1.9 timbl 469:      char * href = NULL;
1.42 frystyk 470:      if (present[HTML_A_HREF])
1.9 timbl 471:        StrAllocCopy(href, value[HTML_A_HREF]);
1.8 timbl 472:      source = HTAnchor_findChildAndLink(
1.4 timbl 473:        me->node_anchor,                /* parent */
1.2 timbl 474:        present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 475:        present[HTML_A_HREF] ? href : 0,        /* Addresss */
1.16 timbl 476:        present[HTML_A_REL] && value[HTML_A_REL] ? 
 477:            (HTLinkType*)HTAtom_for(value[HTML_A_REL])
1.2 timbl 478:                        : 0);
 479:      
 480:      if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
 481:        HTParentAnchor * dest = 
 482:          HTAnchor_parent(
 483:            HTAnchor_followMainLink((HTAnchor*)source)
 484:                  );
 485:        if (!HTAnchor_title(dest))
 486:            HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
 487:      }
 488:      UPDATE_STYLE;
1.4 timbl 489:      HText_beginAnchor(me->text, source);
1.42 frystyk 490:      FREE(href);             /* Leak fix Henrik 17/02-94 */
1.2 timbl 491:    }
 492:    break;
 493:    
 494:   case HTML_TITLE:
1.4 timbl 495:     HTChunkClear(&me->title);
1.2 timbl 496:    break;
 497:    
 498:   case HTML_NEXTID:
 499:    /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 500:        HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 501:    break;
 502:    
 503:   case HTML_ISINDEX:
1.4 timbl 504:    HTAnchor_setIndex(me->node_anchor);
1.2 timbl 505:    break;
 506:    
1.15 timbl 507:   case HTML_BR: 
 508:    UPDATE_STYLE;
 509:    HText_appendCharacter(me->text, '\n');
 510:    me->in_word = NO;
 511:    break;
 512:    
 513:   case HTML_HR: 
 514:    UPDATE_STYLE;
 515:    HText_appendCharacter(me->text, '\n');
1.16 timbl 516:    HText_appendText(me->text, "___________________________________");
1.15 timbl 517:    HText_appendCharacter(me->text, '\n');
 518:    me->in_word = NO;
 519:    break;
 520:    
1.2 timbl 521:   case HTML_P:
 522:    UPDATE_STYLE;
1.4 timbl 523:    HText_appendParagraph(me->text);
 524:    me->in_word = NO;
1.2 timbl 525:    break;
 526: 
 527:   case HTML_DL:
1.11 timbl 528:     change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 529:        ? styles[HTML_DL]
1.2 timbl 530:        : styles[HTML_DL]);
 531:    break;
 532:    
 533:   case HTML_DT:
1.4 timbl 534:     if (!me->style_change) {
 535:      HText_appendParagraph(me->text);
 536:      me->in_word = NO;
1.2 timbl 537:    }
 538:    break;
 539:    
 540:   case HTML_DD:
 541:     UPDATE_STYLE;
1.4 timbl 542:    HTML_put_character(me, '\t');  /* Just tab out one stop */
 543:    me->in_word = NO;
 544:    break;
1.2 timbl 545: 
 546:   case HTML_UL:
 547:   case HTML_OL:
 548:   case HTML_MENU:
 549:   case HTML_DIR:
1.11 timbl 550:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 551:    break;
 552:    
 553:   case HTML_LI:
 554:     UPDATE_STYLE;
1.7 timbl 555:    if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 556:      HText_appendParagraph(me->text);
1.2 timbl 557:    else
1.4 timbl 558:      HText_appendCharacter(me->text, '\t');   /* Tab @@ nl for UL? */
 559:    me->in_word = NO;
1.2 timbl 560:    break;
 561:    
 562:   case HTML_LISTING:             /* Litteral text */
 563:   case HTML_XMP:
 564:   case HTML_PLAINTEXT:
 565:   case HTML_PRE:
1.11 timbl 566:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 567:    UPDATE_STYLE;
1.4 timbl 568:    if (me->comment_end)
 569:      HText_appendText(me->text, me->comment_end);
1.2 timbl 570:    break;
1.11 timbl 571: 
1.23 frystyk 572:   case HTML_IMG:           /* Images */
 573:    {
 574:      HTChildAnchor *source;
 575:      char *src = NULL;
 576:      if (present[HTML_IMG_SRC]) {
 577:        StrAllocCopy(src, value[HTML_IMG_SRC]);
1.36 frystyk 578: #ifdef OLD_CODE
1.23 frystyk 579:        HTSimplify(src);
1.36 frystyk 580: #endif
1.23 frystyk 581:      }
 582:      source = HTAnchor_findChildAndLink(
 583:                        me->node_anchor,  /* parent */
 584:                        0,           /* Tag */
 585:                        src ? src : 0,  /* Addresss */
 586:                        0);
 587:      UPDATE_STYLE;
 588:      HText_appendImage(me->text, source,
1.24 frystyk 589:           present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
 590:           present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
 591:           present[HTML_IMG_ISMAP] ? YES : NO);
1.23 frystyk 592:      free(src);
1.24 frystyk 593:    }    
 594:    break;
 595: 
 596:   case HTML_HTML:          /* Ignore these altogether */
 597:   case HTML_HEAD:
 598:   case HTML_BODY:
 599:   
1.10 timbl 600:   case HTML_TT:           /* Physical character highlighting */
 601:   case HTML_B:            /* Currently ignored */
 602:   case HTML_I:
 603:   case HTML_U:
 604:   
 605:   case HTML_EM:           /* Logical character highlighting */
 606:   case HTML_STRONG:         /* Currently ignored */
 607:   case HTML_CODE:
 608:   case HTML_SAMP:
 609:   case HTML_KBD:
 610:   case HTML_VAR:
 611:   case HTML_DFN:
 612:   case HTML_CITE:
 613:    break;
 614:    
1.11 timbl 615:   case HTML_H1:           /* paragraph styles */
 616:   case HTML_H2:
 617:   case HTML_H3:
 618:   case HTML_H4:
 619:   case HTML_H5:
 620:   case HTML_H6:
 621:   case HTML_H7:
 622:   case HTML_ADDRESS:
 623:   case HTML_BLOCKQUOTE:
 624:    change_paragraph_style(me, styles[element_number]);   /* May be postponed */
1.2 timbl 625:    break;
 626: 
 627:   } /* end switch */
 628: 
1.16 timbl 629:   if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 630:     if (me->sp == me->stack) {
1.44 frystyk 631:      if (SGML_TRACE)
 632:        fprintf(TDEST, "HTML........ Maximum nesting of %d exceded!\n",
 633:            MAX_NESTING); 
 634:      me->overflow++;
1.12 timbl 635:      return;
 636:    }
1.4 timbl 637:    --(me->sp);
 638:    me->sp[0].style = me->new_style;    /* Stack new style */
 639:    me->sp[0].tag_number = element_number;
1.10 timbl 640:   } 
1.1 timbl 641: }
1.10 timbl 642: 
1.2 timbl 643: 
1.1 timbl 644: /*       End Element
1.2 timbl 645: **       -----------
1.1 timbl 646: **
1.2 timbl 647: */
 648: /*   When we end an element, the style must be returned to that
1.1 timbl 649: **   in effect before that element. Note that anchors (etc?)
 650: **   don't have an associated style, so that we must scan down the
 651: **   stack for an element with a defined style. (In fact, the styles
 652: **   should be linked to the whole stack not just the top one.)
 653: **   TBL 921119
1.6 timbl 654: **
 655: **   We don't turn on "CAREFUL" check because the parser produces
 656: **   (internal code errors apart) good nesting. The parser checks
 657: **   incoming code errors, not this module.
1.1 timbl 658: */
1.4 timbl 659: PRIVATE void HTML_end_element ARGS2(HTStructured *, me, int , element_number)
1.1 timbl 660: {
1.2 timbl 661: #ifdef CAREFUL         /* parser assumed to produce good nesting */
1.4 timbl 662:   if (element_number != me->sp[0].tag_number) {
1.41 frystyk 663:     fprintf(TDEST, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 664:        me->dtd->tags[element_number].name,
 665:        me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 666:        /* panic */
1.1 timbl 667:   }
1.2 timbl 668: #endif
1.44 frystyk 669: 
 670:   /* HFN, If overflow of nestings, we need to get back to reality */
 671:   if (me->overflow > 0) {
 672:    me->overflow--;
 673:    return;
 674:   }
 675: 
1.4 timbl 676:   me->sp++;             /* Pop state off stack */
1.44 frystyk 677: 
1.2 timbl 678:   switch(element_number) {
 679: 
 680:   case HTML_A:
 681:    UPDATE_STYLE;
1.4 timbl 682:    HText_endAnchor(me->text);
1.2 timbl 683:    break;
 684: 
 685:   case HTML_TITLE:
1.4 timbl 686:     HTChunkTerminate(&me->title);
 687:    HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 688:    break;
 689:    
 690:   case HTML_LISTING:             /* Litteral text */
 691:   case HTML_XMP:
 692:   case HTML_PLAINTEXT:
 693:   case HTML_PRE:
1.4 timbl 694:    if (me->comment_start)
 695:      HText_appendText(me->text, me->comment_start);
1.2 timbl 696:    /* Fall through */
 697:    
 698:   default:
1.44 frystyk 699: 
 700:    /* Often won't really change */
 701:    change_paragraph_style(me, me->sp->style);
1.2 timbl 702:    break;
 703:    
 704:   } /* switch */
1.1 timbl 705: }
 706: 
1.2 timbl 707: 
 708: /*       Expanding entities
 709: **       ------------------
 710: */
 711: /*   (In fact, they all shrink!)
1.1 timbl 712: */
1.2 timbl 713: 
1.4 timbl 714: PRIVATE void HTML_put_entity ARGS2(HTStructured *, me, int, entity_number)
1.1 timbl 715: {
1.4 timbl 716:   HTML_put_string(me, ISO_Latin1[entity_number]);  /* @@ Other representations */
1.1 timbl 717: }
1.2 timbl 718: 
1.42 frystyk 719: /*   Flush an HTML object
 720: **   --------------------
 721: */
 722: PUBLIC int HTML_flush ARGS1(HTStructured *, me)
 723: {
 724:   UPDATE_STYLE;              /* Creates empty document here! */
 725:   if (me->comment_end)
 726:        HTML_put_string(me,me->comment_end);
 727:   HText_endAppend(me->text);
 728:   return (*me->targetClass.flush)(me->target);
 729: }
1.2 timbl 730: 
 731: /*   Free an HTML object
 732: **   -------------------
 733: **
1.4 timbl 734: ** If the document is empty, the text object will not yet exist.
 735:  So we could in fact abandon creating the document and return
 736:  an error code. In fact an empty document is an important type
 737:  of document, so we don't.
 738: **
 739: **   If non-interactive, everything is freed off.  No: crashes -listrefs
1.2 timbl 740: **   Otherwise, the interactive object is left.   
 741: */
1.37 frystyk 742: PUBLIC int HTML_free ARGS1(HTStructured *, me)
1.1 timbl 743: {
1.4 timbl 744:   UPDATE_STYLE;       /* Creates empty document here! */
 745:   if (me->comment_end)
 746:        HTML_put_string(me,me->comment_end);
 747:   HText_endAppend(me->text);
 748: 
 749:   if (me->target) {
1.35 duns 750:     (*me->targetClass._free)(me->target);
1.2 timbl 751:   }
1.19 frystyk 752:   HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.4 timbl 753:   free(me);
1.42 frystyk 754:   return HT_OK;
1.1 timbl 755: }
 756: 
 757: 
1.37 frystyk 758: PRIVATE int HTML_abort ARGS2(HTStructured *, me, HTError, e)
1.1 timbl 759: 
1.14 timbl 760: {
 761:   if (me->target) {
 762:     (*me->targetClass.abort)(me->target, e);
 763:   }
1.19 frystyk 764:   HTChunkClear(&me->title); /* Henrik 18/02-94 */
1.14 timbl 765:   free(me);
1.42 frystyk 766:   return HT_ERROR;
1.1 timbl 767: }
 768: 
1.2 timbl 769: 
 770: /*   Get Styles from style sheet
 771: **   ---------------------------
 772: */
 773: PRIVATE void get_styles NOARGS
1.1 timbl 774: {
1.2 timbl 775:   got_styles = YES;
 776:   
 777:   default_style =      HTStyleNamed(styleSheet, "Normal");
1.1 timbl 778: 
1.2 timbl 779:   styles[HTML_H1] =     HTStyleNamed(styleSheet, "Heading1");
 780:   styles[HTML_H2] =     HTStyleNamed(styleSheet, "Heading2");
 781:   styles[HTML_H3] =     HTStyleNamed(styleSheet, "Heading3");
 782:   styles[HTML_H4] =     HTStyleNamed(styleSheet, "Heading4");
 783:   styles[HTML_H5] =     HTStyleNamed(styleSheet, "Heading5");
 784:   styles[HTML_H6] =     HTStyleNamed(styleSheet, "Heading6");
 785:   styles[HTML_H7] =     HTStyleNamed(styleSheet, "Heading7");
 786: 
 787:   styles[HTML_DL] =     HTStyleNamed(styleSheet, "Glossary");
 788:   styles[HTML_UL] =
 789:   styles[HTML_OL] =     HTStyleNamed(styleSheet, "List");
 790:   styles[HTML_MENU] =        HTStyleNamed(styleSheet, "Menu");
 791:   styles[HTML_DIR] =     HTStyleNamed(styleSheet, "Dir");  
1.16 timbl 792: /* styles[HTML_DLC] =     HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 793:   styles[HTML_ADDRESS]=   HTStyleNamed(styleSheet, "Address");
 794:   styles[HTML_BLOCKQUOTE]=  HTStyleNamed(styleSheet, "BlockQuote");
 795:   styles[HTML_PLAINTEXT] =
 796:   styles[HTML_XMP] =     HTStyleNamed(styleSheet, "Example");
 797:   styles[HTML_PRE] =     HTStyleNamed(styleSheet, "Preformatted");
 798:   styles[HTML_LISTING] =   HTStyleNamed(styleSheet, "Listing");
 799: }
 800: /*               P U B L I C
 801: */
 802: 
 803: /*   Structured Object Class
 804: **   -----------------------
 805: */
 806: PUBLIC CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
 807: {       
 808:    "text/html",
1.42 frystyk 809:    HTML_flush,
1.2 timbl 810:    HTML_free,
1.14 timbl 811:    HTML_abort,
1.2 timbl 812:    HTML_put_character,   HTML_put_string, HTML_write,
 813:    HTML_start_element,   HTML_end_element,
 814:    HTML_put_entity
 815: }; 
1.1 timbl 816: 
1.4 timbl 817: 
1.2 timbl 818: /*       New Structured Text object
 819: **       --------------------------
 820: **
1.16 timbl 821: **   The structured stream can generate either presentation,
1.4 timbl 822: **   or plain text, or HTML.
1.1 timbl 823: */
1.16 timbl 824: PUBLIC HTStructured* HTML_new ARGS5(
 825:    HTRequest *,      request,
 826:    void *,         param,
 827:    HTFormat,        input_format,
 828:    HTFormat,        output_format,
 829:    HTStream *,       output_stream)
1.1 timbl 830: {
 831: 
1.4 timbl 832:   HTStructured * me;
 833:   
1.16 timbl 834:   if (output_format != WWW_PLAINTEXT
 835:    && output_format != WWW_PRESENT
 836:    && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 837:     HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
 838:                        output_stream, request, NO);
1.6 timbl 839:    if (intermediate) return HTMLGenerator(intermediate);
1.44 frystyk 840:    if (SGML_TRACE)
 841:      fprintf(TDEST, "HTML........ Can't parse HTML to %s\n",
 842:          HTAtom_name(output_format));
1.4 timbl 843:    exit (-99);
 844:   }
 845: 
1.44 frystyk 846:   if ((me = (HTStructured*) calloc(1, sizeof(*me))) == NULL)
 847:    outofmem(__FILE__, "HTML_new");
1.1 timbl 848: 
 849:   if (!got_styles) get_styles();
 850: 
1.4 timbl 851:   me->isa = &HTMLPresentation;
1.16 timbl 852:   me->dtd = &DTD;
 853:   me->node_anchor = request->anchor;
1.4 timbl 854:   me->title.size = 0;
 855:   me->title.growby = 128;
 856:   me->title.allocated = 0;
 857:   me->title.data = 0;
 858:   me->text = 0;
 859:   me->style_change = YES; /* Force check leading to text creation */
 860:   me->new_style = default_style;
 861:   me->old_style = 0;
 862:   me->sp = me->stack + MAX_NESTING - 1;
 863:   me->sp->tag_number = -1;              /* INVALID */
 864:   me->sp->style = default_style;           /* INVALID */
1.1 timbl 865:   
1.4 timbl 866:   me->comment_start = NULL;
 867:   me->comment_end = NULL;
1.16 timbl 868:   me->target = output_stream;
 869:   if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 870:   
1.4 timbl 871:   return (HTStructured*) me;
1.1 timbl 872: }
 873: 
 874: 
1.2 timbl 875: /*   HTConverter for HTML to plain text
 876: **   ----------------------------------
1.1 timbl 877: **
1.2 timbl 878: **   This will convert from HTML to presentation or plain text.
1.1 timbl 879: */
1.16 timbl 880: PUBLIC HTStream* HTMLToPlain ARGS5(
 881:    HTRequest *,      request,
 882:    void *,         param,
 883:    HTFormat,        input_format,
 884:    HTFormat,        output_format,
 885:    HTStream *,       output_stream)
1.1 timbl 886: {
1.16 timbl 887:   return SGML_new(&DTD, HTML_new(
 888:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 889: }
 890: 
 891: 
1.2 timbl 892: /*   HTConverter for HTML to C code
 893: **   ------------------------------
 894: **
1.36 frystyk 895: **   C code is like plain text but all non-preformatted code
1.2 timbl 896: **   is commented out.
 897: **   This will convert from HTML to presentation or plain text.
 898: */
1.16 timbl 899: PUBLIC HTStream* HTMLToC ARGS5(
 900:    HTRequest *,      request,
 901:    void *,         param,
 902:    HTFormat,        input_format,
 903:    HTFormat,        output_format,
 904:    HTStream *,       output_stream)
1.1 timbl 905: {
1.4 timbl 906:   
 907:   HTStructured * html;
 908:   
1.36 frystyk 909:   (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 910:   html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45 frystyk 911:   html->comment_start = "\n/* ";
1.16 timbl 912:   html->dtd = &DTD;
1.2 timbl 913:   html->comment_end = " */\n";    /* Must start in col 1 for cpp */
1.16 timbl 914:   return SGML_new(&DTD, html);
1.1 timbl 915: }
 916: 
 917: 
1.2 timbl 918: /*   Presenter for HTML
 919: **   ------------------
 920: **
 921: **   This will convert from HTML to presentation or plain text.
 922: **
 923: **   Override this if you have a windows version
1.1 timbl 924: */
1.2 timbl 925: #ifndef GUI
1.16 timbl 926: PUBLIC HTStream* HTMLPresent ARGS5(
 927:    HTRequest *,      request,
 928:    void *,         param,
 929:    HTFormat,        input_format,
 930:    HTFormat,        output_format,
 931:    HTStream *,       output_stream)
1.1 timbl 932: {
1.16 timbl 933:   return SGML_new(&DTD, HTML_new(
 934:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 935: }
1.2 timbl 936: #endif
1.1 timbl 937: 
 938: 
1.2 timbl 939: /*   Record error message as a hypertext object
 940: **   ------------------------------------------
 941: **
 942: **   The error message should be marked as an error so that
 943: **   it can be reloaded later.
 944: **   This implementation just throws up an error message
 945: **   and leaves the document unloaded.
1.9 timbl 946: **   A smarter implementation would load an error document,
 947: **   marking at such so that it is retried on reload.
1.1 timbl 948: **
1.2 timbl 949: ** On entry,
 950: **   sink  is a stream to the output device if any
 951: **   number is the HTTP error number
 952: **   message is the human readable message.
1.9 timbl 953: **
 954: ** On exit,
 955: **   returns a negative number to indicate lack of success in the load.
1.1 timbl 956: */
1.2 timbl 957: 
 958: PUBLIC int HTLoadError ARGS3(
1.17 luotonen 959:    HTRequest *,  req,
1.2 timbl 960:    int,      number,
 961:    CONST char *,  message)
 962: {
1.20 frystyk 963:   char *err = "Oh I screwed up!";      /* Dummy pointer not used (I hope) */
1.46 ! frystyk 964:   HTAlert(req, message);       /* @@@@@@@@@@@@@@@@@@@ */
1.20 frystyk 965:   /* Clean up! Henrik 04/03-94 */
 966:   if (req && req->output_stream)
 967:    (*req->output_stream->isa->abort)(req->output_stream, err);
1.33 frystyk 968: #if OLD_CODE
1.25 luotonen 969:   HTClearErrors(req);
1.33 frystyk 970: #endif
1.2 timbl 971:   return -number;
 972: } 
1.29 frystyk 973: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /