[BACK] Return to HTML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTML.c, revision 1.55

1.39 frystyk 1: /*                                   HTML.c
 2: **   STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
 3: **
1.43 frystyk 4: **   (c) COPYRIGHT MIT 1995.
1.39 frystyk 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.1 timbl 6: **
1.2 timbl 7: **   This generates a hypertext object. It converts from the
 8: **   structured stream interface for HTML events into the style-
1.47 frystyk 9: **   oriented interface of the HText interface. This module is
1.2 timbl 10: **   only used in clients and should not be linked into servers.
1.1 timbl 11: **
1.6 timbl 12: **   Override this module if making a new GUI browser.
1.1 timbl 13: **
1.35 duns 14: ** HISTORY:
 15: **   8 Jul 94 FM  Insulate free() from _free structure element.
 16: **
1.1 timbl 17: */
1.16 timbl 18: 
1.41 frystyk 19: /* Library include files */
 20: #include "tcp.h"
 21: #include "HTUtils.h"
 22: #include "HTString.h"
1.1 timbl 23: #include "HTAtom.h"
 24: #include "HTChunk.h"
 25: #include "HText.h"
 26: #include "HTStyle.h"
1.3 timbl 27: #include "HTAlert.h"
1.4 timbl 28: #include "HTMLGen.h"
1.8 timbl 29: #include "HTParse.h"
1.41 frystyk 30: #include "HTML.h"
1.1 timbl 31: 
 32: extern HTStyleSheet * styleSheet;   /* Application-wide */
 33: 
 34: /*   Module-wide style cache
 35: */
 36: PRIVATE int      got_styles = 0;
1.16 timbl 37: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 38: PRIVATE HTStyle *default_style;
1.1 timbl 39: 
 40: 
 41: /*       HTML Object
 42: **       -----------
 43: */
1.2 timbl 44: #define MAX_NESTING 20     /* Should be checked by parser */
 45: 
 46: typedef struct _stack_element {
 47:     HTStyle *   style;
 48:    int       tag_number;
 49: } stack_element;
 50: 
 51: struct _HTStructured {
 52:   CONST HTStructuredClass * isa;
1.54 frystyk 53:   HTRequest *            request;
1.2 timbl 54:   HTParentAnchor *      node_anchor;
 55:   HText *          text;
 56: 
 57:   HTStream*         target;         /* Output stream */
 58:   HTStreamClass       targetClass;      /* Output routines */
 59: 
 60:   HTChunk          title;     /* Grow by 128 */
 61:   
 62:   char *           comment_start; /* for literate programming */
 63:   char *           comment_end;
1.16 timbl 64:   
 65:   CONST SGML_dtd*      dtd;
 66:   
1.2 timbl 67:   HTTag *          current_tag;
 68:   BOOL            style_change;
 69:   HTStyle *         new_style;
 70:   HTStyle *         old_style;
 71:   BOOL            in_word; /* Have just had a non-white char */
1.44 frystyk 72: 
 73:   stack_element       stack[MAX_NESTING];
 74:   stack_element       *sp;         /* Style stack pointer */
 75:   int                overflow; /* Keep track of overflow nesting */
1.1 timbl 76: };
 77: 
1.2 timbl 78: struct _HTStream {
 79:   CONST HTStreamClass *   isa;
 80:   /* .... */
 81: };
1.1 timbl 82: 
 83: /*       Forward declarations of routines
 84: */
1.52 frystyk 85: PRIVATE void get_styles (void);
1.1 timbl 86: 
 87: 
1.52 frystyk 88: PRIVATE void actually_set_style (HTStructured * me);
 89: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle * style);
1.1 timbl 90: 
 91: /*   Style buffering avoids dummy paragraph begin/ends.
 92: */
1.4 timbl 93: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 94: 
 95: 
#ifdef OLD_CODE
/*  Legacy accented-character table from Peter Flynn, CURIA project.
**
**  NOTE(review): only the initializer rows are left; the declaration that
**  should open this table is missing, so this fragment cannot compile if
**  OLD_CODE is ever defined. It is kept for reference only.
**
**  These ifdefs don't solve the problem of a simple terminal emulator
**  with a different character set to the client machine. But nothing
**  does, except looking at the TERM setting.
*/
    { "ocus" , "&" },           /* for CURIA */
#ifdef IBMPC
    { "aacute" , "\240" },      /* For PC display */
    { "eacute" , "\202" },
    { "iacute" , "\241" },
    { "oacute" , "\242" },
    { "uacute" , "\243" },
    { "Aacute" , "\101" },
    { "Eacute" , "\220" },
    { "Iacute" , "\111" },
    { "Oacute" , "\117" },
    { "Uacute" , "\125" },
#else
    { "aacute" , "\341" },      /* Works for openwindows -- Peter Flynn */
    { "eacute" , "\351" },
    { "iacute" , "\355" },
    { "oacute" , "\363" },
    { "uacute" , "\372" },
    { "Aacute" , "\301" },
    { "Eacute" , "\310" },
    { "Iacute" , "\315" },
    { "Oacute" , "\323" },
    { "Uacute" , "\332" },
#endif
    { 0,  0 }                   /* Terminate list */
};
#endif
1.1 timbl 131: 
 132: 
/*  Entity values -- for ISO Latin 1 local representation
**
**  This MUST match exactly the table referred to in the DTD!
**
**  Each entry is a one-character string written as an octal escape.
*/
static char * ISO_Latin1[] = {
    "\306",     /* capital AE diphthong (ligature) */
    "\301",     /* capital A, acute accent */
    "\302",     /* capital A, circumflex accent */
    "\300",     /* capital A, grave accent */
    "\305",     /* capital A, ring */
    "\303",     /* capital A, tilde */
    "\304",     /* capital A, dieresis or umlaut mark */
    "\307",     /* capital C, cedilla */
    "\320",     /* capital Eth, Icelandic */
    "\311",     /* capital E, acute accent */
    "\312",     /* capital E, circumflex accent */
    "\310",     /* capital E, grave accent */
    "\313",     /* capital E, dieresis or umlaut mark */
    "\315",     /* capital I, acute accent */
    "\316",     /* capital I, circumflex accent */
    "\314",     /* capital I, grave accent */
    "\317",     /* capital I, dieresis or umlaut mark */
    "\321",     /* capital N, tilde */
    "\323",     /* capital O, acute accent */
    "\324",     /* capital O, circumflex accent */
    "\322",     /* capital O, grave accent */
    "\330",     /* capital O, slash */
    "\325",     /* capital O, tilde */
    "\326",     /* capital O, dieresis or umlaut mark */
    "\336",     /* capital THORN, Icelandic */
    "\332",     /* capital U, acute accent */
    "\333",     /* capital U, circumflex accent */
    "\331",     /* capital U, grave accent */
    "\334",     /* capital U, dieresis or umlaut mark */
    "\335",     /* capital Y, acute accent */
    "\341",     /* small a, acute accent */
    "\342",     /* small a, circumflex accent */
    "\346",     /* small ae diphthong (ligature) */
    "\340",     /* small a, grave accent */
    "\046",     /* ampersand */
    "\345",     /* small a, ring */
    "\343",     /* small a, tilde */
    "\344",     /* small a, dieresis or umlaut mark */
    "\347",     /* small c, cedilla */
    "\351",     /* small e, acute accent */
    "\352",     /* small e, circumflex accent */
    "\350",     /* small e, grave accent */
    "\360",     /* small eth, Icelandic */
    "\353",     /* small e, dieresis or umlaut mark */
    "\076",     /* greater than */
    "\355",     /* small i, acute accent */
    "\356",     /* small i, circumflex accent */
    "\354",     /* small i, grave accent */
    "\357",     /* small i, dieresis or umlaut mark */
    "\074",     /* less than */
    "\361",     /* small n, tilde */
    "\363",     /* small o, acute accent */
    "\364",     /* small o, circumflex accent */
    "\362",     /* small o, grave accent */
    "\370",     /* small o, slash */
    "\365",     /* small o, tilde */
    "\366",     /* small o, dieresis or umlaut mark */
    "\042",     /* double quote sign - June 94 */
    "\337",     /* small sharp s, German (sz ligature) */
    "\376",     /* small thorn, Icelandic */
    "\372",     /* small u, acute accent */
    "\373",     /* small u, circumflex accent */
    "\371",     /* small u, grave accent */
    "\374",     /* small u, dieresis or umlaut mark */
    "\375",     /* small y, acute accent */
    "\377",     /* small y, dieresis or umlaut mark */
};
 205: 
1.2 timbl 206: 
 207: /*   Entity values -- for NeXT local representation
 208: **
 209: **   This MUST match exactly the table referred to in the DTD!
 210: **
 211: */
 212: static char * NeXTCharacters[] = {
 213:    "341円", /* capital AE diphthong (ligature)   */ 
 214:    "202円", /* capital A, acute accent       */ 
 215:    "203円", /* capital A, circumflex accent     */ 
 216:    "201円", /* capital A, grave accent       */ 
 217:    "206円", /* capital A, ring           */ 
 218:    "204円", /* capital A, tilde           */ 
 219:    "205円", /* capital A, dieresis or umlaut mark  */ 
 220:    "207円", /* capital C, cedilla          */ 
 221:    "220円", /* capital Eth, Icelandic        */ 
 222:    "211円", /* capital E, acute accent               */ 
 223:    "212円", /* capital E, circumflex accent             */ 
 224:    "210円", /* capital E, grave accent               */ 
 225:    "213円", /* capital E, dieresis or umlaut mark          */ 
 226:    "215円", /* capital I, acute accent               */ 
 227:    "216円", /* capital I, circumflex accent     these are    */ 
 228:    "214円", /* capital I, grave accent       ISO -100 hex  */ 
 229:    "217円", /* capital I, dieresis or umlaut mark          */ 
 230:    "221円", /* capital N, tilde                   */ 
 231:    "223円", /* capital O, acute accent               */ 
 232:    "224円", /* capital O, circumflex accent             */ 
 233:    "222円", /* capital O, grave accent               */ 
 234:    "351円", /* capital O, slash       'cept this */ 
 235:    "225円", /* capital O, tilde                   */ 
 236:    "226円", /* capital O, dieresis or umlaut mark          */ 
 237:    "234円", /* capital THORN, Icelandic */ 
 238:    "230円", /* capital U, acute accent */ 
 239:    "231円", /* capital U, circumflex accent */ 
 240:    "227円", /* capital U, grave accent */ 
 241:    "232円", /* capital U, dieresis or umlaut mark */ 
 242:    "233円", /* capital Y, acute accent */ 
 243:    "326円", /* small a, acute accent */ 
 244:    "327円", /* small a, circumflex accent */ 
 245:    "361円", /* small ae diphthong (ligature) */ 
 246:    "325円", /* small a, grave accent */ 
 247:    "046円", /* ampersand */ 
 248:    "332円", /* small a, ring */ 
 249:    "330円", /* small a, tilde */ 
 250:    "331円", /* small a, dieresis or umlaut mark */ 
 251:    "333円", /* small c, cedilla */ 
 252:    "335円", /* small e, acute accent */ 
 253:    "336円", /* small e, circumflex accent */ 
 254:    "334円", /* small e, grave accent */ 
 255:    "346円", /* small eth, Icelandic     */ 
 256:    "337円", /* small e, dieresis or umlaut mark */ 
 257:    "076円", /* greater than */ 
 258:    "342円", /* small i, acute accent */ 
 259:    "344円", /* small i, circumflex accent */ 
 260:    "340円", /* small i, grave accent */ 
 261:    "345円", /* small i, dieresis or umlaut mark */ 
 262:    "074円", /* less than */ 
 263:    "347円", /* small n, tilde */ 
 264:    "355円", /* small o, acute accent */ 
 265:    "356円", /* small o, circumflex accent */ 
 266:    "354円", /* small o, grave accent */ 
 267:    "371円", /* small o, slash */ 
 268:    "357円", /* small o, tilde */ 
 269:    "360円", /* small o, dieresis or umlaut mark */ 
1.36 frystyk 270:     "042円", /* double quote sign - June 94 */
1.2 timbl 271:    "373円", /* small sharp s, German (sz ligature) */ 
 272:    "374円", /* small thorn, Icelandic */ 
 273:    "363円", /* small u, acute accent */ 
 274:    "364円", /* small u, circumflex accent */ 
 275:    "362円", /* small u, grave accent */ 
 276:    "366円", /* small u, dieresis or umlaut mark */ 
 277:    "367円", /* small y, acute accent */ 
 278:    "375円", /* small y, dieresis or umlaut mark */ 
1.1 timbl 279: };
 280: 
1.2 timbl 281: /*   Entity values -- for IBM/PC Code Page 850 (International)
 282: **
 283: **   This MUST match exactly the table referred to in the DTD!
 284: **
 285: */
 286: /* @@@@@@@@@@@@@@@@@ TBD */
 287: 
 288: 
 289: 
 290: /*       Set character set
 291: **       ----------------
 292: */
 293: 
 294: PRIVATE char** p_entity_values = ISO_Latin1;  /* Pointer to translation */
1.1 timbl 295: 
1.53 frystyk 296: PUBLIC void HTMLUseCharacterSet (HTMLCharacterSet i)
1.2 timbl 297: {
 298:   p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
 299:                       : ISO_Latin1;
 300: }
1.1 timbl 301: 
 302: 
 303: /*       Flattening the style structure
 304: **       ------------------------------
 305: **
 306: On the NeXT, and on any read-only browser, it is simpler for the text to have
 307: a sequence of styles, rather than a nested tree of styles. In this
 308: case we have to flatten the structure as it arrives from SGML tags into
 309: a sequence of styles.
 310: */
 311: 
 312: /*       If style really needs to be set, call this
 313: */
1.53 frystyk 314: PRIVATE void actually_set_style (HTStructured * me)
1.1 timbl 315: {
1.4 timbl 316:   if (!me->text) {          /* First time through */
1.54 frystyk 317:      me->text = HText_new2(me->request, me->node_anchor, me->target);
1.4 timbl 318:      HText_beginAppend(me->text);
 319:      HText_setStyle(me->text, me->new_style);
 320:      me->in_word = NO;
1.1 timbl 321:   } else {
1.4 timbl 322:      HText_setStyle(me->text, me->new_style);
1.1 timbl 323:   }
1.4 timbl 324:   me->old_style = me->new_style;
 325:   me->style_change = NO;
1.1 timbl 326: }
 327: 
 328: /*   If you THINK you need to change style, call this
 329: */
 330: 
1.53 frystyk 331: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle *style)
1.1 timbl 332: {
1.4 timbl 333:   if (me->new_style!=style) {
 334:    me->style_change = YES;
 335:    me->new_style = style;
1.1 timbl 336:   }
1.11 timbl 337:   me->in_word = NO;
1.1 timbl 338: }
 339: 
1.2 timbl 340: /*_________________________________________________________________________
 341: **
 342: **           A C T I O N   R O U T I N E S
 343: */
 344: 
 345: /*   Character handling
 346: **   ------------------
1.1 timbl 347: */
1.53 frystyk 348: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1 timbl 349: {
1.2 timbl 350: 
1.4 timbl 351:   switch (me->sp[0].tag_number) {
1.2 timbl 352:   case HTML_COMMENT:
 353:    break;                 /* Do Nothing */
 354:    
 355:   case HTML_TITLE:  
1.55 ! frystyk 356:    HTChunk_putc(&me->title, c);
1.2 timbl 357:    break;
 358: 
 359:    
 360:   case HTML_LISTING:             /* Litteral text */
 361:   case HTML_XMP:
 362:   case HTML_PLAINTEXT:
 363:   case HTML_PRE:
 364: /*   We guarrantee that the style is up-to-date in begin_litteral
 365: */
1.4 timbl 366:    HText_appendCharacter(me->text, c);
1.2 timbl 367:    break;
 368:    
 369:   default:                  /* Free format text */
1.4 timbl 370:    if (me->style_change) {
1.42 frystyk 371:      if ((c=='\n') || (c==' ')) return HT_OK;  /* Ignore it */
1.2 timbl 372:      UPDATE_STYLE;
 373:    }
 374:    if (c=='\n') {
1.4 timbl 375:      if (me->in_word) {
 376:        HText_appendCharacter(me->text, ' ');
 377:        me->in_word = NO;
1.2 timbl 378:      }
 379:    } else {
1.4 timbl 380:      HText_appendCharacter(me->text, c);
 381:      me->in_word = YES;
1.2 timbl 382:    }
 383:   } /* end switch */
1.42 frystyk 384:   return HT_OK;
1.1 timbl 385: }
 386: 
1.2 timbl 387: 
 388: 
 389: /*   String handling
 390: **   ---------------
 391: **
 392: **   This is written separately from put_character becuase the loop can
1.11 timbl 393: **   in some cases be promoted to a higher function call level for speed.
1.2 timbl 394: */
1.53 frystyk 395: PRIVATE int HTML_put_string (HTStructured * me, CONST char* s)
1.1 timbl 396: {
1.2 timbl 397: 
1.4 timbl 398:   switch (me->sp[0].tag_number) {
1.2 timbl 399:   case HTML_COMMENT:
 400:    break;                 /* Do Nothing */
 401:    
 402:   case HTML_TITLE:  
1.55 ! frystyk 403:    HTChunk_puts(&me->title, s);
1.2 timbl 404:    break;
 405: 
 406:    
 407:   case HTML_LISTING:             /* Litteral text */
 408:   case HTML_XMP:
 409:   case HTML_PLAINTEXT:
 410:   case HTML_PRE:
 411: 
 412: /*   We guarrantee that the style is up-to-date in begin_litteral
 413: */
1.4 timbl 414:    HText_appendText(me->text, s);
1.2 timbl 415:    break;
 416:    
 417:   default:                  /* Free format text */
 418:     {
 419:      CONST char *p = s;
1.4 timbl 420:      if (me->style_change) {
1.2 timbl 421:        for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
1.42 frystyk 422:        if (!*p) return HT_OK;
1.2 timbl 423:        UPDATE_STYLE;
 424:      }
 425:      for(; *p; p++) {
1.4 timbl 426:        if (me->style_change) {
1.2 timbl 427:          if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
 428:          UPDATE_STYLE;
 429:        }
 430:        if (*p=='\n') {
1.4 timbl 431:          if (me->in_word) {
 432:            HText_appendCharacter(me->text, ' ');
 433:            me->in_word = NO;
1.2 timbl 434:          }
 435:        } else {
1.4 timbl 436:          HText_appendCharacter(me->text, *p);
 437:          me->in_word = YES;
1.2 timbl 438:        }
 439:      } /* for */
 440:    }
 441:   } /* end switch */
1.42 frystyk 442:   return HT_OK;
1.1 timbl 443: }
 444: 
 445: 
1.2 timbl 446: /*   Buffer write
1.3 timbl 447: **   ------------
1.1 timbl 448: */
1.53 frystyk 449: PRIVATE int HTML_write (HTStructured * me, CONST char* s, int l)
1.1 timbl 450: {
1.38 frystyk 451:   while (l-- > 0)
 452:    HTML_put_character(me, *s++);
1.42 frystyk 453:   return HT_OK;
1.1 timbl 454: }
1.2 timbl 455: 
 456: 
 457: /*   Start Element
 458: **   -------------
 459: */
1.53 frystyk 460: PRIVATE void HTML_start_element (
 461:    HTStructured * me,
 462:    int           element_number,
 463:    CONST BOOL*       present,
 464:    CONST char **      value)
1.2 timbl 465: {
 466:   switch (element_number) {
 467:   case HTML_A:
 468:    {
1.8 timbl 469:      HTChildAnchor * source;
1.9 timbl 470:      char * href = NULL;
1.42 frystyk 471:      if (present[HTML_A_HREF])
1.9 timbl 472:        StrAllocCopy(href, value[HTML_A_HREF]);
1.8 timbl 473:      source = HTAnchor_findChildAndLink(
1.4 timbl 474:        me->node_anchor,                /* parent */
1.2 timbl 475:        present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 476:        present[HTML_A_HREF] ? href : 0,        /* Addresss */
1.16 timbl 477:        present[HTML_A_REL] && value[HTML_A_REL] ? 
1.54 frystyk 478:            (HTLinkType) HTAtom_for(value[HTML_A_REL])
 479:                        : 0);
1.2 timbl 480:      
 481:      if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
 482:        HTParentAnchor * dest = 
 483:          HTAnchor_parent(
 484:            HTAnchor_followMainLink((HTAnchor*)source)
 485:                  );
 486:        if (!HTAnchor_title(dest))
 487:            HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
 488:      }
 489:      UPDATE_STYLE;
1.4 timbl 490:      HText_beginAnchor(me->text, source);
1.42 frystyk 491:      FREE(href);             /* Leak fix Henrik 17/02-94 */
1.2 timbl 492:    }
 493:    break;
 494:    
 495:   case HTML_TITLE:
1.55 ! frystyk 496:     HTChunk_clear(&me->title);
1.2 timbl 497:    break;
 498:    
 499:   case HTML_NEXTID:
 500:    /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 501:        HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 502:    break;
 503:    
 504:   case HTML_ISINDEX:
1.4 timbl 505:    HTAnchor_setIndex(me->node_anchor);
1.2 timbl 506:    break;
 507:    
1.15 timbl 508:   case HTML_BR: 
 509:    UPDATE_STYLE;
 510:    HText_appendCharacter(me->text, '\n');
 511:    me->in_word = NO;
 512:    break;
 513:    
 514:   case HTML_HR: 
 515:    UPDATE_STYLE;
 516:    HText_appendCharacter(me->text, '\n');
1.16 timbl 517:    HText_appendText(me->text, "___________________________________");
1.15 timbl 518:    HText_appendCharacter(me->text, '\n');
 519:    me->in_word = NO;
 520:    break;
 521:    
1.2 timbl 522:   case HTML_P:
 523:    UPDATE_STYLE;
1.4 timbl 524:    HText_appendParagraph(me->text);
 525:    me->in_word = NO;
1.2 timbl 526:    break;
 527: 
 528:   case HTML_DL:
1.11 timbl 529:     change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 530:        ? styles[HTML_DL]
1.2 timbl 531:        : styles[HTML_DL]);
 532:    break;
 533:    
 534:   case HTML_DT:
1.4 timbl 535:     if (!me->style_change) {
 536:      HText_appendParagraph(me->text);
 537:      me->in_word = NO;
1.2 timbl 538:    }
 539:    break;
 540:    
 541:   case HTML_DD:
 542:     UPDATE_STYLE;
1.4 timbl 543:    HTML_put_character(me, '\t');  /* Just tab out one stop */
 544:    me->in_word = NO;
 545:    break;
1.2 timbl 546: 
 547:   case HTML_UL:
 548:   case HTML_OL:
 549:   case HTML_MENU:
 550:   case HTML_DIR:
1.11 timbl 551:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 552:    break;
 553:    
 554:   case HTML_LI:
 555:     UPDATE_STYLE;
1.7 timbl 556:    if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 557:      HText_appendParagraph(me->text);
1.2 timbl 558:    else
1.4 timbl 559:      HText_appendCharacter(me->text, '\t');   /* Tab @@ nl for UL? */
 560:    me->in_word = NO;
1.2 timbl 561:    break;
 562:    
 563:   case HTML_LISTING:             /* Litteral text */
 564:   case HTML_XMP:
 565:   case HTML_PLAINTEXT:
 566:   case HTML_PRE:
1.11 timbl 567:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 568:    UPDATE_STYLE;
1.4 timbl 569:    if (me->comment_end)
 570:      HText_appendText(me->text, me->comment_end);
1.2 timbl 571:    break;
1.11 timbl 572: 
1.23 frystyk 573:   case HTML_IMG:           /* Images */
 574:    {
 575:      HTChildAnchor *source;
 576:      char *src = NULL;
1.49 frystyk 577:      if (present[HTML_IMG_SRC])
1.23 frystyk 578:        StrAllocCopy(src, value[HTML_IMG_SRC]);
 579:      source = HTAnchor_findChildAndLink(
 580:                        me->node_anchor,  /* parent */
 581:                        0,           /* Tag */
 582:                        src ? src : 0,  /* Addresss */
 583:                        0);
 584:      UPDATE_STYLE;
 585:      HText_appendImage(me->text, source,
1.24 frystyk 586:           present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
 587:           present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
 588:           present[HTML_IMG_ISMAP] ? YES : NO);
1.23 frystyk 589:      free(src);
1.24 frystyk 590:    }    
 591:    break;
 592: 
 593:   case HTML_HTML:          /* Ignore these altogether */
 594:   case HTML_HEAD:
 595:   case HTML_BODY:
 596:   
1.10 timbl 597:   case HTML_TT:           /* Physical character highlighting */
 598:   case HTML_B:            /* Currently ignored */
 599:   case HTML_I:
 600:   case HTML_U:
 601:   
 602:   case HTML_EM:           /* Logical character highlighting */
 603:   case HTML_STRONG:         /* Currently ignored */
 604:   case HTML_CODE:
 605:   case HTML_SAMP:
 606:   case HTML_KBD:
 607:   case HTML_VAR:
 608:   case HTML_DFN:
 609:   case HTML_CITE:
 610:    break;
 611:    
1.11 timbl 612:   case HTML_H1:           /* paragraph styles */
 613:   case HTML_H2:
 614:   case HTML_H3:
 615:   case HTML_H4:
 616:   case HTML_H5:
 617:   case HTML_H6:
 618:   case HTML_H7:
 619:   case HTML_ADDRESS:
 620:   case HTML_BLOCKQUOTE:
 621:    change_paragraph_style(me, styles[element_number]);   /* May be postponed */
1.2 timbl 622:    break;
 623: 
 624:   } /* end switch */
 625: 
1.16 timbl 626:   if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 627:     if (me->sp == me->stack) {
1.44 frystyk 628:      if (SGML_TRACE)
1.50 frystyk 629:        TTYPrint(TDEST, "HTML........ Maximum nesting of %d exceded!\n",
1.44 frystyk 630:            MAX_NESTING); 
 631:      me->overflow++;
1.12 timbl 632:      return;
 633:    }
1.4 timbl 634:    --(me->sp);
 635:    me->sp[0].style = me->new_style;    /* Stack new style */
 636:    me->sp[0].tag_number = element_number;
1.10 timbl 637:   } 
1.1 timbl 638: }
1.10 timbl 639: 
1.2 timbl 640: 
1.1 timbl 641: /*       End Element
1.2 timbl 642: **       -----------
1.1 timbl 643: **
1.2 timbl 644: */
 645: /*   When we end an element, the style must be returned to that
1.1 timbl 646: **   in effect before that element. Note that anchors (etc?)
 647: **   don't have an associated style, so that we must scan down the
 648: **   stack for an element with a defined style. (In fact, the styles
 649: **   should be linked to the whole stack not just the top one.)
 650: **   TBL 921119
1.6 timbl 651: **
 652: **   We don't turn on "CAREFUL" check because the parser produces
 653: **   (internal code errors apart) good nesting. The parser checks
 654: **   incoming code errors, not this module.
1.1 timbl 655: */
1.53 frystyk 656: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1 timbl 657: {
1.2 timbl 658: #ifdef CAREFUL         /* parser assumed to produce good nesting */
1.4 timbl 659:   if (element_number != me->sp[0].tag_number) {
1.50 frystyk 660:     TTYPrint(TDEST, "HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 661:        me->dtd->tags[element_number].name,
 662:        me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 663:        /* panic */
1.1 timbl 664:   }
1.2 timbl 665: #endif
1.44 frystyk 666: 
 667:   /* HFN, If overflow of nestings, we need to get back to reality */
 668:   if (me->overflow > 0) {
 669:    me->overflow--;
 670:    return;
 671:   }
 672: 
1.4 timbl 673:   me->sp++;             /* Pop state off stack */
1.44 frystyk 674: 
1.2 timbl 675:   switch(element_number) {
 676: 
 677:   case HTML_A:
 678:    UPDATE_STYLE;
1.4 timbl 679:    HText_endAnchor(me->text);
1.2 timbl 680:    break;
 681: 
 682:   case HTML_TITLE:
1.55 ! frystyk 683:     HTChunk_terminate(&me->title);
1.4 timbl 684:    HTAnchor_setTitle(me->node_anchor, me->title.data);
1.2 timbl 685:    break;
 686:    
 687:   case HTML_LISTING:             /* Litteral text */
 688:   case HTML_XMP:
 689:   case HTML_PLAINTEXT:
 690:   case HTML_PRE:
1.4 timbl 691:    if (me->comment_start)
 692:      HText_appendText(me->text, me->comment_start);
1.2 timbl 693:    /* Fall through */
 694:    
 695:   default:
1.44 frystyk 696: 
 697:    /* Often won't really change */
 698:    change_paragraph_style(me, me->sp->style);
1.2 timbl 699:    break;
 700:    
 701:   } /* switch */
1.1 timbl 702: }
 703: 
1.2 timbl 704: 
 705: /*       Expanding entities
 706: **       ------------------
 707: */
 708: /*   (In fact, they all shrink!)
1.1 timbl 709: */
1.2 timbl 710: 
1.53 frystyk 711: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1 timbl 712: {
1.4 timbl 713:   HTML_put_string(me, ISO_Latin1[entity_number]);  /* @@ Other representations */
1.1 timbl 714: }
1.2 timbl 715: 
1.42 frystyk 716: /*   Flush an HTML object
 717: **   --------------------
 718: */
1.53 frystyk 719: PUBLIC int HTML_flush (HTStructured * me)
1.42 frystyk 720: {
 721:   UPDATE_STYLE;              /* Creates empty document here! */
 722:   if (me->comment_end)
 723:        HTML_put_string(me,me->comment_end);
 724:   HText_endAppend(me->text);
 725:   return (*me->targetClass.flush)(me->target);
 726: }
1.2 timbl 727: 
 728: /*   Free an HTML object
 729: **   -------------------
 730: **
1.4 timbl 731: ** If the document is empty, the text object will not yet exist.
 732:  So we could in fact abandon creating the document and return
 733:  an error code. In fact an empty document is an important type
 734:  of document, so we don't.
 735: **
 736: **   If non-interactive, everything is freed off.  No: crashes -listrefs
1.2 timbl 737: **   Otherwise, the interactive object is left.   
 738: */
1.53 frystyk 739: PUBLIC int HTML_free (HTStructured * me)
1.1 timbl 740: {
1.4 timbl 741:   UPDATE_STYLE;       /* Creates empty document here! */
 742:   if (me->comment_end)
 743:        HTML_put_string(me,me->comment_end);
 744:   HText_endAppend(me->text);
 745: 
 746:   if (me->target) {
1.35 duns 747:     (*me->targetClass._free)(me->target);
1.2 timbl 748:   }
1.55 ! frystyk 749:   HTChunk_clear(&me->title); /* Henrik 18/02-94 */
1.4 timbl 750:   free(me);
1.42 frystyk 751:   return HT_OK;
1.1 timbl 752: }
 753: 
 754: 
1.53 frystyk 755: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.1 timbl 756: 
1.14 timbl 757: {
 758:   if (me->target) {
 759:     (*me->targetClass.abort)(me->target, e);
 760:   }
1.55 ! frystyk 761:   HTChunk_clear(&me->title); /* Henrik 18/02-94 */
1.14 timbl 762:   free(me);
1.42 frystyk 763:   return HT_ERROR;
1.1 timbl 764: }
 765: 
1.2 timbl 766: 
 767: /*   Get Styles from style sheet
 768: **   ---------------------------
 769: */
1.53 frystyk 770: PRIVATE void get_styles (void)
1.1 timbl 771: {
1.2 timbl 772:   got_styles = YES;
 773:   
 774:   default_style =      HTStyleNamed(styleSheet, "Normal");
1.1 timbl 775: 
1.2 timbl 776:   styles[HTML_H1] =     HTStyleNamed(styleSheet, "Heading1");
 777:   styles[HTML_H2] =     HTStyleNamed(styleSheet, "Heading2");
 778:   styles[HTML_H3] =     HTStyleNamed(styleSheet, "Heading3");
 779:   styles[HTML_H4] =     HTStyleNamed(styleSheet, "Heading4");
 780:   styles[HTML_H5] =     HTStyleNamed(styleSheet, "Heading5");
 781:   styles[HTML_H6] =     HTStyleNamed(styleSheet, "Heading6");
 782:   styles[HTML_H7] =     HTStyleNamed(styleSheet, "Heading7");
 783: 
 784:   styles[HTML_DL] =     HTStyleNamed(styleSheet, "Glossary");
 785:   styles[HTML_UL] =
 786:   styles[HTML_OL] =     HTStyleNamed(styleSheet, "List");
 787:   styles[HTML_MENU] =        HTStyleNamed(styleSheet, "Menu");
 788:   styles[HTML_DIR] =     HTStyleNamed(styleSheet, "Dir");  
1.16 timbl 789: /* styles[HTML_DLC] =     HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 790:   styles[HTML_ADDRESS]=   HTStyleNamed(styleSheet, "Address");
 791:   styles[HTML_BLOCKQUOTE]=  HTStyleNamed(styleSheet, "BlockQuote");
 792:   styles[HTML_PLAINTEXT] =
 793:   styles[HTML_XMP] =     HTStyleNamed(styleSheet, "Example");
 794:   styles[HTML_PRE] =     HTStyleNamed(styleSheet, "Preformatted");
 795:   styles[HTML_LISTING] =   HTStyleNamed(styleSheet, "Listing");
 796: }
 797: /*               P U B L I C
 798: */
 799: 
 800: /*   Structured Object Class
 801: **   -----------------------
 802: */
1.47 frystyk 803: PRIVATE CONST HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2 timbl 804: {       
 805:    "text/html",
1.42 frystyk 806:    HTML_flush,
1.2 timbl 807:    HTML_free,
1.14 timbl 808:    HTML_abort,
1.2 timbl 809:    HTML_put_character,   HTML_put_string, HTML_write,
 810:    HTML_start_element,   HTML_end_element,
 811:    HTML_put_entity
 812: }; 
1.1 timbl 813: 
1.4 timbl 814: 
1.2 timbl 815: /*       New Structured Text object
 816: **       --------------------------
 817: **
1.16 timbl 818: **   The structured stream can generate either presentation,
1.4 timbl 819: **   or plain text, or HTML.
1.1 timbl 820: */
1.53 frystyk 821: PRIVATE HTStructured* HTML_new (HTRequest *  request,
 822:                   void *       param,
 823:                   HTFormat      input_format,
 824:                   HTFormat      output_format,
 825:                   HTStream * output_stream)
1.1 timbl 826: {
 827: 
1.4 timbl 828:   HTStructured * me;
 829:   
1.47 frystyk 830: #if 0
1.16 timbl 831:   if (output_format != WWW_PLAINTEXT
 832:    && output_format != WWW_PRESENT
 833:    && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 834:     HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
 835:                        output_stream, request, NO);
1.6 timbl 836:    if (intermediate) return HTMLGenerator(intermediate);
1.44 frystyk 837:    if (SGML_TRACE)
1.50 frystyk 838:      TTYPrint(TDEST, "HTML........ Can't parse HTML to %s\n",
1.44 frystyk 839:          HTAtom_name(output_format));
1.4 timbl 840:    exit (-99);
 841:   }
1.47 frystyk 842: #endif
1.4 timbl 843: 
1.44 frystyk 844:   if ((me = (HTStructured*) calloc(1, sizeof(*me))) == NULL)
 845:    outofmem(__FILE__, "HTML_new");
1.1 timbl 846: 
 847:   if (!got_styles) get_styles();
 848: 
1.4 timbl 849:   me->isa = &HTMLPresentation;
1.47 frystyk 850:   me->dtd = &HTMLP_dtd;
1.54 frystyk 851:   me->request = request;
1.48 frystyk 852:   me->node_anchor = HTRequest_anchor(request);
1.4 timbl 853:   me->title.size = 0;
 854:   me->title.growby = 128;
 855:   me->title.allocated = 0;
 856:   me->title.data = 0;
 857:   me->text = 0;
 858:   me->style_change = YES; /* Force check leading to text creation */
 859:   me->new_style = default_style;
 860:   me->old_style = 0;
 861:   me->sp = me->stack + MAX_NESTING - 1;
 862:   me->sp->tag_number = -1;              /* INVALID */
 863:   me->sp->style = default_style;           /* INVALID */
1.1 timbl 864:   
1.4 timbl 865:   me->comment_start = NULL;
 866:   me->comment_end = NULL;
1.16 timbl 867:   me->target = output_stream;
 868:   if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 869:   
1.4 timbl 870:   return (HTStructured*) me;
1.1 timbl 871: }
 872: 
 873: 
1.2 timbl 874: /*   HTConverter for HTML to plain text
 875: **   ----------------------------------
1.1 timbl 876: **
1.2 timbl 877: **   This will convert from HTML to presentation or plain text.
1.1 timbl 878: */
1.53 frystyk 879: PUBLIC HTStream* HTMLToPlain (
 880:    HTRequest *       request,
 881:    void *         param,
 882:    HTFormat        input_format,
 883:    HTFormat        output_format,
 884:    HTStream *       output_stream)
1.1 timbl 885: {
1.47 frystyk 886:   return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 887:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 888: }
 889: 
 890: 
1.2 timbl 891: /*   HTConverter for HTML to C code
 892: **   ------------------------------
 893: **
1.36 frystyk 894: **   C code is like plain text but all non-preformatted code
1.2 timbl 895: **   is commented out.
 896: **   This will convert from HTML to presentation or plain text.
 897: */
1.53 frystyk 898: PUBLIC HTStream* HTMLToC (
 899:    HTRequest *       request,
 900:    void *         param,
 901:    HTFormat        input_format,
 902:    HTFormat        output_format,
 903:    HTStream *       output_stream)
1.1 timbl 904: {
1.4 timbl 905:   
 906:   HTStructured * html;
 907:   
1.36 frystyk 908:   (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 909:   html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45 frystyk 910:   html->comment_start = "\n/* ";
1.47 frystyk 911:   html->dtd = &HTMLP_dtd;
1.2 timbl 912:   html->comment_end = " */\n";    /* Must start in col 1 for cpp */
1.47 frystyk 913:   return SGML_new(&HTMLP_dtd, html);
1.1 timbl 914: }
 915: 
 916: 
1.2 timbl 917: /*   Presenter for HTML
 918: **   ------------------
 919: **
 920: **   This will convert from HTML to presentation or plain text.
 921: **
 922: **   Override this if you have a windows version
1.1 timbl 923: */
1.2 timbl 924: #ifndef GUI
1.53 frystyk 925: PUBLIC HTStream* HTMLPresent (
 926:    HTRequest *       request,
 927:    void *         param,
 928:    HTFormat        input_format,
 929:    HTFormat        output_format,
 930:    HTStream *       output_stream)
1.1 timbl 931: {
1.47 frystyk 932:   return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 933:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 934: }
1.2 timbl 935: #endif
1.29 frystyk 936: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /