[BACK] Return to HTML.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTML.c, revision 1.62

1.39 frystyk 1: /*                                   HTML.c
 2: **   STRUCTURED STREAM TO RICH HYPERTEXT CONVERTER
 3: **
1.43 frystyk 4: **   (c) COPYRIGHT MIT 1995.
1.39 frystyk 5: **   Please first read the full copyright statement in the file COPYRIGH.
1.62 ! frystyk 6: **   @(#) $Id: HTML.c,v 1.61 1996/04/12 17:47:44 frystyk Exp $
1.1 timbl 7: **
1.2 timbl 8: **   This generates a hypertext object. It converts from the
 9: **   structured stream interface for HTML events into the style-
1.47 frystyk 10: **   oriented interface of the HText interface. This module is
1.2 timbl 11: **   only used in clients and should not be linked into servers.
1.1 timbl 12: **
1.6 timbl 13: **   Override this module if making a new GUI browser.
1.1 timbl 14: **
1.35 duns 15: ** HISTORY:
 16: **   8 Jul 94 FM  Insulate free() from _free structure element.
 17: **
1.1 timbl 18: */
1.16 timbl 19: 
1.41 frystyk 20: /* Library include files */
1.60 frystyk 21: #include "sysdep.h"
1.41 frystyk 22: #include "HTUtils.h"
 23: #include "HTString.h"
1.1 timbl 24: #include "HTAtom.h"
 25: #include "HTChunk.h"
 26: #include "HText.h"
 27: #include "HTStyle.h"
1.3 timbl 28: #include "HTAlert.h"
1.4 timbl 29: #include "HTMLGen.h"
1.8 timbl 30: #include "HTParse.h"
1.41 frystyk 31: #include "HTML.h"
1.1 timbl 32: 
 33: extern HTStyleSheet * styleSheet;   /* Application-wide */
 34: 
 35: /*   Module-wide style cache
 36: */
 37: PRIVATE int      got_styles = 0;
1.16 timbl 38: PRIVATE HTStyle *styles[HTMLP_ELEMENTS];
1.2 timbl 39: PRIVATE HTStyle *default_style;
1.1 timbl 40: 
/*
**  Marker character for "emit a real tab".  HTML_put_character() writes
**  '\t' to the text object when it is handed this value, instead of
**  treating it as collapsible white space.
*/
#define TAB  '\0'
1.1 timbl 42: 
 43: /*       HTML Object
 44: **       -----------
 45: */
1.2 timbl 46: #define MAX_NESTING 20     /* Should be checked by parser */
 47: 
 48: typedef struct _stack_element {
 49:     HTStyle *   style;
 50:    int       tag_number;
 51: } stack_element;
 52: 
 53: struct _HTStructured {
1.60 frystyk 54:   const HTStructuredClass * isa;
1.54 frystyk 55:   HTRequest *            request;
1.2 timbl 56:   HTParentAnchor *      node_anchor;
 57:   HText *          text;
 58: 
 59:   HTStream*         target;         /* Output stream */
 60:   HTStreamClass       targetClass;      /* Output routines */
 61: 
1.56 frystyk 62:   HTChunk *         title;     /* Grow by 128 */
1.2 timbl 63:   
 64:   char *           comment_start; /* for literate programming */
 65:   char *           comment_end;
1.16 timbl 66:   
1.60 frystyk 67:   const SGML_dtd*      dtd;
1.16 timbl 68:   
1.2 timbl 69:   HTTag *          current_tag;
 70:   BOOL            style_change;
 71:   HTStyle *         new_style;
 72:   HTStyle *         old_style;
 73:   BOOL            in_word; /* Have just had a non-white char */
1.44 frystyk 74: 
 75:   stack_element       stack[MAX_NESTING];
 76:   stack_element       *sp;         /* Style stack pointer */
 77:   int                overflow; /* Keep track of overflow nesting */
1.1 timbl 78: };
 79: 
1.2 timbl 80: struct _HTStream {
1.60 frystyk 81:   const HTStreamClass *   isa;
1.2 timbl 82:   /* .... */
 83: };
1.1 timbl 84: 
 85: /*       Forward declarations of routines
 86: */
1.52 frystyk 87: PRIVATE void get_styles (void);
1.1 timbl 88: 
 89: 
1.52 frystyk 90: PRIVATE void actually_set_style (HTStructured * me);
 91: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle * style);
1.1 timbl 92: 
 93: /*   Style buffering avoids dummy paragraph begin/ends.
 94: */
1.4 timbl 95: #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
1.1 timbl 96: 
 97: 
#ifdef OLD_CODE
/*
**  Disabled leftover: entries of an old entity table.  The opening of
**  the table is not part of this fragment, so it cannot be compiled
**  as it stands.
*/

/* The following accented characters are from Peter Flynn, CURIA project */

/* these ifdefs don't solve the problem of a simple terminal emulator
** with a different character set to the client machine. But nothing does,
** except looking at the TERM setting */


    { "ocus" , "&" },           /* for CURIA */
#ifdef IBMPC
    { "aacute" , "\240" },      /* For PC display */
    { "eacute" , "\202" },
    { "iacute" , "\241" },
    { "oacute" , "\242" },
    { "uacute" , "\243" },
    { "Aacute" , "\101" },
    { "Eacute" , "\220" },
    { "Iacute" , "\111" },
    { "Oacute" , "\117" },
    { "Uacute" , "\125" },
#else
    { "aacute" , "\341" },      /* Works for openwindows -- Peter Flynn */
    { "eacute" , "\351" },
    { "iacute" , "\355" },
    { "oacute" , "\363" },
    { "uacute" , "\372" },
    { "Aacute" , "\301" },
    { "Eacute" , "\310" },
    { "Iacute" , "\315" },
    { "Oacute" , "\323" },
    { "Uacute" , "\332" },
#endif
    { 0,        0 }             /* Terminate list */
};
#endif
1.1 timbl 133: 
 134: 
/*  Entity values -- for ISO Latin 1 local representation
**
**  This MUST match exactly the table referred to in the DTD!
**
**  Each entry is a one-character string written as an octal escape.
**  The entry for the non-breaking space holds an ordinary space (\040).
*/
static char * ISO_Latin1[] = {
    "\306",     /* capital AE diphthong (ligature) */
    "\301",     /* capital A, acute accent */
    "\302",     /* capital A, circumflex accent */
    "\300",     /* capital A, grave accent */
    "\305",     /* capital A, ring */
    "\303",     /* capital A, tilde */
    "\304",     /* capital A, dieresis or umlaut mark */
    "\307",     /* capital C, cedilla */
    "\320",     /* capital Eth, Icelandic */
    "\311",     /* capital E, acute accent */
    "\312",     /* capital E, circumflex accent */
    "\310",     /* capital E, grave accent */
    "\313",     /* capital E, dieresis or umlaut mark */
    "\315",     /* capital I, acute accent */
    "\316",     /* capital I, circumflex accent */
    "\314",     /* capital I, grave accent */
    "\317",     /* capital I, dieresis or umlaut mark */
    "\321",     /* capital N, tilde */
    "\323",     /* capital O, acute accent */
    "\324",     /* capital O, circumflex accent */
    "\322",     /* capital O, grave accent */
    "\330",     /* capital O, slash */
    "\325",     /* capital O, tilde */
    "\326",     /* capital O, dieresis or umlaut mark */
    "\336",     /* capital THORN, Icelandic */
    "\332",     /* capital U, acute accent */
    "\333",     /* capital U, circumflex accent */
    "\331",     /* capital U, grave accent */
    "\334",     /* capital U, dieresis or umlaut mark */
    "\335",     /* capital Y, acute accent */
    "\341",     /* small a, acute accent */
    "\342",     /* small a, circumflex accent */
    "\346",     /* small ae diphthong (ligature) */
    "\340",     /* small a, grave accent */
    "\046",     /* ampersand */
    "\345",     /* small a, ring */
    "\343",     /* small a, tilde */
    "\344",     /* small a, dieresis or umlaut mark */
    "\347",     /* small c, cedilla */
    "\351",     /* small e, acute accent */
    "\352",     /* small e, circumflex accent */
    "\350",     /* small e, grave accent */
    "\360",     /* small eth, Icelandic */
    "\353",     /* small e, dieresis or umlaut mark */
    "\076",     /* greater than */
    "\355",     /* small i, acute accent */
    "\356",     /* small i, circumflex accent */
    "\354",     /* small i, grave accent */
    "\357",     /* small i, dieresis or umlaut mark */
    "\074",     /* less than */
    "\040",     /* non-breaking space */
    "\361",     /* small n, tilde */
    "\363",     /* small o, acute accent */
    "\364",     /* small o, circumflex accent */
    "\362",     /* small o, grave accent */
    "\370",     /* small o, slash */
    "\365",     /* small o, tilde */
    "\366",     /* small o, dieresis or umlaut mark */
    "\042",     /* double quote sign - June 94 */
    "\337",     /* small sharp s, German (sz ligature) */
    "\376",     /* small thorn, Icelandic */
    "\372",     /* small u, acute accent */
    "\373",     /* small u, circumflex accent */
    "\371",     /* small u, grave accent */
    "\374",     /* small u, dieresis or umlaut mark */
    "\375",     /* small y, acute accent */
    "\377",     /* small y, dieresis or umlaut mark */
};
 208: 
1.2 timbl 209: 
 210: /*   Entity values -- for NeXT local representation
 211: **
 212: **   This MUST match exactly the table referred to in the DTD!
 213: **
 214: */
 215: static char * NeXTCharacters[] = {
 216:    "341円", /* capital AE diphthong (ligature)   */ 
 217:    "202円", /* capital A, acute accent       */ 
 218:    "203円", /* capital A, circumflex accent     */ 
 219:    "201円", /* capital A, grave accent       */ 
 220:    "206円", /* capital A, ring           */ 
 221:    "204円", /* capital A, tilde           */ 
 222:    "205円", /* capital A, dieresis or umlaut mark  */ 
 223:    "207円", /* capital C, cedilla          */ 
 224:    "220円", /* capital Eth, Icelandic        */ 
 225:    "211円", /* capital E, acute accent               */ 
 226:    "212円", /* capital E, circumflex accent             */ 
 227:    "210円", /* capital E, grave accent               */ 
 228:    "213円", /* capital E, dieresis or umlaut mark          */ 
 229:    "215円", /* capital I, acute accent               */ 
 230:    "216円", /* capital I, circumflex accent     these are    */ 
 231:    "214円", /* capital I, grave accent       ISO -100 hex  */ 
 232:    "217円", /* capital I, dieresis or umlaut mark          */ 
 233:    "221円", /* capital N, tilde                   */ 
 234:    "223円", /* capital O, acute accent               */ 
 235:    "224円", /* capital O, circumflex accent             */ 
 236:    "222円", /* capital O, grave accent               */ 
 237:    "351円", /* capital O, slash       'cept this */ 
 238:    "225円", /* capital O, tilde                   */ 
 239:    "226円", /* capital O, dieresis or umlaut mark          */ 
 240:    "234円", /* capital THORN, Icelandic */ 
 241:    "230円", /* capital U, acute accent */ 
 242:    "231円", /* capital U, circumflex accent */ 
 243:    "227円", /* capital U, grave accent */ 
 244:    "232円", /* capital U, dieresis or umlaut mark */ 
 245:    "233円", /* capital Y, acute accent */ 
 246:    "326円", /* small a, acute accent */ 
 247:    "327円", /* small a, circumflex accent */ 
 248:    "361円", /* small ae diphthong (ligature) */ 
 249:    "325円", /* small a, grave accent */ 
 250:    "046円", /* ampersand */ 
 251:    "332円", /* small a, ring */ 
 252:    "330円", /* small a, tilde */ 
 253:    "331円", /* small a, dieresis or umlaut mark */ 
 254:    "333円", /* small c, cedilla */ 
 255:    "335円", /* small e, acute accent */ 
 256:    "336円", /* small e, circumflex accent */ 
 257:    "334円", /* small e, grave accent */ 
 258:    "346円", /* small eth, Icelandic     */ 
 259:    "337円", /* small e, dieresis or umlaut mark */ 
 260:    "076円", /* greater than */ 
 261:    "342円", /* small i, acute accent */ 
 262:    "344円", /* small i, circumflex accent */ 
 263:    "340円", /* small i, grave accent */ 
 264:    "345円", /* small i, dieresis or umlaut mark */ 
 265:    "074円", /* less than */ 
1.62 ! frystyk 266:    "040円", /* non-breaking space */
1.2 timbl 267:    "347円", /* small n, tilde */ 
 268:    "355円", /* small o, acute accent */ 
 269:    "356円", /* small o, circumflex accent */ 
 270:    "354円", /* small o, grave accent */ 
 271:    "371円", /* small o, slash */ 
 272:    "357円", /* small o, tilde */ 
 273:    "360円", /* small o, dieresis or umlaut mark */ 
1.36 frystyk 274:     "042円", /* double quote sign - June 94 */
1.2 timbl 275:    "373円", /* small sharp s, German (sz ligature) */ 
 276:    "374円", /* small thorn, Icelandic */ 
 277:    "363円", /* small u, acute accent */ 
 278:    "364円", /* small u, circumflex accent */ 
 279:    "362円", /* small u, grave accent */ 
 280:    "366円", /* small u, dieresis or umlaut mark */ 
 281:    "367円", /* small y, acute accent */ 
 282:    "375円", /* small y, dieresis or umlaut mark */ 
1.1 timbl 283: };
 284: 
1.2 timbl 285: /*   Entity values -- for IBM/PC Code Page 850 (International)
 286: **
 287: **   This MUST match exactly the table referred to in the DTD!
 288: **
 289: */
 290: /* @@@@@@@@@@@@@@@@@ TBD */
 291: 
 292: 
 293: 
 294: /*       Set character set
 295: **       ----------------
 296: */
 297: 
 298: PRIVATE char** p_entity_values = ISO_Latin1;  /* Pointer to translation */
1.1 timbl 299: 
1.53 frystyk 300: PUBLIC void HTMLUseCharacterSet (HTMLCharacterSet i)
1.2 timbl 301: {
 302:   p_entity_values = (i == HTML_NEXT_CHARS) ? NeXTCharacters
 303:                       : ISO_Latin1;
 304: }
1.1 timbl 305: 
 306: 
 307: /*       Flattening the style structure
 308: **       ------------------------------
 309: **
 310: On the NeXT, and on any read-only browser, it is simpler for the text to have
 311: a sequence of styles, rather than a nested tree of styles. In this
 312: case we have to flatten the structure as it arrives from SGML tags into
 313: a sequence of styles.
 314: */
 315: 
 316: /*       If style really needs to be set, call this
 317: */
1.53 frystyk 318: PRIVATE void actually_set_style (HTStructured * me)
1.1 timbl 319: {
1.4 timbl 320:   if (!me->text) {          /* First time through */
1.54 frystyk 321:      me->text = HText_new2(me->request, me->node_anchor, me->target);
1.4 timbl 322:      HText_beginAppend(me->text);
 323:      HText_setStyle(me->text, me->new_style);
 324:      me->in_word = NO;
1.1 timbl 325:   } else {
1.4 timbl 326:      HText_setStyle(me->text, me->new_style);
1.1 timbl 327:   }
1.4 timbl 328:   me->old_style = me->new_style;
 329:   me->style_change = NO;
1.1 timbl 330: }
 331: 
 332: /*   If you THINK you need to change style, call this
 333: */
 334: 
1.53 frystyk 335: PRIVATE void change_paragraph_style (HTStructured * me, HTStyle *style)
1.1 timbl 336: {
1.4 timbl 337:   if (me->new_style!=style) {
 338:    me->style_change = YES;
 339:    me->new_style = style;
1.1 timbl 340:   }
1.11 timbl 341:   me->in_word = NO;
1.1 timbl 342: }
 343: 
1.2 timbl 344: /*_________________________________________________________________________
 345: **
 346: **           A C T I O N   R O U T I N E S
 347: */
 348: 
 349: /*   Character handling
 350: **   ------------------
1.1 timbl 351: */
1.53 frystyk 352: PRIVATE int HTML_put_character (HTStructured * me, char c)
1.1 timbl 353: {
1.2 timbl 354: 
1.4 timbl 355:   switch (me->sp[0].tag_number) {
1.2 timbl 356:   case HTML_COMMENT:
 357:    break;                 /* Do Nothing */
 358:    
 359:   case HTML_TITLE:  
1.56 frystyk 360:    HTChunk_putb(me->title, &c, 1);
1.2 timbl 361:    break;
 362: 
 363:    
 364:   case HTML_LISTING:             /* Litteral text */
 365:   case HTML_XMP:
 366:   case HTML_PLAINTEXT:
 367:   case HTML_PRE:
 368: /*   We guarrantee that the style is up-to-date in begin_litteral
 369: */
1.4 timbl 370:    HText_appendCharacter(me->text, c);
1.2 timbl 371:    break;
 372:    
 373:   default:                  /* Free format text */
1.4 timbl 374:    if (me->style_change) {
1.42 frystyk 375:      if ((c=='\n') || (c==' ')) return HT_OK;  /* Ignore it */
1.2 timbl 376:      UPDATE_STYLE;
 377:    }
1.62 ! frystyk 378:    if (c == TAB)
 ! 379:      HText_appendCharacter(me->text, '\t');
 ! 380:    else if (WHITE(c)) {
1.4 timbl 381:      if (me->in_word) {
 382:        HText_appendCharacter(me->text, ' ');
 383:        me->in_word = NO;
1.2 timbl 384:      }
 385:    } else {
1.4 timbl 386:      HText_appendCharacter(me->text, c);
 387:      me->in_word = YES;
1.2 timbl 388:    }
 389:   } /* end switch */
1.42 frystyk 390:   return HT_OK;
1.1 timbl 391: }
 392: 
1.2 timbl 393: 
 394: 
 395: /*   String handling
 396: **   ---------------
 397: **
 398: **   This is written separately from put_character becuase the loop can
1.11 timbl 399: **   in some cases be promoted to a higher function call level for speed.
1.2 timbl 400: */
1.60 frystyk 401: PRIVATE int HTML_put_string (HTStructured * me, const char* s)
1.1 timbl 402: {
1.2 timbl 403: 
1.4 timbl 404:   switch (me->sp[0].tag_number) {
1.2 timbl 405:   case HTML_COMMENT:
 406:    break;                 /* Do Nothing */
 407:    
 408:   case HTML_TITLE:  
1.56 frystyk 409:    HTChunk_putb(me->title, s, strlen(s));
1.2 timbl 410:    break;
 411: 
 412:    
 413:   case HTML_LISTING:             /* Litteral text */
 414:   case HTML_XMP:
 415:   case HTML_PLAINTEXT:
 416:   case HTML_PRE:
 417: 
 418: /*   We guarrantee that the style is up-to-date in begin_litteral
 419: */
1.4 timbl 420:    HText_appendText(me->text, s);
1.2 timbl 421:    break;
 422:    
 423:   default:                  /* Free format text */
 424:     {
1.60 frystyk 425:      const char *p = s;
1.4 timbl 426:      if (me->style_change) {
1.2 timbl 427:        for (; *p && ((*p=='\n') || (*p==' ')); p++) ; /* Ignore leaders */
1.42 frystyk 428:        if (!*p) return HT_OK;
1.2 timbl 429:        UPDATE_STYLE;
 430:      }
 431:      for(; *p; p++) {
1.4 timbl 432:        if (me->style_change) {
1.2 timbl 433:          if ((*p=='\n') || (*p==' ')) continue; /* Ignore it */
 434:          UPDATE_STYLE;
 435:        }
 436:        if (*p=='\n') {
1.4 timbl 437:          if (me->in_word) {
 438:            HText_appendCharacter(me->text, ' ');
 439:            me->in_word = NO;
1.2 timbl 440:          }
 441:        } else {
1.4 timbl 442:          HText_appendCharacter(me->text, *p);
 443:          me->in_word = YES;
1.2 timbl 444:        }
 445:      } /* for */
 446:    }
 447:   } /* end switch */
1.42 frystyk 448:   return HT_OK;
1.1 timbl 449: }
 450: 
 451: 
1.2 timbl 452: /*   Buffer write
1.3 timbl 453: **   ------------
1.1 timbl 454: */
1.60 frystyk 455: PRIVATE int HTML_write (HTStructured * me, const char* s, int l)
1.1 timbl 456: {
1.38 frystyk 457:   while (l-- > 0)
 458:    HTML_put_character(me, *s++);
1.42 frystyk 459:   return HT_OK;
1.1 timbl 460: }
1.2 timbl 461: 
 462: 
 463: /*   Start Element
 464: **   -------------
 465: */
1.53 frystyk 466: PRIVATE void HTML_start_element (
 467:    HTStructured * me,
 468:    int           element_number,
1.60 frystyk 469:    const BOOL*       present,
 470:    const char **      value)
1.2 timbl 471: {
 472:   switch (element_number) {
 473:   case HTML_A:
 474:    {
1.8 timbl 475:      HTChildAnchor * source;
1.9 timbl 476:      char * href = NULL;
1.42 frystyk 477:      if (present[HTML_A_HREF])
1.9 timbl 478:        StrAllocCopy(href, value[HTML_A_HREF]);
1.8 timbl 479:      source = HTAnchor_findChildAndLink(
1.4 timbl 480:        me->node_anchor,                /* parent */
1.2 timbl 481:        present[HTML_A_NAME] ? value[HTML_A_NAME] : 0, /* Tag */
1.9 timbl 482:        present[HTML_A_HREF] ? href : 0,        /* Addresss */
1.16 timbl 483:        present[HTML_A_REL] && value[HTML_A_REL] ? 
1.54 frystyk 484:            (HTLinkType) HTAtom_for(value[HTML_A_REL])
 485:                        : 0);
1.2 timbl 486:      
 487:      if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) {
 488:        HTParentAnchor * dest = 
 489:          HTAnchor_parent(
 490:            HTAnchor_followMainLink((HTAnchor*)source)
 491:                  );
 492:        if (!HTAnchor_title(dest))
 493:            HTAnchor_setTitle(dest, value[HTML_A_TITLE]);
 494:      }
 495:      UPDATE_STYLE;
1.4 timbl 496:      HText_beginAnchor(me->text, source);
1.58 frystyk 497:      HT_FREE(href);           /* Leak fix Henrik 17/02-94 */
1.2 timbl 498:    }
 499:    break;
 500:    
 501:   case HTML_TITLE:
1.56 frystyk 502:     HTChunk_clear(me->title);
1.2 timbl 503:    break;
 504:    
 505:   case HTML_NEXTID:
 506:    /* if (present[NEXTID_N] && value[NEXTID_N])
1.4 timbl 507:        HText_setNextId(me->text, atoi(value[NEXTID_N])); */
1.2 timbl 508:    break;
 509:    
 510:   case HTML_ISINDEX:
1.4 timbl 511:    HTAnchor_setIndex(me->node_anchor);
1.2 timbl 512:    break;
 513:    
1.15 timbl 514:   case HTML_BR: 
 515:    UPDATE_STYLE;
 516:    HText_appendCharacter(me->text, '\n');
 517:    me->in_word = NO;
 518:    break;
 519:    
 520:   case HTML_HR: 
 521:    UPDATE_STYLE;
 522:    HText_appendCharacter(me->text, '\n');
1.16 timbl 523:    HText_appendText(me->text, "___________________________________");
1.15 timbl 524:    HText_appendCharacter(me->text, '\n');
 525:    me->in_word = NO;
 526:    break;
 527:    
1.2 timbl 528:   case HTML_P:
 529:    UPDATE_STYLE;
1.4 timbl 530:    HText_appendParagraph(me->text);
 531:    me->in_word = NO;
1.2 timbl 532:    break;
 533: 
 534:   case HTML_DL:
1.11 timbl 535:     change_paragraph_style(me, present && present[DL_COMPACT]
1.16 timbl 536:        ? styles[HTML_DL]
1.2 timbl 537:        : styles[HTML_DL]);
 538:    break;
 539:    
 540:   case HTML_DT:
1.4 timbl 541:     if (!me->style_change) {
 542:      HText_appendParagraph(me->text);
 543:      me->in_word = NO;
1.2 timbl 544:    }
 545:    break;
 546:    
 547:   case HTML_DD:
 548:     UPDATE_STYLE;
1.62 ! frystyk 549:    HTML_put_character(me, TAB);  /* Just tab out one stop */
1.4 timbl 550:    me->in_word = NO;
 551:    break;
1.2 timbl 552: 
 553:   case HTML_UL:
 554:   case HTML_OL:
 555:   case HTML_MENU:
 556:   case HTML_DIR:
1.11 timbl 557:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 558:    break;
 559:    
 560:   case HTML_LI:
 561:     UPDATE_STYLE;
1.7 timbl 562:    if (me->sp[0].tag_number != HTML_DIR)
1.4 timbl 563:      HText_appendParagraph(me->text);
1.2 timbl 564:    else
1.62 ! frystyk 565:      HText_appendCharacter(me->text, TAB);
1.4 timbl 566:    me->in_word = NO;
1.2 timbl 567:    break;
 568:    
 569:   case HTML_LISTING:             /* Litteral text */
 570:   case HTML_XMP:
 571:   case HTML_PLAINTEXT:
 572:   case HTML_PRE:
1.11 timbl 573:    change_paragraph_style(me, styles[element_number]);
1.2 timbl 574:    UPDATE_STYLE;
1.4 timbl 575:    if (me->comment_end)
 576:      HText_appendText(me->text, me->comment_end);
1.2 timbl 577:    break;
1.11 timbl 578: 
1.23 frystyk 579:   case HTML_IMG:           /* Images */
 580:    {
 581:      HTChildAnchor *source;
 582:      char *src = NULL;
1.49 frystyk 583:      if (present[HTML_IMG_SRC])
1.23 frystyk 584:        StrAllocCopy(src, value[HTML_IMG_SRC]);
 585:      source = HTAnchor_findChildAndLink(
 586:                        me->node_anchor,  /* parent */
 587:                        0,           /* Tag */
 588:                        src ? src : 0,  /* Addresss */
 589:                        0);
 590:      UPDATE_STYLE;
 591:      HText_appendImage(me->text, source,
1.24 frystyk 592:           present[HTML_IMG_ALT] ? value[HTML_IMG_ALT] : NULL,
 593:           present[HTML_IMG_ALIGN] ? value[HTML_IMG_ALIGN] : NULL,
 594:           present[HTML_IMG_ISMAP] ? YES : NO);
1.58 frystyk 595:      HT_FREE(src);
1.24 frystyk 596:    }    
 597:    break;
 598: 
 599:   case HTML_HTML:          /* Ignore these altogether */
 600:   case HTML_HEAD:
 601:   case HTML_BODY:
1.62 ! frystyk 602:    break;
1.24 frystyk 603:   
1.10 timbl 604:   case HTML_TT:           /* Physical character highlighting */
 605:   case HTML_B:            /* Currently ignored */
 606:   case HTML_I:
 607:   case HTML_U:
1.62 ! frystyk 608:    UPDATE_STYLE;
 ! 609:    HText_appendCharacter(me->text, '_');
 ! 610:    me->in_word = NO;
 ! 611:    break;
1.10 timbl 612:   
 613:   case HTML_EM:           /* Logical character highlighting */
 614:   case HTML_STRONG:         /* Currently ignored */
 615:   case HTML_CODE:
 616:   case HTML_SAMP:
 617:   case HTML_KBD:
 618:   case HTML_VAR:
 619:   case HTML_DFN:
 620:   case HTML_CITE:
 621:    break;
 622:    
1.11 timbl 623:   case HTML_H1:           /* paragraph styles */
 624:   case HTML_H2:
 625:   case HTML_H3:
 626:   case HTML_H4:
 627:   case HTML_H5:
 628:   case HTML_H6:
 629:   case HTML_H7:
 630:   case HTML_ADDRESS:
 631:   case HTML_BLOCKQUOTE:
 632:    change_paragraph_style(me, styles[element_number]);   /* May be postponed */
1.2 timbl 633:    break;
 634: 
 635:   } /* end switch */
 636: 
1.16 timbl 637:   if (me->dtd->tags[element_number].contents!= SGML_EMPTY) {
1.13 timbl 638:     if (me->sp == me->stack) {
1.44 frystyk 639:      if (SGML_TRACE)
1.59 eric 640:        HTTrace("HTML........ Maximum nesting of %d exceded!\n",
1.44 frystyk 641:            MAX_NESTING); 
 642:      me->overflow++;
1.12 timbl 643:      return;
 644:    }
1.4 timbl 645:    --(me->sp);
 646:    me->sp[0].style = me->new_style;    /* Stack new style */
 647:    me->sp[0].tag_number = element_number;
1.10 timbl 648:   } 
1.1 timbl 649: }
1.10 timbl 650: 
1.2 timbl 651: 
1.1 timbl 652: /*       End Element
1.2 timbl 653: **       -----------
1.1 timbl 654: **
1.2 timbl 655: */
 656: /*   When we end an element, the style must be returned to that
1.1 timbl 657: **   in effect before that element. Note that anchors (etc?)
 658: **   don't have an associated style, so that we must scan down the
 659: **   stack for an element with a defined style. (In fact, the styles
 660: **   should be linked to the whole stack not just the top one.)
 661: **   TBL 921119
1.6 timbl 662: **
 663: **   We don't turn on "CAREFUL" check because the parser produces
 664: **   (internal code errors apart) good nesting. The parser checks
 665: **   incoming code errors, not this module.
1.1 timbl 666: */
1.53 frystyk 667: PRIVATE void HTML_end_element (HTStructured * me, int element_number)
1.1 timbl 668: {
1.2 timbl 669: #ifdef CAREFUL         /* parser assumed to produce good nesting */
1.4 timbl 670:   if (element_number != me->sp[0].tag_number) {
1.59 eric 671:     HTTrace("HTMLText: end of element %s when expecting end of %s\n",
1.16 timbl 672:        me->dtd->tags[element_number].name,
 673:        me->dtd->tags[me->sp->tag_number].name);
1.6 timbl 674:        /* panic */
1.1 timbl 675:   }
1.2 timbl 676: #endif
1.44 frystyk 677: 
 678:   /* HFN, If overflow of nestings, we need to get back to reality */
 679:   if (me->overflow > 0) {
 680:    me->overflow--;
 681:    return;
 682:   }
 683: 
1.4 timbl 684:   me->sp++;             /* Pop state off stack */
1.44 frystyk 685: 
1.2 timbl 686:   switch(element_number) {
 687: 
 688:   case HTML_A:
 689:    UPDATE_STYLE;
1.4 timbl 690:    HText_endAnchor(me->text);
1.2 timbl 691:    break;
 692: 
 693:   case HTML_TITLE:
1.56 frystyk 694:    HTAnchor_setTitle(me->node_anchor, HTChunk_data(me->title));
1.2 timbl 695:    break;
 696:    
1.62 ! frystyk 697:   case HTML_TT:           /* Physical character highlighting */
 ! 698:   case HTML_B:            /* Currently ignored */
 ! 699:   case HTML_I:
 ! 700:   case HTML_U:
 ! 701:    UPDATE_STYLE;
 ! 702:    HText_appendCharacter(me->text, '_');
 ! 703:    break;
 ! 704:   
1.2 timbl 705:   case HTML_LISTING:             /* Litteral text */
 706:   case HTML_XMP:
 707:   case HTML_PLAINTEXT:
 708:   case HTML_PRE:
1.4 timbl 709:    if (me->comment_start)
 710:      HText_appendText(me->text, me->comment_start);
1.2 timbl 711:    /* Fall through */
 712:    
 713:   default:
1.44 frystyk 714: 
 715:    /* Often won't really change */
 716:    change_paragraph_style(me, me->sp->style);
1.2 timbl 717:    break;
 718:    
 719:   } /* switch */
1.1 timbl 720: }
 721: 
1.2 timbl 722: 
 723: /*       Expanding entities
 724: **       ------------------
 725: */
 726: /*   (In fact, they all shrink!)
1.1 timbl 727: */
1.2 timbl 728: 
1.53 frystyk 729: PRIVATE void HTML_put_entity (HTStructured * me, int entity_number)
1.1 timbl 730: {
1.4 timbl 731:   HTML_put_string(me, ISO_Latin1[entity_number]);  /* @@ Other representations */
1.1 timbl 732: }
1.2 timbl 733: 
1.42 frystyk 734: /*   Flush an HTML object
 735: **   --------------------
 736: */
1.53 frystyk 737: PUBLIC int HTML_flush (HTStructured * me)
1.42 frystyk 738: {
 739:   UPDATE_STYLE;              /* Creates empty document here! */
1.57 frystyk 740:   if (me->comment_end) HTML_put_string(me,me->comment_end);
 741:   return me->target ? (*me->targetClass.flush)(me->target) : HT_OK;
1.42 frystyk 742: }
1.2 timbl 743: 
 744: /*   Free an HTML object
 745: **   -------------------
 746: **
1.4 timbl 747: ** If the document is empty, the text object will not yet exist.
 748:  So we could in fact abandon creating the document and return
 749:  an error code. In fact an empty document is an important type
 750:  of document, so we don't.
 751: **
 752: **   If non-interactive, everything is freed off.  No: crashes -listrefs
1.2 timbl 753: **   Otherwise, the interactive object is left.   
 754: */
1.53 frystyk 755: PUBLIC int HTML_free (HTStructured * me)
1.1 timbl 756: {
1.4 timbl 757:   UPDATE_STYLE;       /* Creates empty document here! */
 758:   if (me->comment_end)
 759:        HTML_put_string(me,me->comment_end);
 760:   HText_endAppend(me->text);
 761: 
 762:   if (me->target) {
1.35 duns 763:     (*me->targetClass._free)(me->target);
1.2 timbl 764:   }
1.56 frystyk 765:   HTChunk_delete(me->title);
1.58 frystyk 766:   HT_FREE(me);
1.42 frystyk 767:   return HT_OK;
1.1 timbl 768: }
 769: 
 770: 
1.53 frystyk 771: PRIVATE int HTML_abort (HTStructured * me, HTList * e)
1.1 timbl 772: 
1.14 timbl 773: {
 774:   if (me->target) {
 775:     (*me->targetClass.abort)(me->target, e);
 776:   }
1.56 frystyk 777:   HTChunk_delete(me->title);
1.58 frystyk 778:   HT_FREE(me);
1.42 frystyk 779:   return HT_ERROR;
1.1 timbl 780: }
 781: 
1.2 timbl 782: 
 783: /*   Get Styles from style sheet
 784: **   ---------------------------
 785: */
1.53 frystyk 786: PRIVATE void get_styles (void)
1.1 timbl 787: {
1.2 timbl 788:   got_styles = YES;
 789:   
 790:   default_style =      HTStyleNamed(styleSheet, "Normal");
1.1 timbl 791: 
1.2 timbl 792:   styles[HTML_H1] =     HTStyleNamed(styleSheet, "Heading1");
 793:   styles[HTML_H2] =     HTStyleNamed(styleSheet, "Heading2");
 794:   styles[HTML_H3] =     HTStyleNamed(styleSheet, "Heading3");
 795:   styles[HTML_H4] =     HTStyleNamed(styleSheet, "Heading4");
 796:   styles[HTML_H5] =     HTStyleNamed(styleSheet, "Heading5");
 797:   styles[HTML_H6] =     HTStyleNamed(styleSheet, "Heading6");
 798:   styles[HTML_H7] =     HTStyleNamed(styleSheet, "Heading7");
 799: 
 800:   styles[HTML_DL] =     HTStyleNamed(styleSheet, "Glossary");
 801:   styles[HTML_UL] =
 802:   styles[HTML_OL] =     HTStyleNamed(styleSheet, "List");
 803:   styles[HTML_MENU] =        HTStyleNamed(styleSheet, "Menu");
 804:   styles[HTML_DIR] =     HTStyleNamed(styleSheet, "Dir");  
1.16 timbl 805: /* styles[HTML_DLC] =     HTStyleNamed(styleSheet, "GlossaryCompact"); */
1.2 timbl 806:   styles[HTML_ADDRESS]=   HTStyleNamed(styleSheet, "Address");
 807:   styles[HTML_BLOCKQUOTE]=  HTStyleNamed(styleSheet, "BlockQuote");
 808:   styles[HTML_PLAINTEXT] =
 809:   styles[HTML_XMP] =     HTStyleNamed(styleSheet, "Example");
 810:   styles[HTML_PRE] =     HTStyleNamed(styleSheet, "Preformatted");
 811:   styles[HTML_LISTING] =   HTStyleNamed(styleSheet, "Listing");
 812: }
 813: /*               P U B L I C
 814: */
 815: 
 816: /*   Structured Object Class
 817: **   -----------------------
 818: */
1.60 frystyk 819: PRIVATE const HTStructuredClass HTMLPresentation = /* As opposed to print etc */
1.2 timbl 820: {       
 821:    "text/html",
1.42 frystyk 822:    HTML_flush,
1.2 timbl 823:    HTML_free,
1.14 timbl 824:    HTML_abort,
1.2 timbl 825:    HTML_put_character,   HTML_put_string, HTML_write,
 826:    HTML_start_element,   HTML_end_element,
 827:    HTML_put_entity
 828: }; 
1.1 timbl 829: 
1.4 timbl 830: 
1.2 timbl 831: /*       New Structured Text object
 832: **       --------------------------
 833: **
1.16 timbl 834: **   The structured stream can generate either presentation,
1.4 timbl 835: **   or plain text, or HTML.
1.1 timbl 836: */
1.53 frystyk 837: PRIVATE HTStructured* HTML_new (HTRequest *  request,
 838:                   void *       param,
 839:                   HTFormat      input_format,
 840:                   HTFormat      output_format,
 841:                   HTStream * output_stream)
1.1 timbl 842: {
 843: 
1.4 timbl 844:   HTStructured * me;
 845:   
1.47 frystyk 846: #if 0
1.16 timbl 847:   if (output_format != WWW_PLAINTEXT
 848:    && output_format != WWW_PRESENT
 849:    && output_format != HTAtom_for("text/x-c")) {
1.37 frystyk 850:     HTStream * intermediate = HTStreamStack(WWW_HTML, output_format,
 851:                        output_stream, request, NO);
1.6 timbl 852:    if (intermediate) return HTMLGenerator(intermediate);
1.44 frystyk 853:    if (SGML_TRACE)
1.59 eric 854:      HTTrace("HTML........ Can't parse HTML to %s\n",
1.44 frystyk 855:          HTAtom_name(output_format));
1.4 timbl 856:    exit (-99);
 857:   }
1.47 frystyk 858: #endif
1.4 timbl 859: 
1.58 frystyk 860:   if ((me = (HTStructured *) HT_CALLOC(1, sizeof(*me))) == NULL)
 861:     HT_OUTOFMEM("HTML_new");
1.1 timbl 862: 
 863:   if (!got_styles) get_styles();
 864: 
1.4 timbl 865:   me->isa = &HTMLPresentation;
1.47 frystyk 866:   me->dtd = &HTMLP_dtd;
1.54 frystyk 867:   me->request = request;
1.48 frystyk 868:   me->node_anchor = HTRequest_anchor(request);
1.56 frystyk 869:   me->title = HTChunk_new(128);
1.4 timbl 870:   me->text = 0;
 871:   me->style_change = YES; /* Force check leading to text creation */
 872:   me->new_style = default_style;
 873:   me->old_style = 0;
 874:   me->sp = me->stack + MAX_NESTING - 1;
 875:   me->sp->tag_number = -1;              /* INVALID */
 876:   me->sp->style = default_style;           /* INVALID */
1.1 timbl 877:   
1.4 timbl 878:   me->comment_start = NULL;
 879:   me->comment_end = NULL;
1.16 timbl 880:   me->target = output_stream;
 881:   if (output_stream) me->targetClass = *output_stream->isa; /* Copy pointers */
1.1 timbl 882:   
1.4 timbl 883:   return (HTStructured*) me;
1.1 timbl 884: }
 885: 
 886: 
1.2 timbl 887: /*   HTConverter for HTML to plain text
 888: **   ----------------------------------
1.1 timbl 889: **
1.2 timbl 890: **   This will convert from HTML to presentation or plain text.
1.1 timbl 891: */
1.53 frystyk 892: PUBLIC HTStream* HTMLToPlain (
 893:    HTRequest *       request,
 894:    void *         param,
 895:    HTFormat        input_format,
 896:    HTFormat        output_format,
 897:    HTStream *       output_stream)
1.1 timbl 898: {
1.47 frystyk 899:   return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 900:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 901: }
 902: 
 903: 
1.2 timbl 904: /*   HTConverter for HTML to C code
 905: **   ------------------------------
 906: **
1.36 frystyk 907: **   C code is like plain text but all non-preformatted code
1.2 timbl 908: **   is commented out.
 909: **   This will convert from HTML to presentation or plain text.
 910: */
1.53 frystyk 911: PUBLIC HTStream* HTMLToC (
 912:    HTRequest *       request,
 913:    void *         param,
 914:    HTFormat        input_format,
 915:    HTFormat        output_format,
 916:    HTStream *       output_stream)
1.1 timbl 917: {
1.4 timbl 918:   
 919:   HTStructured * html;
 920:   
1.36 frystyk 921:   (*output_stream->isa->put_string)(output_stream, "/* "); /* Before title */
1.16 timbl 922:   html = HTML_new(request, NULL, input_format, output_format, output_stream);
1.45 frystyk 923:   html->comment_start = "\n/* ";
1.47 frystyk 924:   html->dtd = &HTMLP_dtd;
1.2 timbl 925:   html->comment_end = " */\n";    /* Must start in col 1 for cpp */
1.47 frystyk 926:   return SGML_new(&HTMLP_dtd, html);
1.1 timbl 927: }
 928: 
 929: 
1.2 timbl 930: /*   Presenter for HTML
 931: **   ------------------
 932: **
 933: **   This will convert from HTML to presentation or plain text.
 934: **
 935: **   Override this if you have a windows version
1.1 timbl 936: */
1.2 timbl 937: #ifndef GUI
1.53 frystyk 938: PUBLIC HTStream* HTMLPresent (
 939:    HTRequest *       request,
 940:    void *         param,
 941:    HTFormat        input_format,
 942:    HTFormat        output_format,
 943:    HTStream *       output_stream)
1.1 timbl 944: {
1.47 frystyk 945:   return SGML_new(&HTMLP_dtd, HTML_new(
1.16 timbl 946:    request, NULL, input_format, output_format, output_stream));
1.1 timbl 947: }
1.2 timbl 948: #endif
1.29 frystyk 949: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /