[BACK] Return to HTTeXGen.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTTeXGen.c, revision 2.2

2.1 frystyk 1: /* Simple LaTeX Generator that converts in a 1:1 manner from HTML to LaTeX
 2: ** =======================================================================
 3: **
 4: **   This version of the HTML object sends LaTeX to the output stream.
 5: **   No attributes are considered in the translation!
 6: **   The module uses simple 1:1 table-conversions, but this COULD be
 7: **   expanded to a stack-machine. This would then be in start_element and
 8: **   end_element...
 9: **                       Henrik 07/03-94
 10: */
 11: 
 /* Nominal output line length: once the pending line gets within a few
 ** characters of this, HTTeXGen_put_character() wraps or flushes it. */
 #define BUFFER_SIZE 80

 /* Characters (besides a blank) that are remembered as candidate
 ** line-break positions while text is being buffered. */
 #define WORD_DELIMITERS ",/;:\"[]()"
 14: 
 15: /* Implements: */
 16: #include "HTTeXGen.h"
 17: #include <stdio.h>
 18: #include "HTMLPDTD.h"
 19: #include "HTStream.h"
 20: #include "SGML.h"
 21: #include "HTFormat.h"
 22: #include "tcp.h"
 23: 
 24: 
 25: /*       HTML Object
 26: **       -----------
 27: */
 28: 
 29: struct _HTStream {
 30:    CONST HTStreamClass *      isa;  
 31:    HTStream *           target;
 32:    HTStreamClass          targetClass;    /* COPY for speed */
 33: };
 34: 
 35: struct _HTStructured {
 36:    CONST HTStructuredClass *    isa;
 37:    HTStream *           target;
 38:    HTStreamClass          targetClass;    /* COPY for speed */
 39:    CONST SGML_dtd *        dtd;
 40:    
 41:    char              buffer[BUFFER_SIZE+20]; /* Needed!! */
 42:    char *             write_pointer;
 43:    char *             line_break;
 44:    BOOL              sensitive;     /* Can we put \n */
 45:    BOOL              preformatted;   /* Is it verbatim? */
 46:    BOOL              markup;   /* If doing LaTeX markup */
 47:    BOOL              startup;   /* To skip MIME header */
 48: };
 49: 
 50: PRIVATE char *TeX_names[][2] = {
 51:   { "",       ""       },   /* HTML_A        */
 52:   { "",       ""       },   /* HTML_ABBREV     */
 53:   { "\n\\begin{abstract}\n","\n\\end{abstract}\n"}, /* HTML_ABSTRACT    */
 54:   { "",       ""       },   /* HTML_ACRONYM     */
 55:   { "",       ""       },   /* HTML_ADDED      */
 56:   { "{\\it ",        "}"       },   /* HTML_ADDRESS     */
 57:   { "",       ""       },   /* HTML_ARG       */
 58:   { "{\\bf ",        "}"       },   /* HTML_B        */
 59:   { "",       ""       },   /* HTML_BASE      */
 60:   { "{\\sf ",        "}"       },   /* HTML_BLOCKQUOTE   */
 61:   { "",       ""       },   /* HTML_BODY      */
 62:   { "",       ""       },   /* HTML_BOX       */
 63:   { "",       ""       },   /* HTML_BR       */
 64:   { "",       ""       },   /* HTML_BYLINE     */
 65:   { "",       ""       },   /* HTML_CAPTION     */
 66:   { "",       ""       },   /* HTML_CHANGED     */
 67:   { "\\cite{",    "}"       },   /* HTML_CITE      */
 68:   { "",       ""       },   /* HTML_CMD       */
 69:   { "{\\tt ",        "}"       },   /* HTML_CODE      */
 70:   { "\n\\typeout{", "}\n"      },   /* HTML_COMMENT     */
 71:   { "]",       ""       },   /* HTML_DD       */
 72:   { "",       ""       },   /* HTML_DFN       */
 73:   { "",       ""       },   /* HTML_DIR       */
 74:   { "\n\\begin{description}","\n\\end{description}\n"}, /* HTML_DL  */
 75:   { "\n\\item[",   ""       },   /* HTML_DT       */
 76:   { "{\\em ",        "}"       },   /* HTML_EM       */
 77:   { "",   ""           },   /* HTML_FIG       */
 78:   { "\n\\footnote{", "}\n"      },   /* HTML_FOOTNOTE    */
 79:   { "",   ""           },   /* HTML_FORM      */
 80:   { "\n\\chapter{", "}\n"      },   /* HTML_H1       */
 81:   { "\n\\section{", "}\n"      },   /* HTML_H2       */
 82:   { "\n\\subsection{","}\n"         },   /* HTML_H3       */
 83:   { "\n\\subsubsection{","}\n"    },   /* HTML_H4       */
 84:   { "\n\\paragraph{",    "}\n"      },   /* HTML_H5       */
 85:   { "\n\\subparagraph{","}\n"        },   /* HTML_H6       */
 86:   { "",       "\n"      },   /* HTML_H7       */
 87:   { "",       ""       },   /* HTML_HEAD      */
 88:   { "",       ""       },   /* HTML_HR       */
 89:   { "",       ""       },   /* HTML_HTML      */
 90:   { ""        ""       },   /* HTML_HTMLPLUS    */
 91:   { "{\\it ",        "}"       },   /* HTML_I        */
 92:   { "",       ""       },   /* HTML_IMAGE      */
 93:   { "",       ""       },   /* HTML_IMG       */
 94:   { "",       ""       },   /* HTML_INPUT      */
 95:   { "",       ""       },   /* HTML_ISINDEX     */
 96:   { "{\\tt ",        "}"       },   /* HTML_KBD       */
 97:   { "",       ""       },   /* HTML_L        */
 98:   { "\n\\item ",   ""       },   /* HTML_LI       */
 99:   { "",       ""       },   /* HTML_LINK      */
 100:   { "",       ""       },   /* HTML_LISTING     */
 101:   { "",       ""       },   /* HTML_LIT       */
 102:   { "",       ""       },   /* HTML_MARGIN     */
 103:   { "",       ""       },   /* HTML_MATH      */
 104:   { "",       ""       },   /* HTML_MENU      */
 105:   { "",       ""       },   /* HTML_NEXTID     */
 106:   { "",       ""       },   /* HTML_NOTE      */
 107:   { "\n\\begin{enumerate}\n","\n\\end{enumerate}\n"}, /* HTML_OL       */
 108:   { "",       ""       },   /* HTML_OPTION     */
 109:   { "",       ""       },   /* HTML_OVER      */
 110:   { "\n\n",     ""       },   /* HTML_P        */
 111:   { "",       ""       },   /* HTML_PERSON     */
 112:   { "",       ""       },   /* HTML_PLAINTEXT    */
 113:   { "\n\\begin{verbatim}"," \\end{verbatim}\n"}, /* HTML_PRE    */
 114:   { "",       ""       },   /* HTML_Q        */
 115:   { "\\begin{quote}",    "\\end{quote}"},    /* HTML_QUOTE      */
 116:   { "",       ""       },   /* HTML_RENDER     */
 117:   { "",       ""       },   /* HTML_REMOVED     */
 118:   { "",       ""       },   /* HTML_S        */
 119:   { "",       ""       },   /* HTML_SAMP      */
 120:   { "",       ""       },   /* HTML_SELECT     */
 121:   { "{\\bf ",        "}"       },   /* HTML_STRONG     */
 122:   { "",       ""       },   /* HTML_SUB       */
 123:   { "",       ""       },   /* HTML_SUP       */
 124:   { "",       ""       },   /* HTML_TAB       */
 125:   { "",       ""       },   /* HTML_TABLE      */
 126:   { "",       ""       },   /* HTML_TD       */
 127:   { "",       ""       },   /* HTML_TEXTAREA    */
 128:   { "",       ""       },   /* HTML_TH       */
 129:   { "\n\\title{",  "}\n\\author{}\n\\maketitle\n"}, /* HTML_TITLE */
 130:   { "",       ""       },   /* HTML_TR       */
 131:   { "",       ""       },   /* HTML_TT       */
 132:   { "",       ""       },   /* HTML_U        */
 133:   { "\n\\begin{itemize}","\n\\end{itemize}\n"},  /* HTML_UL     */
 134:   { "",       ""       },   /* HTML_VAR       */
 135:   { "{\\sf ",        "}"       }    /* HTML_XMP       */
 136: };
 137: 
 138: PRIVATE char *TeX_entities[] = {
 139:   "\\AE ",      /*"AElig",    capital AE diphthong (ligature) */ 
2.2 ! frystyk 140:   "\\\'{A}",     /*"Aacute",   capital A, acute accent */ 
 ! 141:   "\\^{A}",     /*"Acirc",    capital A, circumflex accent */ 
 ! 142:   "\\`{A}",     /*"Agrave",   capital A, grave accent */ 
 ! 143:   "\\AA",      /*"Aring",    capital A, ring */ 
 ! 144:   "\\~{A}",     /*"Atilde",   capital A, tilde */ 
 ! 145:   "\\\"{A}",     /*"Auml",    capital A, dieresis or umlaut mark */ 
 ! 146:   "\\c{C}",     /*"Ccedil",   capital C, cedilla */ 
 ! 147:   "\\OE ",      /*"ETH",     capital Eth, Icelandic */ 
 ! 148:   "\\\'{E}",     /*"Eacute",   capital E, acute accent */ 
 ! 149:   "\\^{E}",     /*"Ecirc",    capital E, circumflex accent */ 
 ! 150:   "\\`{E}",     /*"Egrave",   capital E, grave accent */ 
 ! 151:   "\\\"{E}",     /*"Euml",    capital E, dieresis or umlaut mark */ 
 ! 152:   "\\\'{I}",     /*"Iacute",   capital I, acute accent */ 
 ! 153:   "\\^{I}",     /*"Icirc",    capital I, circumflex accent */ 
 ! 154:   "\\`{I}",     /*"Igrave",   capital I, grave accent */ 
 ! 155:   "\\\"{I}",     /*"Iuml",    capital I, dieresis or umlaut mark */ 
 ! 156:   "\\~{N}",     /*"Ntilde",   capital N, tilde */ 
 ! 157:   "\\\'{O}",     /*"Oacute",   capital O, acute accent */ 
 ! 158:   "\\^{O}",     /*"Ocirc",    capital O, circumflex accent */ 
 ! 159:   "\\`{O}",     /*"Ograve",   capital O, grave accent */ 
2.1 frystyk 160:   "\\O ",          /*"Oslash",   capital O, slash */ 
2.2 ! frystyk 161:   "\\~{O}",     /*"Otilde",   capital O, tilde */ 
 ! 162:   "\\\"{O}",     /*"Ouml",    capital O, dieresis or umlaut mark */ 
 ! 163:   " ",    /*"THORN",    capital THORN, Icelandic */ 
 ! 164:   "\\\'{U}",     /*"Uacute",   capital U, acute accent */ 
 ! 165:   "\\^{U}",     /*"Ucirc",    capital U, circumflex accent */ 
 ! 166:   "\\`{U}",     /*"Ugrave",   capital U, grave accent */ 
 ! 167:   "\\\"{U}",     /*"Uuml",    capital U, dieresis or umlaut mark */ 
 ! 168:   "\\\'{Y}",     /*"Yacute",   capital Y, acute accent */ 
 ! 169:   "\\\'{a}",     /*"aacute",   small a, acute accent */ 
 ! 170:   "\\^{a}",     /*"acirc",    small a, circumflex accent */ 
2.1 frystyk 171:   "\\ae ",      /*"aelig",    small ae diphthong (ligature) */ 
2.2 ! frystyk 172:   "\\`{a}",     /*"agrave",   small a, grave accent */ 
2.1 frystyk 173:   "&",        /*"amp",     ampersand */ 
 174:   "\\aa ",      /*"aring",    small a, ring */ 
2.2 ! frystyk 175:   "\\~{a}",     /*"atilde",   small a, tilde */ 
 ! 176:   "\\\"{a}",     /*"auml",    small a, dieresis or umlaut mark */ 
 ! 177:   "\\c{c}",     /*"ccedil",   small c, cedilla */ 
 ! 178:   "\\\'{e}",     /*"eacute",   small e, acute accent */ 
 ! 179:   "\\^{c}",     /*"ecirc",    small e, circumflex accent */ 
 ! 180:   "\\`{c}",     /*"egrave",   small e, grave accent */ 
 ! 181:   "\\oe ",      /*"eth",     small eth, Icelandic */ 
 ! 182:   "\\\"{e}",     /*"euml",    small e, dieresis or umlaut mark */ 
2.1 frystyk 183:   ">",        /*"gt", greater than */ 
2.2 ! frystyk 184:   "\\\'{\\i}",    /*"iacute",   small i, acute accent */ 
 ! 185:   "\\^{\\i}",    /*"icirc",    small i, circumflex accent */ 
 ! 186:   "\\`{\\i}",    /*"igrave",   small i, grave accent */ 
 ! 187:   "\\\"{\\i}",    /*"iuml",    small i, dieresis or umlaut mark */ 
2.1 frystyk 188:   "<",        /*"lt", less than */ 
2.2 ! frystyk 189:   "\\~{n}",     /*"ntilde",   small n, tilde */ 
 ! 190:   "\\\'{o}",     /*"oacute",   small o, acute accent */ 
 ! 191:   "\\~{o}",     /*"ocirc",    small o, circumflex accent */ 
 ! 192:   "\\`{o}",     /*"ograve",   small o, grave accent */ 
2.1 frystyk 193:   "\\o ",          /*"oslash",   small o, slash */ 
2.2 ! frystyk 194:   "\\~{o}",     /*"otilde",   small o, tilde */ 
 ! 195:   "\\\"{o}",     /*"ouml",    small o, dieresis or umlaut mark */ 
2.1 frystyk 196:   "\\ss ",      /*"szlig",    small sharp s, German (sz ligature)*/ 
2.2 ! frystyk 197:   " ",        /*"thorn",    small thorn, Icelandic */ 
 ! 198:   "\\\'{u}",     /*"uacute",   small u, acute accent */ 
 ! 199:   "\\^{u}",     /*"ucirc",    small u, circumflex accent */ 
 ! 200:   "\\`{u}",     /*"ugrave",   small u, grave accent */ 
 ! 201:   "\\\"{u}",     /*"uuml",    small u, dieresis or umlaut mark */ 
 ! 202:   "\\\'{y}",     /*"yacute",   small y, acute accent */ 
 ! 203:   "\\\"{y}"     /*"yuml",    small y, dieresis or umlaut mark */ 
2.1 frystyk 204: };
 205: 
 206: 
 207: /*   Flush Buffer
 208: **   ------------
 209: */
 210: PRIVATE void HTTeXGen_flush ARGS1(HTStructured *, me)
 211: {
 212:   (*me->targetClass.put_block)(me->target, 
 213:                 me->buffer,
 214:                 me->write_pointer - me->buffer);
 215:   me->write_pointer = me->buffer;
 216:   me->line_break = me->buffer;
 217: }
 218: 
 219: 
 220: /*   Character handling
 221: **   ------------------
 222: **
 223: */
 224: PRIVATE void HTTeXGen_put_character ARGS2(HTStructured *, me, char, c)
 225: {
 226:   if (!me->startup)                /* To skip MIME header */
 227:    return;
 228:   if (c=='\n') {
 229:    if (me->markup || me->preformatted) {   /* Put out as is and flush */
 230:      *me->write_pointer++ = c;
 231:      HTTeXGen_flush(me);
 232:      return;
 233:    } else if (me->sensitive || *(me->write_pointer-1)==' ') {
 234:      return;
 235:     } else
 236:      *me->write_pointer++ = ' ';        /* Try to pretty print */
 237:   } else if (me->markup || me->preformatted) {
 238:    *me->write_pointer++ = c;
 239:   } else if (c==' ' || c=='\t') {         /* Skip space and tabs */
 240:    if (*(me->write_pointer-1) != ' ')
 241:      *me->write_pointer++ = ' ';
 242:    else
 243:      return;
 244:   } else {
 245:    if (c=='$' || c=='&' || c=='%' || c=='#' ||     /* Special chars */
 246:      c=='{' || c=='}' || c=='_') {
 247:      *me->write_pointer++ = '\\';
 248:      *me->write_pointer++ = c;
 249:    } else if (c=='\\') {                /* Special names */
 250:      char *temp = "$\\backslash$";
 251:      strcpy(me->write_pointer, temp);
 252:      me->write_pointer += strlen(temp);
 253:    } else if (c=='^') {
 254:      char *temp = "$\\hat{ }$";
 255:      strcpy(me->write_pointer, temp);
 256:      me->write_pointer += strlen(temp);   
 257:    } else if (c=='~') {
 258:      char *temp = "$\\tilde{ }$";
 259:      strcpy(me->write_pointer, temp);
 260:      me->write_pointer += strlen(temp);
 261:    } else if (c=='|' || c=='<' || c=='>') {        /* Math mode */
 262:      *me->write_pointer++ = '$';
 263:      *me->write_pointer++ = c;
 264:      *me->write_pointer++ = '$';
 265:    } else
 266:      *me->write_pointer++ = c;          /* Char seems normal */
 267:   }
 268: 
 269:   if (c==' ')                          /* Find deliniter */
 270:    me->line_break = me->write_pointer;
 271:   else if (strchr(WORD_DELIMITERS, c))
 272:    me->line_break = me->write_pointer-1;
 273: 
 274:   /* Flush buffer out when full */
 275:   if (me->write_pointer >= me->buffer+BUFFER_SIZE-3) {
 276:    if (me->markup || me->preformatted) {
 277:      *me->write_pointer = '\n';
 278:      (*me->targetClass.put_block)(me->target,
 279:                     me->buffer,
 280:                     me->write_pointer-me->buffer+1);
 281:      me->write_pointer = me->buffer;
 282:    } else {                     /* Use break-point */
 283:      char line_break_char = *me->line_break;
 284:      char *saved = me->line_break;
 285:      *me->line_break = '\n';
 286:      (*me->targetClass.put_block)(me->target,
 287:                     me->buffer,
 288:                     me->line_break-me->buffer+1);
 289:      *me->line_break = line_break_char;
 290:      {                      /* move next line in */
 291:        char *p = saved;
 292:        char *q;
 293:        for(q=me->buffer; p<me->write_pointer; )
 294:          *q++ = *p++;
 295:      }
 296:      me->write_pointer = me->buffer + (me->write_pointer-saved);
 297:    }      
 298:    me->line_break = me->buffer;
 299:   }
 300: }
 301: 
 302: 
 303: 
 304: /*   String handling
 305: **   ---------------
 306: */
 307: PRIVATE void HTTeXGen_put_string ARGS2(HTStructured *, me, CONST char*, s)
 308: {
 309:   CONST char * p;
 310:   for (p=s; *p; p++)
 311:    HTTeXGen_put_character(me, *p);
 312: }
 313: 
 314: 
 315: PRIVATE void HTTeXGen_write ARGS3(HTStructured *, me, CONST char*, s, int, l)
 316: {
 317:   CONST char * p;
 318:   for(p=s; p<s+l; p++)
 319:    HTTeXGen_put_character(me, *p);
 320: }
 321: 
 322: 
 323: /*   Start Element
 324: **   -------------
 325: **
 326: **       No attributes are put to the output       Henrik 07/03-94
 327: **   Does no assumptions of WHAT element is started...
 328: */
 329: PRIVATE void HTTeXGen_start_element ARGS4(
 330:    HTStructured *,     me,
 331:    int,          element_number,
 332:    CONST BOOL*,      present,
 333:    CONST char **,     value)
 334: {
 335:   me->startup = YES;             /* Now, let's get down to it */
 336:   if (me->preformatted == YES)       /* Don't start markup in here */
 337:    return;
 338:   if (element_number == HTML_PRE)
 339:    me->preformatted = YES;
 340:   if (element_number == HTML_CITE ||        /* No \n here, please! */
 341:    element_number == HTML_COMMENT ||
 342:    element_number == HTML_DT ||
 343:    element_number == HTML_H1 ||
 344:    element_number == HTML_H2 ||
 345:    element_number == HTML_H3 ||
 346:    element_number == HTML_H4 ||
 347:    element_number == HTML_H5 ||
 348:    element_number == HTML_H6 ||
 349:    element_number == HTML_H7 ||
 350:    element_number == HTML_TITLE)
 351:    me->sensitive = YES;
 352:   else if (element_number == HTML_DD)     /* Only way to turn <DT> off */
 353:    me->sensitive = NO;
 354:   me->markup = element_number == HTML_A ? NO : YES;
 355:   HTTeXGen_put_string(me, *TeX_names[element_number]);
 356:   me->markup = NO;
 357: }
 358: 
 359: 
 360: /*       End Element
 361: **       -----------
 362: **
 363: **   Ends an markup element         Henrik 07/03-94
 364: **   Does no assumptions of WHAT element is ended...
 365: */
 366: PRIVATE void HTTeXGen_end_element ARGS2(HTStructured *, me,
 367:                    int , element_number)
 368: {
 369:   if (me->preformatted && element_number != HTML_PRE)
 370:    return;         
 371:   me->preformatted = NO;
 372:   me->markup = YES;
 373:   HTTeXGen_put_string(me, *(TeX_names[element_number]+1));
 374:   me->markup = NO;
 375:   if (element_number == HTML_CITE || 
 376:    element_number == HTML_COMMENT ||
 377:    element_number == HTML_DL ||
 378:    element_number == HTML_H1 ||
 379:    element_number == HTML_H2 ||
 380:    element_number == HTML_H3 ||
 381:    element_number == HTML_H4 ||
 382:    element_number == HTML_H5 ||
 383:    element_number == HTML_H6 ||
 384:    element_number == HTML_H7 ||
 385:    element_number == HTML_TITLE)
 386:    me->sensitive = NO;
 387: }
 388: 
 389: 
 390: /*       Expanding entities
 391: **       ------------------
 392: **
 393: */
 394: PRIVATE void HTTeXGen_put_entity ARGS2(HTStructured *, me, int, entity_number)
 395: {
 396:   BOOL mark = me->markup;
 397:   if (*TeX_entities[entity_number] != '&' && /* Theese are converted later */
 398:    *TeX_entities[entity_number] != '<' &&
 399:    *TeX_entities[entity_number] != '>')
 400:    me->markup = YES;
 401:   HTTeXGen_put_string(me, TeX_entities[entity_number]);
 402:   me->markup = mark;
 403: }
 404: 
 405: 
 406: 
 407: /*   Free an HTML object
 408: **   -------------------
 409: **
 410: */
 411: PRIVATE void HTTeXGen_free ARGS1(HTStructured *, me)
 412: {
 413:   HTTeXGen_flush(me);
 414:   (*me->targetClass.put_string)(me->target, "\n\\end{document}\n");
 415:   HTTeXGen_flush(me);
 416:   (*me->targetClass.free)(me->target);    /* ripple through */
 417:   free(me);
 418: }
 419: 
 420: 
 421: PRIVATE void HTTeXGen_abort ARGS2(HTStructured *, me, HTError, e)
 422: {
 423:   HTTeXGen_free(me);
 424: }
 425: 
 426: 
 427: /*   Structured Object Class
 428: **   -----------------------
 429: */
 430: PRIVATE CONST HTStructuredClass HTTeXGeneration = /* As opposed to print etc */
 431: {       
 432:    "HTMLToTeX",
 433:    HTTeXGen_free,
 434:    HTTeXGen_abort,
 435:    HTTeXGen_put_character,     HTTeXGen_put_string,  HTTeXGen_write,
 436:    HTTeXGen_start_element,     HTTeXGen_end_element,
 437:    HTTeXGen_put_entity
 438: }; 
 439: 
 440: 
 441: /*   HTConverter from HTML to TeX Stream
 442: **   ------------------------------------------
 443: **
 444: */
 445: PUBLIC HTStream* HTMLToTeX ARGS5(
 446:    HTRequest *,      request,
 447:    void *,         param,
 448:    HTFormat,        input_format,
 449:    HTFormat,        output_format,
 450:    HTStream *,       output_stream)
 451: {
 452:   HTStructured* me = (HTStructured*) calloc(1, sizeof(*me));
 453:   if (me == NULL) outofmem(__FILE__, "HTMLToTeX");  
 454: 
 455:   me->isa = (HTStructuredClass*) &HTTeXGeneration;
 456:   me->dtd = &HTMLP_dtd;
 457:   me->target = output_stream;
 458:   me->targetClass = *me->target->isa;/* Copy pointers to routines for speed*/
 459:   me->write_pointer = me->buffer;
 460:   me->line_break =  me->buffer;
 461:   (*me->targetClass.put_string)(me->target,
 462:     "\\documentstyle[11pt]{report}\n\\begin{document}\n");
 463:   return SGML_new(&HTMLP_dtd, me);
 464: }
 465: 
 466: 
 467: /* END OF FILE HTTeXGen.c */
 468: 
 469: 
 470: 
 471: 
 472: 
 473: 
 474: 
 475: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /