diff --git a/src/.cvsignore b/src/.cvsignore index ab0c685..b5caf54 100644 --- a/src/.cvsignore +++ b/src/.cvsignore @@ -26,3 +26,4 @@ cpp2html.cc.html cpp2html.h.html src-hilite-lesspipe.sh libsh.a +check-regexp
\ No newline at end of file diff --git a/src/Makefile.am b/src/Makefile.am index e6ca190..ffed6b7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (C) 1999, 2000, Lorenzo Bettini <http://www.lorenzobettini.it> +# Copyright (C) 1999-2007 Lorenzo Bettini <http://www.lorenzobettini.it> # # This file is free software; as a special exception the author gives # unlimited permission to copy and/or distribute it, with or without @@ -8,10 +8,24 @@ # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -bin_PROGRAMS = source-highlight +SUFFIXES = .ggo + +if NO_GENGETOPT +.ggo.c: + touch $@ +else + +check-regexp_cmd.c: $(srcdir)/check-regexp_cmd.ggo + $(GENGETOPT) --input $(srcdir)/check-regexp_cmd.ggo --unamed-opts -F check-regexp_cmd + +.ggo.c: + $(GENGETOPT) --input $< --unamed-opts --no-handle-help --no-handle-version +endif + + +bin_PROGRAMS = source-highlight check-regexp bin_SCRIPTS = java2html cpp2html src-hilite-lesspipe.sh -noinst_LTLIBRARIES = libsh.la -libsh_la_LIBADD = $(BOOST_REGEX_LIB) $(LDADD) lib/libcommon.la + EXTRA_PROGRAMS = source-highlight-cgi SRCHILITE = $(top_builddir)/src/source-highlight$(EXEEXT) @@ -24,8 +38,10 @@ AM_CPPFLAGS = -I$(top_srcdir)/gl -I$(top_builddir)/gl LDADD = @LEXLIB@ $(top_builddir)/gl/libgnu.la -source_highlight_LDADD = libsh.la -source_highlight_cgi_LDADD = libsh.la +source_highlight_LDADD = $(BOOST_REGEX_LIB) $(LDADD) lib/libcommon.la +source_highlight_cgi_LDADD = $(source_highlight_LDADD) + +check_regexp_LDADD = $(BOOST_REGEX_LIB) $(top_builddir)/gl/libgnu.la SUBDIRS = \ lib \ @@ -35,12 +51,11 @@ DIST_SUBDIRS = \ lib \ includes -libsh_la_SOURCES = source-highlight.cc regexpengine.cpp regexpstate.cpp regexpenginedebug.cpp - # for most rules, we use one file per line. 
`diffs' are more clear this way -source_highlight_SOURCES = startapp.cc +source_highlight_SOURCES = cmdline.c startapp.cc source-highlight.cc +check_regexp_SOURCES = check-regexp_cmd.c check-regexp.cpp -source_highlight_cgi_SOURCES = startapp-cgi.cc envmapper.c +source_highlight_cgi_SOURCES = cmdline.c startapp-cgi.cc envmapper.c # files that we don't want automake/autoconf to touch ever. # just stick them in the distribution as-is @@ -81,10 +96,12 @@ xhtml_common.outlang \ xhtmlcss.outlang \ xhtmldoc.outlang \ xhtml.outlang \ +xhtml_notfixed.outlang \ xhtmltable.outlang \ texinfo.outlang \ javadoc.outlang \ -docbook.outlang +docbook.outlang \ +docbookdoc.outlang STYLEFILES = texinfo.style esc.style @@ -111,4 +128,4 @@ tags: .PHONY: tags -noinst_HEADERS = regexpengine.h regexpstate.h asserttest.h regexpenginedebug.h +noinst_HEADERS = asserttest.h cmdline.h cmdlineargs.h cmdline.ggo check-regexp_cmd.h check-regexp_cmd.ggo diff --git a/src/asserttest.h b/src/asserttest.h index 7b54235..c3275a2 100644 --- a/src/asserttest.h +++ b/src/asserttest.h @@ -15,8 +15,9 @@ #include <iostream> #include <stdlib.h> +template <typename T> int -assertEquals(const std::string &expected, const std::string &actual) +assertEquals(T expected, T actual) { if (expected != actual) { std::cerr << "assertEquals failed" << std::endl; @@ -30,7 +31,7 @@ assertEquals(const std::string &expected, const std::string &actual) } int -assertEquals(bool expected, bool actual) +assertEquals(const std::string &expected, const std::string &actual) { if (expected != actual) { std::cerr << "assertEquals failed" << std::endl; diff --git a/src/changelog.lang b/src/changelog.lang index 828bf26..0a1fb37 100644 --- a/src/changelog.lang +++ b/src/changelog.lang @@ -3,14 +3,6 @@ state date start '[[:digit:]]{2,4}-?[[:digit:]]{2}-?[[:digit:]]{2}' begin name = '([[:word:]]|[[:punct:]])+' end -state symbol start '^(?:[\t]+|[[:space:]]+)\*[[:space:]]+' begin - state file start '[^:]+\:' begin - normal start '.' 
- end -end +(normal,symbol,normal,file)= `(^[[:blank:]]+)(\*)([[:blank:]]+)((?:[^:]+\:)?)` +(normal,file)= `(^[[:blank:]]+)((?:[^:]+\:)?)` -state normal start '^(?:[\t]+|[[:space:]]+)' begin - state file start '[^:]+\:' begin - normal start '.' - end -end diff --git a/src/check-regexp.cpp b/src/check-regexp.cpp new file mode 100644 index 0000000..1808381 --- /dev/null +++ b/src/check-regexp.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2007 Lorenzo Bettini <http://www.lorenzobettini.it> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* + * This program is part of GNU source-highlight simply to + * check a regular expression against a given expression + */ + +/* + * some examples: + * + + ./check-regexp "([^[:alnum:]]+)[^[:blank:]]*(\1)[^[:blank:]]*(\1)" "|w|\$|e|d| ^w^w^ ?a?b?" + trying to match: |w|$|e|d| ^w^w^ ?a?b? + against : ([^[:alnum:]]+)[^[:blank:]]*(\1)[^[:blank:]]*(\1) + what[0]: |w|$|e|d| + what[1]: | + what[2]: | + what[3]: | + prefix: + what[0]: ^w^w^ + what[1]: ^ + what[2]: ^ + what[3]: ^ + prefix: + what[0]: ?a?b? + what[1]: ? + what[2]: ? + what[3]: ?
+ total number of matches: 3
+
+ *
+ */
+
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <string>
+#include <boost/regex.hpp>
+
+#include "check-regexp_cmd.h"
+
+using namespace std;
+
+/*
+ * Searches every string passed after the first argument for all the matches
+ * of the regular expression passed as first argument, printing each match,
+ * its matched subexpressions and the unmatched text around it.
+ */
+int main(int argc, char * argv[]) {
+  gengetopt_args_info args_info; // command line structure
+
+  // cmdline_parser returns 0 when the command line was parsed correctly
+  if (cmdline_parser(argc, argv, &args_info) != 0)
+    return EXIT_FAILURE;
+
+  if (args_info.inputs_num < 2) {
+    cerr << "Syntax: check-regexp <regular expression> <expressions...>" << endl;
+    cmdline_parser_free(&args_info);
+    return EXIT_FAILURE;
+  }
+
+  int status = EXIT_SUCCESS;
+  try {
+    const boost::regex regex(args_info.inputs[0]);
+    const boost::sregex_iterator no_more_matches;
+
+    for (unsigned int i = 1; i < args_info.inputs_num; ++i) {
+      const string tomatch = args_info.inputs[i];
+      cout << "\nsearching : " << tomatch << endl;
+      cout << "for the regexp : " << regex << endl;
+
+      // a regex_iterator always advances, even after an empty match,
+      // where a hand written regex_search loop would never terminate
+      int num_of_matches = 0;
+      for (boost::sregex_iterator match(tomatch.begin(), tomatch.end(), regex);
+           match != no_more_matches; ++match) {
+        const boost::smatch &what = *match;
+        const string prefix = what.prefix();
+        if (prefix.size())
+          cout << "prefix: " << prefix << endl;
+        cout << "what[0]: " << what[0] << endl;
+        for (unsigned int sub = 1; sub < what.size(); ++sub) {
+          if (what[sub].matched) {
+            cout << " what[" << sub << "]: "<< what[sub] << endl;
+            cout << " length: " << what[sub].length()<< endl;
+          }
+        }
+        const string suffix = what.suffix();
+        if (suffix.size())
+          cout << "suffix: " << suffix << endl;
+        ++num_of_matches;
+      }
+      cout << "total number of matches: " << num_of_matches << endl;
+    }
+  } catch (const std::exception &e) {
+    // e.g., the regular expression is not well formed
+    cerr << "check-regexp: " << e.what() << endl;
+    status = EXIT_FAILURE;
+  }
+  cmdline_parser_free(&args_info);
+  return status;
+}
+
diff --git a/src/check-regexp_cmd.c b/src/check-regexp_cmd.c
new file mode 100644
index 0000000..f20ab59
--- /dev/null
+++ b/src/check-regexp_cmd.c
@@ -0,0 +1,293 @@
+/*
+ File autogenerated by gengetopt version
2.19.2 + generated with the following command: + /home/bettini/usr/local/bin/gengetopt --input ./check-regexp_cmd.ggo --unamed-opts -F check-regexp_cmd + + The developers of gengetopt consider the fixed text that goes in all + gengetopt output files to be in the public domain: + we make no copyright claims on it. +*/ + +/* If we use autoconf. */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "getopt.h" + +#include "check-regexp_cmd.h" + +const char *gengetopt_args_info_purpose = "Tries to match the regular expression passed as the first argument\nagainst the strings passed as remaining arguments"; + +const char *gengetopt_args_info_usage = "Usage: check-regexp 'regular expression' 'string1' 'string2' ..."; + +const char *gengetopt_args_info_description = ""; + +const char *gengetopt_args_info_help[] = { + " -h, --help Print help and exit", + " -V, --version Print version and exit", + 0 +}; + +static +void clear_given (struct gengetopt_args_info *args_info); +static +void clear_args (struct gengetopt_args_info *args_info); + +static int +cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required, const char *additional_error); + + +static char * +gengetopt_strdup (const char *s); + +static +void clear_given (struct gengetopt_args_info *args_info) +{ + args_info->help_given = 0 ; + args_info->version_given = 0 ; +} + +static +void clear_args (struct gengetopt_args_info *args_info) +{ + +} + +static +void init_args_info(struct gengetopt_args_info *args_info) +{ + args_info->help_help = gengetopt_args_info_help[0] ; + args_info->version_help = gengetopt_args_info_help[1] ; + +} + +void +cmdline_parser_print_version (void) +{ + printf ("%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION); +} + +void +cmdline_parser_print_help (void) +{ + int i = 0; + cmdline_parser_print_version (); + + if 
(strlen(gengetopt_args_info_purpose) > 0) + printf("\n%s\n", gengetopt_args_info_purpose); + + printf("\n%s\n\n", gengetopt_args_info_usage); + + if (strlen(gengetopt_args_info_description) > 0) + printf("%s\n", gengetopt_args_info_description); + + while (gengetopt_args_info_help[i]) + printf("%s\n", gengetopt_args_info_help[i++]); +} + +void +cmdline_parser_init (struct gengetopt_args_info *args_info) +{ + clear_given (args_info); + clear_args (args_info); + init_args_info (args_info); + + args_info->inputs = NULL; + args_info->inputs_num = 0; +} + +static void +cmdline_parser_release (struct gengetopt_args_info *args_info) +{ + + unsigned int i; + + for (i = 0; i < args_info->inputs_num; ++i) + free (args_info->inputs [i]); + + if (args_info->inputs_num) + free (args_info->inputs); + + clear_given (args_info); +} + +int +cmdline_parser_file_save(const char *filename, struct gengetopt_args_info *args_info) +{ + FILE *outfile; + int i = 0; + + outfile = fopen(filename, "w"); + + if (!outfile) + { + fprintf (stderr, "%s: cannot open file for writing: %s\n", CMDLINE_PARSER_PACKAGE, filename); + return EXIT_FAILURE; + } + + if (args_info->help_given) { + fprintf(outfile, "%s\n", "help"); + } + if (args_info->version_given) { + fprintf(outfile, "%s\n", "version"); + } + + fclose (outfile); + + i = EXIT_SUCCESS; + return i; +} + +void +cmdline_parser_free (struct gengetopt_args_info *args_info) +{ + cmdline_parser_release (args_info); +} + + +/* gengetopt_strdup() */ +/* strdup.c replacement of strdup, which is not standard */ +char * +gengetopt_strdup (const char *s) +{ + char *result = NULL; + if (!s) + return result; + + result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + return cmdline_parser2 (argc, argv, args_info, 0, 1, 1); +} + +int +cmdline_parser2 (int argc, char * const *argv, struct 
gengetopt_args_info *args_info, int override, int initialize, int check_required) +{ + int result; + + result = cmdline_parser_internal (argc, argv, args_info, override, initialize, check_required, NULL); + + if (result == EXIT_FAILURE) + { + cmdline_parser_free (args_info); + exit (EXIT_FAILURE); + } + + return result; +} + +int +cmdline_parser_required (struct gengetopt_args_info *args_info, const char *prog_name) +{ + return EXIT_SUCCESS; +} + +int +cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required, const char *additional_error) +{ + int c; /* Character of the parsed option. */ + + int error = 0; + struct gengetopt_args_info local_args_info; + + if (initialize) + cmdline_parser_init (args_info); + + cmdline_parser_init (&local_args_info); + + optarg = 0; + optind = 0; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + char *stop_char; + + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + stop_char = 0; + c = getopt_long (argc, argv, "hV", long_options, &option_index); + + if (c == -1) break; /* Exit from `while (1)' loop. */ + + switch (c) + { + case 'h': /* Print help and exit. */ + cmdline_parser_print_help (); + cmdline_parser_free (&local_args_info); + exit (EXIT_SUCCESS); + + case 'V': /* Print version and exit. */ + cmdline_parser_print_version (); + cmdline_parser_free (&local_args_info); + exit (EXIT_SUCCESS); + + + case 0: /* Long option with no short option */ + case '?': /* Invalid option. */ + /* `getopt_long' already printed an error message. */ + goto failure; + + default: /* bug: option not considered. */ + fprintf (stderr, "%s: option unknown: %c%s\n", CMDLINE_PARSER_PACKAGE, c, (additional_error ? 
additional_error : "")); + abort (); + } /* switch */ + } /* while */ + + + + + cmdline_parser_release (&local_args_info); + + if ( error ) + return (EXIT_FAILURE); + + if (optind < argc) + { + int i = 0 ; + int found_prog_name = 0; + /* whether program name, i.e., argv[0], is in the remaining args + (this may happen with some implementations of getopt, + but surely not with the one included by gengetopt) */ + + i = optind; + while (i < argc) + if (argv[i++] == argv[0]) { + found_prog_name = 1; + break; + } + i = 0; + + args_info->inputs_num = argc - optind - found_prog_name; + args_info->inputs = + (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + if (argv[optind++] != argv[0]) + args_info->inputs[ i++ ] = gengetopt_strdup (argv[optind-1]) ; + } + + return 0; + +failure: + + cmdline_parser_release (&local_args_info); + return (EXIT_FAILURE); +} diff --git a/src/check-regexp_cmd.ggo b/src/check-regexp_cmd.ggo new file mode 100644 index 0000000..6ddefb2 --- /dev/null +++ b/src/check-regexp_cmd.ggo @@ -0,0 +1,12 @@ +# Copyright (C) 1999-2007 Lorenzo Bettini, http://www.lorenzobettini.it + +# This file is used by gengetopt to generate a command line args parser +# GNU gengetopt can be found at +# http://www.gnu.org/software/gengetopt + +package "check-regexp (GNU Source-highlight)" + +purpose "Tries to match the regular expression passed as the first argument +against the strings passed as remaining arguments" + +usage "check-regexp 'regular expression' 'string1' 'string2' ..." diff --git a/src/check-regexp_cmd.h b/src/check-regexp_cmd.h new file mode 100644 index 0000000..424ef12 --- /dev/null +++ b/src/check-regexp_cmd.h @@ -0,0 +1,62 @@ +/* check-regexp_cmd.h */ + +/* File autogenerated by gengetopt version 2.19.2 */ + +#ifndef CHECK_REGEXP_CMD_H +#define CHECK_REGEXP_CMD_H + +/* If we use autoconf. 
*/ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#ifndef CMDLINE_PARSER_PACKAGE +#define CMDLINE_PARSER_PACKAGE "check-regexp (GNU Source-highlight)" +#endif + +#ifndef CMDLINE_PARSER_VERSION +#define CMDLINE_PARSER_VERSION VERSION +#endif + +struct gengetopt_args_info +{ + const char *help_help; /* Print help and exit help description. */ + const char *version_help; /* Print version and exit help description. */ + + int help_given ; /* Whether help was given. */ + int version_given ; /* Whether version was given. */ + + char **inputs ; /* unamed options */ + unsigned inputs_num ; /* unamed options number */ +} ; + +extern const char *gengetopt_args_info_purpose; +extern const char *gengetopt_args_info_usage; +extern const char *gengetopt_args_info_help[]; + +int cmdline_parser (int argc, char * const *argv, + struct gengetopt_args_info *args_info); +int cmdline_parser2 (int argc, char * const *argv, + struct gengetopt_args_info *args_info, + int override, int initialize, int check_required); +int cmdline_parser_file_save(const char *filename, + struct gengetopt_args_info *args_info); + +void cmdline_parser_print_help(void); +void cmdline_parser_print_version(void); + +void cmdline_parser_init (struct gengetopt_args_info *args_info); +void cmdline_parser_free (struct gengetopt_args_info *args_info); + +int cmdline_parser_required (struct gengetopt_args_info *args_info, + const char *prog_name); + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* CHECK_REGEXP_CMD_H */ diff --git a/src/lib/cmdline.c b/src/cmdline.c index a471a23..fdc0f2c 100644 --- a/src/lib/cmdline.c +++ b/src/cmdline.c @@ -1,5 +1,5 @@ /* - File autogenerated by gengetopt version 2.19.1 + File autogenerated by gengetopt version 2.19.2 generated with the following command: /home/bettini/usr/local/bin/gengetopt --input cmdline.ggo --unamed-opts --no-handle-help --no-handle-version diff --git a/src/lib/cmdline.ggo 
b/src/cmdline.ggo index e855554..25bac44 100644 --- a/src/lib/cmdline.ggo +++ b/src/cmdline.ggo @@ -1,37 +1,9 @@ -# Copyright (C) 1999, 2000 Lorenzo Bettini, http://www.lorenzobettini.it +# Copyright (C) 1999-2007 Lorenzo Bettini, http://www.lorenzobettini.it # This file is used by gengetopt to generate a command line args parser # GNU gengetopt can be found at # http://www.gnu.org/software/gengetopt -# Specification file format: -# -# This file consist in lines of sentences with the following format: -# -# option <long> <short> <desc> <argtype> <required> -# option <long> <short> <desc> flag <onoff> -# option <long> <short> <desc> no -# ... # ... -# -# Where: -# -# <packname> = Double quoted string with upper and lower case chars, digits, -# '-' and '.'. No spaces allowed. -# <version> = Double quoted string with upper and lower case chars, digits, -# '-' and '.'. No spaces allowed. -# <long> = Double quoted string with upper and lower case chars, digits, -# '-' and '.'. No spaces allowed. -# <short> = A single upper or lower case char, or a digit. -# <desc> = String with upper and lower case chars, digits, '-', '.' and -# spaces. First character must be no space. -# <argtype> = string, int, short, long, float, double, longdouble or longlong. -# <required> = yes or no. -# <onoff> = on or off. -# Comments begins with '#' in any place of the line and ends in the -# end of line. -# The third form of option is used if the option does not take an argument; -# it must not be required. - purpose "Highlight the syntax of a source file (e.g. Java) into a specific format (e.g. 
HTML)" # Options diff --git a/src/lib/cmdline.h b/src/cmdline.h index cc28f14..3a0b158 100644 --- a/src/lib/cmdline.h +++ b/src/cmdline.h @@ -1,6 +1,6 @@ /* cmdline.h */ -/* File autogenerated by gengetopt version 2.19.1 */ +/* File autogenerated by gengetopt version 2.19.2 */ #ifndef CMDLINE_H #define CMDLINE_H diff --git a/src/lib/cmdlineargs.h b/src/cmdlineargs.h index 10ffe45..10ffe45 100644 --- a/src/lib/cmdlineargs.h +++ b/src/cmdlineargs.h diff --git a/src/cpp.lang b/src/cpp.lang index 8a59c08..90a5e87 100644 --- a/src/cpp.lang +++ b/src/cpp.lang @@ -13,6 +13,9 @@ include "number.lang" include "c_string.lang" +(keyword,normal,type) = + `(\<(?:class|struct|typename))([[:blank:]]+)([[:alnum:]]+)` + keyword = "__asm|__cdecl|__declspec|__export|__far16", "__fastcall|__fortran|__import", "__pascal|__rtti|__stdcall|_asm|_cdecl", diff --git a/src/default.css b/src/default.css index e08e98b..52a320b 100644 --- a/src/default.css +++ b/src/default.css @@ -3,6 +3,7 @@ body { background-color: white; } .keyword { color: blue; font-weight: bold; } .type { color: darkgreen; } .string { color: red; font-family: monospace; } +.regexp { color: orange; } .specialchar { color: pink; font-family: monospace; } .comment { color: brown; font-style: italic; } .number { color: purple; } diff --git a/src/default.style b/src/default.style index 5ade28d..14d5117 100644 --- a/src/default.style +++ b/src/default.style @@ -3,6 +3,7 @@ bgcolor "white"; // the background color for documents keyword blue b ; // for language keywords type darkgreen ; // for basic types string red f ; // for strings and chars +regexp orange f ; // for regular expressions specialchar pink f ; // for special chars, e.g., \n, \t, \\ comment brown i, noref; // for comments number purple ; // for literal numbers diff --git a/src/docbook.outlang b/src/docbook.outlang index 923eae0..0ed7a62 100644 --- a/src/docbook.outlang +++ b/src/docbook.outlang @@ -6,6 +6,11 @@ extension "xml" bold "<emphasis
role=\"strong\">$text</emphasis>" italics "<emphasis>$text</emphasis>" +anchor "<anchor id=\"line$linenum\"/>$text" +postline_reference "<link linkend='line$linenum'>$text -> $linenum</link>" +postdoc_reference "<link linkend='line$linenum'>$text -> $linenum</link>" +reference "<link linkend='line$linenum'>$text</link>" + translations "&" "&" "<" "<" diff --git a/src/docbookdoc.outlang b/src/docbookdoc.outlang new file mode 100644 index 0000000..cb1f446 --- /dev/null +++ b/src/docbookdoc.outlang @@ -0,0 +1,14 @@ +include "docbook.outlang" + +doctemplate +"<!DOCTYPE article PUBLIC \"-//OASIS//DTD DocBook//EN\"> +<article> +<articleinfo> +<title>prova</title> +</articleinfo> +<programlisting>" +"</programlisting> +</article> +" +end + diff --git a/src/esc.style b/src/esc.style index 8c00bda..8a0aebd 100644 --- a/src/esc.style +++ b/src/esc.style @@ -1,6 +1,7 @@ keyword blue b ; type darkgreen ; string red ; +regexp orange ; specialchar pink ; comment cyan i ; number purple ; diff --git a/src/html.lang b/src/html.lang index af706ca..2893308 100644 --- a/src/html.lang +++ b/src/html.lang @@ -4,9 +4,9 @@ end comment delim "<!--" "-->" multiline nested -keyword = '<(/)?[[:alnum:]]+(/)?>' +keyword = '<(/)?[[:alpha:]][[:alnum:]]*(/)?>' -state keyword delim '<(/)?[[:alnum:]]+' '(/)?>' multiline begin +state keyword delim '<(/)?[[:alpha:]][[:alnum:]]*' '(/)?>' multiline begin type = '[^="[:blank:]>]+' symbol = "=" string delim "\"" "\"" escape "\\" multiline diff --git a/src/html_notfixed.outlang b/src/html_notfixed.outlang index 4b5b49c..c6af7b0 100644 --- a/src/html_notfixed.outlang +++ b/src/html_notfixed.outlang @@ -2,6 +2,12 @@ include "html_common.outlang" fixed "<tt>$text</tt>" +doctemplate +"<!-- Generator: $additional --> +$header" +"$footer" +end + translations "\n" "<br>\n" " " " " diff --git a/src/java.lang b/src/java.lang index 742fac1..63371ca 100644 --- a/src/java.lang +++ b/src/java.lang @@ -6,6 +6,8 @@ include "number.lang" include "c_string.lang" 
+(keyword,normal,type) = `(\<(?:class|interface))([[:blank:]]+)([$[:alnum:]]+)` + keyword = "abstract|assert|break|case|catch|class|const", "continue|default|do|else|extends|false|final", "finally|for|goto|if|implements|instanceof|interface" diff --git a/src/lang.map b/src/lang.map index d15938b..efae070 100644 --- a/src/lang.map +++ b/src/lang.map @@ -38,8 +38,8 @@ caml = caml.lang mli = caml.lang sml = sml.lang sig = sml.lang -syslog = syslog.lang -log = syslog.lang +syslog = log.lang +log = log.lang pas = pascal.lang pascal = pascal.lang fortran = fortran.lang diff --git a/src/langdef.lang b/src/langdef.lang index 2229f23..ade37f6 100644 --- a/src/langdef.lang +++ b/src/langdef.lang @@ -5,13 +5,14 @@ comment start "#" preproc = "include" string delim "\"" "\"" escape "\\" multiline -string delim "'" "'" escape "\\" multiline +regexp delim "'" "'" escape "\\" multiline +regexp delim "`" "`" escape "\\" multiline keyword = "state|environment|begin|end|delim|escape|start", "multiline|nested|vardef|exitall|exit", "redef|subst|nonsensitive" -symbol = "=|+|," +symbol = "=|+|,|(|)" vardef ID = '[[:word:]]+' diff --git a/src/lib/.cvsignore b/src/lib/.cvsignore index f661af7..57922c9 100644 --- a/src/lib/.cvsignore +++ b/src/lib/.cvsignore @@ -52,4 +52,5 @@ test_readtags test_langinfer mytags copyright.c -reportbugs.c
\ No newline at end of file +reportbugs.c +test_regexpreprocessor diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am index 2aa586e..cc904ae 100644 --- a/src/lib/Makefile.am +++ b/src/lib/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (C) 1999-2006 Lorenzo Bettini <http://www.lorenzobettini.it> +# Copyright (C) 1999-2007 Lorenzo Bettini <http://www.lorenzobettini.it> # # This file is free software; as a special exception the author gives # unlimited permission to copy and/or distribute it, with or without @@ -8,19 +8,11 @@ # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -SUFFIXES = .text .ggo +SUFFIXES = .text .text.c: $(TXTC) -c $< -if NO_GENGETOPT -.ggo.c: - touch $@ -else -.ggo.c: - $(GENGETOPT) --input $< --unamed-opts --no-handle-help --no-handle-version -endif - if NO_DOUBLECPP $(srcdir)/%.H: $(srcdir)/%.h touch $@ @@ -36,19 +28,22 @@ LDADD = @LEXLIB@ $(top_builddir)/gl/libgnu.la noinst_LTLIBRARIES = libcommon.la -libcommon_la_SOURCES = copyright.text reportbugs.text cmdline.c messages.cc \ +libcommon_la_SOURCES = copyright.text reportbugs.text messages.cc \ fileutil.cc chartranslator.cc generatorfactory.cc docgenerator.cc styleparser.yy \ stylescanner.ll langdefparser.yy langdefscanner.ll outlangdefparser.yy \ stylecssparser.yy stylecssscanner.ll outlangdefscanner.ll srcuntabifier.cpp \ - generatormap.cpp maingeneratormap.cpp outputbuffer.cpp outputgenerator.cpp \ - mainoutputbuffer.cpp lineoutputgenerator.cpp stringdef.cpp langelem.cpp statelangelem.cpp \ + textformatter.cpp outputbuffer.cpp outputgenerator.cpp \ + mainoutputbuffer.cpp statelangelem.cpp \ langelems.cpp regexpreprocessor.cpp regexpstatebuilder.H regexpstatebuilder_dbtab.cc \ regexpstatebuilder.cpp langdefloader.cpp langmap.cpp statestartlangelem.cpp \ - stringlistlangelem.cpp delimitedlangelem.cpp vardefinitions.cpp textformatter.cpp \ + stringlistlangelem.cpp delimitedlangelem.cpp 
vardefinitions.cpp \ textstyle.cpp textgenerator.cpp textstylebuilder.cpp doctemplate.cpp substfun.cpp \ - refgeneratormap.cpp readtags.c fileinfo.cpp linebuffer.cpp preformatter.cpp \ + readtags.c fileinfo.cpp preformatter.cpp \ regexpstateprinter.cpp langelemsprinter.cpp langelemsprinter.H langelemsprinter_dbtab.cc \ - languageinfer.cpp stopwatch.cpp utils.cpp utils.h + languageinfer.cpp stopwatch.cpp utils.cpp utils.h \ + regexpengine.cpp regexpstate.cpp regexpenginedebug.cpp \ + namedsubexpslangelem.cpp + libcommon_la_LIBADD = $(BOOST_REGEX_LIB) INCLUDES = -I@top_srcdir@/src @@ -56,12 +51,9 @@ INCLUDES = -I@top_srcdir@/src EXTRA_DIST = colors.h generatorfactory.h \ keys.h styleparser.h stylecssparser.h \ tokens.h messages.h fileutil.h \ - cmdline.h \ - chartranslator.h my_set.h my_sstream.h my_string.h \ + chartranslator.h my_sstream.h \ linenumdigit.h globalostream.h \ docgenerator.h \ - cmdlineargs.h \ - cmdline.ggo \ copyright.h reportbugs.h \ srcuntabifier.h colormap.h \ langdefparser.h outlangdefparser.h \ @@ -98,10 +90,16 @@ styleparser.cc styleparser.h: $(srcdir)/styleparser.yy stylecssparser.cc stylecssparser.h: $(srcdir)/stylecssparser.yy $(YACC) -p stylecsssc_ -o $@ $(srcdir)/stylecssparser.yy --defines=$*.h -TESTS = test_textstyle test_textgenerator test_outlangparser test_readtags \ -test_langinfer +TESTS = test_textstyle test_textgenerator test_outlangparser \ +test_langinfer test_regexpreprocessor + +check_PROGRAMS = test_langdefparser test_langmap test_textstyle test_textgenerator test_outlangparser test_langinfer test_regexpreprocessor + +if !NO_CTAGS +TESTS += test_readtags +check_PROGRAMS += test_readtags +endif -check_PROGRAMS = test_langdefparser test_langmap test_textstyle test_textgenerator test_outlangparser test_readtags test_langinfer test_langdefparser_SOURCES = test_langdefparser_main.cpp test_langdefparser_LDADD = libcommon.la @@ -126,6 +124,9 @@ test_readtags_LDADD = readtags.$(OBJEXT) test_langinfer_SOURCES = test_langinfer.cpp 
test_langinfer_LDADD = libcommon.la +test_regexpreprocessor_SOURCES = test_regexpreprocessor_main.cpp +test_regexpreprocessor_LDADD = libcommon.la + mytags: $(srcdir)/test_readtags_main.cpp $(CTAGS) --excmd=n --fields=+n -o mytags $(srcdir)/test_readtags_main.cpp $(srcdir)/readtags.h @@ -139,14 +140,16 @@ MAINTAINERCLEANFILES = styleparser.cc styleparser.h stylescanner.cc \ langdefparser.cc langdefparser.h langdefscanner.cc \ outlangdefparser.cc outlangdefparser.h outlangdefscanner.cc -noinst_HEADERS = parsestyles.h generatormap.h maingeneratormap.h outputbuffer.h \ - outputgenerator.h mainoutputbuffer.h lineoutputgenerator.h stringdef.h langelem.h \ +noinst_HEADERS = parsestyles.h textformatter.h outputbuffer.h \ + outputgenerator.h mainoutputbuffer.h stringdef.h langelem.h \ statelangelem.h langelems.h langdefparserfun.h outlangdefparserfun.h \ tostringcollection.h regexpreprocessor.h regexpstatebuilder.h langdefloader.h \ langdefscanner.h outlangdefscanner.h parsestruct.h langmap.h statestartlangelem.h \ - stringlistlangelem.h delimitedlangelem.h vardefinitions.h textformatter.h textstyle.h \ + stringlistlangelem.h delimitedlangelem.h vardefinitions.h textstyle.h \ textstyles.h textgenerator.h textstylebuilder.h doctemplate.h substfun.h \ - parserinfo.h refgeneratormap.h readtags.h fileinfo.h linebuffer.h preformatter.h \ - regexpstateprinter.h langelemsprinter.h languageinfer.h stopwatch.h stylekey.h + parserinfo.h readtags.h fileinfo.h linebuffer.h preformatter.h \ + regexpstateprinter.h langelemsprinter.h languageinfer.h stopwatch.h stylekey.h \ + regexpengine.h regexpstate.h regexpenginedebug.h \ + namedsubexpslangelem.h refposition.h diff --git a/src/lib/delimitedlangelem.cpp b/src/lib/delimitedlangelem.cpp index 8cafd20..a11757f 100644 --- a/src/lib/delimitedlangelem.cpp +++ b/src/lib/delimitedlangelem.cpp @@ -38,4 +38,11 @@ DelimitedLangElem::toString() const return res; } +const std::string +DelimitedLangElem::toStringOriginal() const +{ + string res = 
StateStartLangElem::toString() + " " + start->toStringOriginal() + (end ? " " + end->toStringOriginal() : ""); + return res; +} + diff --git a/src/lib/delimitedlangelem.h b/src/lib/delimitedlangelem.h index c1df355..dceae26 100644 --- a/src/lib/delimitedlangelem.h +++ b/src/lib/delimitedlangelem.h @@ -42,6 +42,8 @@ public: virtual const std::string toString() const; + virtual const std::string toStringOriginal() const; + void set_escape(StringDef *e) { escape = e; } StringDef *getStart() const { return start; } diff --git a/src/lib/fileutil.cc b/src/lib/fileutil.cc index 52c7386..24dcf48 100644 --- a/src/lib/fileutil.cc +++ b/src/lib/fileutil.cc @@ -17,11 +17,14 @@ * */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include <iostream> #include <fstream> #include "fileutil.h" -#include "my_string.h" #include "messages.h" // for verbosity using namespace std; diff --git a/src/lib/generatorfactory.cc b/src/lib/generatorfactory.cc index dcbb908..983f1be 100644 --- a/src/lib/generatorfactory.cc +++ b/src/lib/generatorfactory.cc @@ -25,117 +25,105 @@ #include "textgenerator.h" #include "textstyles.h" #include "textstylebuilder.h" -#include "refgeneratormap.h" - -// global -#include "maingeneratormap.h" - -GeneratorFactory::GeneratorFactory(TextStylesPtr tstyles, - PreFormatter *pf, - bool gen_ref, - const string &_ctags_file, RefPosition position, - bool optimizations) : - textStyles(tstyles), preformatter(pf), - generate_references(gen_ref), - ctags_file(_ctags_file), refposition(position), - noOptimizations(optimizations) -{ - generatormap = createGeneratorMap(); - generatormap->setNoOptimizations(noOptimizations); +#include "textformatter.h" + +GeneratorFactory::GeneratorFactory(TextStylesPtr tstyles, PreFormatter *pf, + bool gen_ref, const string &_ctags_file, RefPosition position, + bool optimizations) : + textStyles(tstyles), preformatter(pf), generate_references(gen_ref), + ctags_file(_ctags_file), refposition(position), + noOptimizations(optimizations) { + 
textformatter = createTextFormatter(); + textformatter->setNoOptimizations(noOptimizations); } -GeneratorFactory::~ GeneratorFactory() -{ - if (generatormap) - delete generatormap; +GeneratorFactory::~GeneratorFactory() { + if (textformatter) + delete textformatter; } -GeneratorMap * -GeneratorFactory::createGeneratorMap() -{ - if (generate_references) - return new RefGeneratorMap(preformatter, ctags_file, - textStyles->refstyle, refposition); - - return new GeneratorMap(preformatter); +TextFormatter *GeneratorFactory::createTextFormatter() { + if (generate_references) + return new TextFormatter(preformatter, ctags_file, + textStyles->refstyle, refposition); + else + return new TextFormatter(preformatter); } -string GeneratorFactory::preprocessColor(const string &color) -{ - if ( color[0] == '"' && color[color.size()-1] == '"') - return color.substr(1, color.size()-2); - else - return textStyles->colorMap->getColor (color); +string GeneratorFactory::preprocessColor(const string &color) { + if (color[0] == '"' && color[color.size()-1] == '"') + return color.substr(1, color.size()-2); + else + return textStyles->colorMap->getColor(color); } bool GeneratorFactory::createGenerator(const string &key, const string &color, - const string &bgcolor, StyleConstantsPtr styleconstants) -{ - if (generatormap->hasGenerator(key)) - return false; - - if (! 
textStyles->onestyle.empty()) { - generatormap->addGenerator (key, new TextGenerator(textStyles->onestyle.subst_style(key))); - return true; - } - - TextStyleBuilder textStyleBuilder(textStyles->starting_template, textStyles->style_separator); - - textStyleBuilder.start(); - - if (styleconstants.get()) { - for (StyleConstantsIterator it = styleconstants->begin(); it != styleconstants->end(); ++it) { - switch( *it ){ - case ISBOLD: - textStyleBuilder.add(textStyles->bold); - break; - case ISITALIC: - textStyleBuilder.add(textStyles->italics); - break; - case ISUNDERLINE: - textStyleBuilder.add(textStyles->underline); - break; - case ISFIXED: - textStyleBuilder.add(textStyles->fixed); - break; - case ISNOTFIXED: - textStyleBuilder.add(textStyles->notfixed); - break; - case ISNOREF: - generatormap->addNoReference(key); - break; - } + const string &bgcolor, StyleConstantsPtr styleconstants) { + if (textformatter->hasGenerator(key)) + return false; + + if (! textStyles->onestyle.empty()) { + textformatter->addGenerator(key, new TextGenerator(textStyles->onestyle.subst_style(key))); + return true; + } + + TextStyleBuilder textStyleBuilder(textStyles->starting_template, + textStyles->style_separator); + + textStyleBuilder.start(); + + if (styleconstants.get()) { + for (StyleConstantsIterator it = styleconstants->begin(); it != styleconstants->end(); ++it) { + switch (*it) { + case ISBOLD: + textStyleBuilder.add(textStyles->bold); + break; + case ISITALIC: + textStyleBuilder.add(textStyles->italics); + break; + case ISUNDERLINE: + textStyleBuilder.add(textStyles->underline); + break; + case ISFIXED: + textStyleBuilder.add(textStyles->fixed); + break; + case ISNOTFIXED: + textStyleBuilder.add(textStyles->notfixed); + break; + case ISNOREF: + textformatter->addNoReference(key); + break; + } + } } - } - if ( color.size () ) { - textStyleBuilder.add(textStyles->color.subst_style(preprocessColor(color))); - } + if (color.size()) { + 
textStyleBuilder.add(textStyles->color.subst_style(preprocessColor(color))); + } - if ( bgcolor.size () ) { - textStyleBuilder.add(textStyles->bg_color.subst_style(preprocessColor(bgcolor))); - } + if (bgcolor.size()) { + textStyleBuilder.add(textStyles->bg_color.subst_style(preprocessColor(bgcolor))); + } - TextStyle style = textStyleBuilder.end(); + TextStyle style = textStyleBuilder.end(); - generatormap->addGenerator(key, new TextGenerator(style)); - return true; + textformatter->addGenerator(key, new TextGenerator(style)); + return true; } -void GeneratorFactory::addDefaultGenerator() -{ - TextGenerator *defaultGenerator = generatormap->hasGenerator(NORMAL); - - if (!defaultGenerator) { - - if (textStyles->onestyle.empty()) - defaultGenerator = new TextGenerator(); - else - defaultGenerator = new TextGenerator(textStyles->onestyle.subst_style(NORMAL)); - - generatormap->addGenerator (NORMAL, defaultGenerator); - } - - generatormap->setDefaultGenerator(defaultGenerator); +void GeneratorFactory::addDefaultGenerator() { + TextGenerator *defaultGenerator = textformatter->hasGenerator(NORMAL); + + if (!defaultGenerator) { + + if (textStyles->onestyle.empty()) + defaultGenerator = new TextGenerator(); + else + defaultGenerator = new TextGenerator(textStyles->onestyle.subst_style(NORMAL)); + + textformatter->addGenerator(NORMAL, defaultGenerator); + } + + textformatter->setDefaultGenerator(defaultGenerator); } diff --git a/src/lib/generatorfactory.h b/src/lib/generatorfactory.h index b7da58a..2845e81 100644 --- a/src/lib/generatorfactory.h +++ b/src/lib/generatorfactory.h @@ -27,7 +27,7 @@ #include <boost/shared_ptr.hpp> #include "textstyles.h" -#include "refgeneratormap.h" +#include "refposition.h" typedef enum { ISBOLD=1, ISITALIC, ISUNDERLINE, ISFIXED, ISNOTFIXED, ISNOREF } StyleConstant; typedef std::list<StyleConstant> StyleConstants; @@ -38,6 +38,7 @@ using std::string; class TextGenerator; class PreFormatter; +class TextFormatter; class GeneratorFactory { @@ 
-52,8 +53,10 @@ class GeneratorFactory RefPosition refposition; /// whether to turn off optimizations for generating output (default false) bool noOptimizations; + /// the main text formatter + TextFormatter *textformatter; - GeneratorMap *createGeneratorMap(); + TextFormatter *createTextFormatter(); public: GeneratorFactory(TextStylesPtr tstyles, PreFormatter *pf, @@ -90,6 +93,8 @@ class GeneratorFactory * @return */ string preprocessColor(const string &color); + + TextFormatter *getTextFormatter() { return textformatter; } }; #endif // GENERATORFACTORY_H diff --git a/src/lib/generatormap.cpp b/src/lib/generatormap.cpp deleted file mode 100644 index c7660e6..0000000 --- a/src/lib/generatormap.cpp +++ /dev/null @@ -1,154 +0,0 @@ -// -// C++ Implementation: generatormap -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004 -// -// Copyright: See COPYING file that comes with this distribution -// -// - -#include "generatormap.h" -#include "textgenerator.h" -#include "preformatter.h" -#include "mainoutputbuffer.h" - -using namespace std; - -GeneratorMap::GeneratorMap(PreFormatter *pf) : - default_generator (0), - preformatter(pf), noOptimizations(false) -{ -} - -GeneratorMap::~GeneratorMap() -{ - for (MapType::const_iterator it = generatormap.begin(); it != generatormap.end(); ++it) - delete it->second; -} - -void -GeneratorMap::setDefaultGenerator(TextGenerator *gen) -{ - default_generator = gen; -} - -void -GeneratorMap::addGenerator(const std::string &elem, TextGenerator *gen) -{ - generatormap[elem] = gen; -} - -TextGenerator *GeneratorMap::hasGenerator(const string &elem) -{ - MapType::const_iterator it = generatormap.find(elem); - if (it == generatormap.end()) - return 0; - - return it->second; -} - -TextGenerator * -GeneratorMap::getGenerator(const string &elem) -{ - MapType::const_iterator it = generatormap.find(elem); - if (it == generatormap.end()) - { - // create a copy of the prototype and substitute the style. 
- TextGenerator *missing = new TextGenerator(*default_generator); - missing->subst_style(elem); - - generatormap[elem] = missing; - return missing; - } - - return it->second; -} - -void -GeneratorMap::addNoReference(const std::string &elem) -{ - noreferences.insert(elem); -} - -bool -GeneratorMap::isNoReference(const std::string &elem) const -{ - return (noreferences.find(elem) != noreferences.end()); -} - -const string -GeneratorMap::generateString( const std::string &elem, const std::string &s , - const FileInfo *p ) -{ - return getGenerator(elem)->generateEntire(preformatter->preformat(s)); -} - -void -GeneratorMap::generateEntire( const std::string &elem, const std::string &s, - const FileInfo *p ) -{ - if (noOptimizations) { - // we generate the element right now, since during debugging - // we want to be very responsive - if (s.size()) - output(generateString(elem, s, p)); - - return; - } - - // otherwise we optmize output generation: delay formatting a specific - // element until we deal with another element; this way strings that belong - // to the same element are formatted using only one tag: e.g., - // <comment>/* mycomment */</comment> - // instead of - // <comment>/*</comment><comment>mycomment</comment><comment>*/</comment> - if (elem == current_elem) { - elem_buffer << s; - } else { - // first format the buffered string - const string toformat = elem_buffer.str(); - if (toformat.size()) - output(generateString(current_elem, toformat, p)); - - // then start a new buffer - elem_buffer.str(""); - elem_buffer << s; - current_elem = elem; - current_file_info = p; - } -} - -void -GeneratorMap::generateNL( const std::string &text ) -{ - // first format the buffered string - flush(); - - string preformat_text = preformatter->preformat(text); - - if (preformat_text == text) - preformat_text = "\n"; - - outputbuffer->output_ln(preformat_text); -} - -void -GeneratorMap::flush() -{ - const string &remainder = elem_buffer.str(); - if (remainder.size()) { - 
output(generateString(current_elem, remainder, current_file_info)); - elem_buffer.str(""); - current_elem = ""; - // each line is handled separately - } -} - -void -GeneratorMap::output(const string &s) -{ - outputbuffer->output(s); -} diff --git a/src/lib/generatormap.h b/src/lib/generatormap.h deleted file mode 100644 index b6fa977..0000000 --- a/src/lib/generatormap.h +++ /dev/null @@ -1,86 +0,0 @@ -// -// C++ Interface: generatormap -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004 -// -// Copyright: See COPYING file that comes with this distribution -// -// -#ifndef GENERATORMAP_H -#define GENERATORMAP_H - -#include <map> -#include <string> -#include <set> -#include "my_sstream.h" - -class TextGenerator; -class PreFormatter; -class FileInfo; - -/** -map of generators; associate a generator for each program element, e.g., keyword, string, etc. - -@author Lorenzo Bettini -*/ -class GeneratorMap -{ - protected: - typedef std::map<std::string, TextGenerator *> MapType; - typedef std::set<std::string> NoRefType; - MapType generatormap; - /// those elements for which no reference info is generated - NoRefType noreferences; - TextGenerator *default_generator; - PreFormatter *preformatter; - /// where we buffer strings for the current elem - std::ostringstream elem_buffer; - /// the element that is currently buffered - std::string current_elem; - /// concerns the element currently buffered - const FileInfo *current_file_info; - /// whether to turn off optimizazionts (such as buffering), default: false - bool noOptimizations; - - virtual const std::string generateString(const std::string &elem, - const std::string &s, const FileInfo *); - void output(const std::string &s); - - public: - GeneratorMap(PreFormatter *); - virtual ~GeneratorMap(); - - /** - * Returns the generator for the specific element name or null if - * there's no generator for the element - * @param elem - * @return - */ - TextGenerator 
*hasGenerator(const std::string &elem); - - /** - * Retrieves the generator for a specific element; if it doesn't find it, - * it creates a generator for that element, using the default generator - * (i.e., the one for "normal" element) - * @param elem - * @return - */ - TextGenerator *getGenerator(const std::string &elem); - void addGenerator(const std::string &elem, TextGenerator *gen); - void addNoReference(const std::string &elem); - bool isNoReference(const std::string &elem) const; - - void setDefaultGenerator(TextGenerator *g); - - void generateEntire( const std::string &elem, - const std::string &s, const FileInfo * ); - void generateNL( const std::string &s ); - void flush(); - - void setNoOptimizations(bool n) { noOptimizations = n; } -}; - -#endif diff --git a/src/lib/keys.h b/src/lib/keys.h index 9fff74b..37332e7 100644 --- a/src/lib/keys.h +++ b/src/lib/keys.h @@ -2,16 +2,5 @@ #define KEYS_H #define NORMAL "normal" -#define KEYWORD "keyword" -#define TYPE "type" -#define STRING "string" -#define COMMENT "comment" -#define NUMBER "number" -#define PREPROC "preproc" -#define SYMBOL "symbol" -#define FUNCTION "function" -#define CBRACKET "cbracket" -#define LINENO "lineno" -#define GLOBAL "global" #endif diff --git a/src/lib/langdefparser.yy b/src/lib/langdefparser.yy index 9ed5d8f..b524c5c 100644 --- a/src/lib/langdefparser.yy +++ b/src/lib/langdefparser.yy @@ -1,6 +1,6 @@ %{ /* - * Copyright (C) 1999-2005 Lorenzo Bettini <http://www.lorenzobettini.it> + * Copyright (C) 1999-2007 Lorenzo Bettini <http://www.lorenzobettini.it> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,8 +24,6 @@ #include <iostream> #include <string> -#include "my_sstream.h" - #include "messages.h" #include "stringdef.h" #include "statelangelem.h" @@ -36,6 +34,7 @@ #include "langdefparserfun.h" #include "langdefscanner.h" #include "vardefinitions.h" +#include 
"namedsubexpslangelem.h" using std::cerr; using std::string; @@ -64,6 +63,11 @@ struct Key : public ParserInfo ~Key() { delete key; } }; +// this is a trick since ElementNames is a typedef and cannot +// be used in the union below +struct ElementNamesList : ElementNames { +}; + %} %union { @@ -76,12 +80,15 @@ struct Key : public ParserInfo class StateLangElem *statelangelem; class StateStartLangElem *statestartlangelem; class LangElems *langelems; + class NamedSubExpsLangElem *namedsubexpslangelem; struct Key *key; + class ElementNamesList *keys; int flag ; }; %token <tok> BEGIN_T END_T ENVIRONMENT_T STATE_T MULTILINE_T DELIM_T START_T ESCAPE_T NESTED_T EXIT_ALL EXIT_T VARDEF_T REDEF_T SUBST_T NONSENSITIVE_T -%token <string> KEY STRINGDEF VARUSE +%token <string> KEY STRINGDEF REGEXPNOPREPROC VARUSE +%token <stringdef> REGEXPDEF %type <stringdef> stringdef escapedef %type <stringdefs> stringdefs @@ -91,16 +98,18 @@ struct Key : public ParserInfo %type <booloption> multiline startnewenv nested nonsensitive %type <tok> exitall redefsubst %type <key> key; +%type <keys> keys; %% -allelements : - { - /* no definitions */ - /* synthetize a normal elem that catches everything */ - current_lang_elems = new LangElems; +allelements : + { + /* no definitions (i.e., empty a .lang file with no definition) */ + /* such as, default.lang */ + /* synthetize a normal elem that catches everything */ + current_lang_elems = new LangElems; StringDefs *defs = new StringDefs; - defs->push_back (new StringDef("(.*)")); + defs->push_back (new StringDef("(?:.*)")); current_lang_elems->add(new StringListLangElem("normal", defs, false)); } | elemdefs { current_lang_elems = $1; } @@ -155,6 +164,11 @@ complexelem : key DELIM_T stringdef stringdef escapedef multiline nested $$->setParserInfo($1); delete $1; } + | '(' keys ')' '=' REGEXPNOPREPROC { + $$ = new NamedSubExpsLangElem($2, new StringDef(*$5)); + $$->setParserInfo(parsestruct->file_name, @1.first_line); + delete $5; + } ; key: KEY { 
@@ -164,6 +178,20 @@ key: KEY { } ; +keys: keys ',' KEY + { + $$ = $1; + $$->push_back(*$3); + delete $3; + } + | KEY + { + $$ = new ElementNamesList; + $$->push_back(*$1); + delete $1; + } +; + escapedef : ESCAPE_T stringdef { $$ = $2; } | { $$ = 0; } ; @@ -200,7 +228,14 @@ stringdefs : stringdefs ',' stringdef { $$ = $1; $$->push_back($3); } $$->push_back($1); } ; -stringdef : STRINGDEF { +stringdef : REGEXPDEF { + $$ = $1; + } + | STRINGDEF { + $$ = new StringDef(*$1); + delete $1; + } + | REGEXPNOPREPROC { $$ = new StringDef(*$1); delete $1; } @@ -220,13 +255,12 @@ stringdef : STRINGDEF { %% +extern int langdef_lex_destroy (void); + void yyerror( const char *s ) { - ostringstream str ; - str << parsestruct->file_name << ":" << parsestruct->line << ": " << s; // << ", in option declaration"; - printError( str.str(), cerr ) ; - exit(EXIT_FAILURE); + exitError(s, parsestruct); } void @@ -245,6 +279,10 @@ parse_lang_def() delete vardefinitions; parsestruct = 0; vardefinitions = 0; + + // release scanner memory + langdef_lex_destroy (); + return current_lang_elems; } @@ -259,6 +297,10 @@ parse_lang_def(const char *path, const char *name) delete vardefinitions; parsestruct = 0; vardefinitions = 0; + + // release scanner memory + langdef_lex_destroy (); + return current_lang_elems; } diff --git a/src/lib/langdefscanner.ll b/src/lib/langdefscanner.ll index 6104bab..e9ff1de 100644 --- a/src/lib/langdefscanner.ll +++ b/src/lib/langdefscanner.ll @@ -1,6 +1,6 @@ %{ /* - * Copyright (C) 1999-2005, Lorenzo Bettini, http://www.lorenzobettini.it + * Copyright (C) 1999-2007, Lorenzo Bettini, http://www.lorenzobettini.it * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,6 +22,9 @@ #include "langdefparser.h" #include "langdefscanner.h" #include "fileutil.h" +#include "regexpreprocessor.h" +#include "stringdef.h" +#include "messages.h" #include <stack> @@ -40,6 +43,7 
@@ static std::ostringstream buff; static void buffer(const char *s); static void buffer_escape(const char *c); static const std::string *flush_buffer(); +static StringDef *flush_buffer_preproc(); static void open_include_file(const char *file); static void close_include_file(); @@ -69,11 +73,11 @@ IDE [a-zA-Z_]([a-zA-Z0-9_])* STRING \"[^\n"]+\" -%s COMMENT_STATE STRING_STATE REGEXP_STATE INCLUDE_STATE +%s COMMENT_STATE STRING_STATE REGEXP_STATE REGEXP_NOPREPROC_STATE INCLUDE_STATE %% -[ \t] {} +<INITIAL,COMMENT_STATE,INCLUDE_STATE>[ \t] {} \r {} @@ -130,21 +134,38 @@ STRING \"[^\n"]+\" <INITIAL>"=" { return '=' ; } <INITIAL>"," { return ',' ; } <INITIAL>"+" { return '+' ; } +<INITIAL>"(" { updateTokenInfo(); return '(' ; } +<INITIAL>")" { return ')' ; } <INITIAL>\" { BEGIN(STRING_STATE) ; } <STRING_STATE>("*"|"."|"?"|"+"|"("|")"|"{"|"}"|"["|"]"|"^"|"$") { buffer_escape( yytext ) ; } <STRING_STATE>\\\| { buffer( yytext ) ; } <STRING_STATE>\\\\ { buffer( yytext ) ; } +<STRING_STATE>\\[[:digit:]] { + printError(parsestruct->file_name, parsestruct->line, "backreferences are not allowed") ; + exitError(parsestruct->file_name, parsestruct->line, "use backreferences only inside ` `") ; + } <STRING_STATE>"\\\"" { buffer( yytext ) ; } <STRING_STATE>\" { BEGIN(INITIAL) ; langdef_lval.string = flush_buffer() ; DEB2("STRINGDEF",langdef_lval.string); return STRINGDEF; } <STRING_STATE>[^\n] { buffer( yytext ) ; } <INITIAL>\' { BEGIN(REGEXP_STATE) ; } <REGEXP_STATE>\\\\ { buffer( yytext ) ; } +<REGEXP_STATE>\\[[:digit:]] { + printError(parsestruct->file_name, parsestruct->line, "backreferences are not allowed") ; + exitError(parsestruct->file_name, parsestruct->line, "use backreferences only inside ` `") ; + } <REGEXP_STATE>"\\'" { buffer( "'" ) ; } -<REGEXP_STATE>\' { BEGIN(INITIAL) ; langdef_lval.string = flush_buffer() ; DEB2("STRINGDEF",langdef_lval.string); return STRINGDEF; } +<REGEXP_STATE>\' { BEGIN(INITIAL) ; langdef_lval.stringdef = flush_buffer_preproc() ; 
DEB2("REGEXPDEF",langdef_lval.string); return REGEXPDEF; } <REGEXP_STATE>[^\n] { buffer( yytext ) ; } +<INITIAL>` { BEGIN(REGEXP_NOPREPROC_STATE) ; } +<REGEXP_NOPREPROC_STATE>\\\\ { buffer( yytext ) ; } +<REGEXP_NOPREPROC_STATE>"\\`" { buffer( "'" ) ; } +<REGEXP_NOPREPROC_STATE>` { BEGIN(INITIAL) ; langdef_lval.string = flush_buffer() ; DEB2("REGEXPNOPREPROCDEF",langdef_lval.string); return REGEXPNOPREPROC; } +<REGEXP_NOPREPROC_STATE>[^\n] { buffer( yytext ) ; } + + <INITIAL>{nl} { DEB("NEWLINE"); ++(parsestruct->line) ; } <INITIAL>. { return yytext[0] ; } @@ -168,6 +189,13 @@ const std::string *flush_buffer() return ret; } +StringDef *flush_buffer_preproc() +{ + StringDef *sd = new StringDef(RegexPreProcessor::preprocess(buff.str()), buff.str()); + buff.str(""); + return sd; +} + void _open_file_to_scan(const string &path, const string &name) { langdef_in = open_data_file_stream(path, name); diff --git a/src/lib/langelem.cpp b/src/lib/langelem.cpp deleted file mode 100644 index f3b53af..0000000 --- a/src/lib/langelem.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// C++ Implementation: %{MODULE} -// -// Description: -// -// -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} -// -// Copyright: See COPYING file that comes with this distribution -// -// -#include "langelem.h" - -using namespace std; - -LangElem::LangElem(const string &n) : name(n), redef(false), subst(false) -{ -} - - -LangElem::~LangElem() -{ -} - -const std::string -LangElem::toString() const -{ - return name; -} - diff --git a/src/lib/langelem.h b/src/lib/langelem.h index 6bf51c8..fcf0f5a 100644 --- a/src/lib/langelem.h +++ b/src/lib/langelem.h @@ -33,12 +33,19 @@ class LangElem : public ParserInfo bool subst; // whether this substitutes an existing language element public: - LangElem(const std::string &n); + LangElem(const std::string &n) : name(n), redef(false), subst(false) {} - virtual ~LangElem(); + virtual ~LangElem() {} const std::string getName() const { return name; } - virtual const std::string 
toString() const; + virtual const std::string toString() const { return name; } + + /** + * return the original string representation of this element; + * this must be defined by subclasses + */ + virtual const std::string toStringOriginal() const = 0; + bool isRedef() const { return redef; } void setRedef() { redef = true; } bool isSubst() const { return subst; } diff --git a/src/lib/langelems.cpp b/src/lib/langelems.cpp index efcd49c..3788d8c 100644 --- a/src/lib/langelems.cpp +++ b/src/lib/langelems.cpp @@ -33,6 +33,12 @@ LangElems::toString() const return toStringCollection<LangElems>(this, '\n'); } +const string +LangElems::toStringOriginal() const +{ + return toStringOriginalCollection<LangElems>(this, '\n'); +} + void LangElems::add(LangElem *el) { push_back(el); diff --git a/src/lib/langelems.h b/src/lib/langelems.h index d8eb321..ff8548f 100644 --- a/src/lib/langelems.h +++ b/src/lib/langelems.h @@ -59,6 +59,7 @@ class LangElems : protected list<LangElem *> void subst(LangElem *el); const std::string toString() const; + const std::string toStringOriginal() const; // doublecpp: dispatch methods, DO NOT MODIFY public: virtual void dispatch_build(RegExpStateBuilder *, RegExpStatePointer state); diff --git a/src/lib/languageinfer.cpp b/src/lib/languageinfer.cpp index 4e5962c..4eca066 100644 --- a/src/lib/languageinfer.cpp +++ b/src/lib/languageinfer.cpp @@ -43,6 +43,10 @@ const string LanguageInfer::infer( istream &stream ) boost::regex langRegEx("#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*([[:alnum:]]+)"); // the regular expression for finding the language specification in a script file + // this such as #! 
/usr/bin/env perl + boost::regex langEnvRegEx("#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*(?:env)[[:blank:]]+([[:alnum:]]+)"); + + // the regular expression for finding the language specification in a script file // according to Emacs convention: # -*- language -*- boost::regex langRegExEmacs("-\\*-[[:blank:]]*([[:alnum:]]+).*-\\*-"); @@ -61,6 +65,7 @@ const string LanguageInfer::infer( istream &stream ) read_line(&stream, secondLine); boost::match_results<std::string::const_iterator> what; + boost::match_results<std::string::const_iterator> whatEnv; boost::match_results<std::string::const_iterator> whatEamcs; // first try the emacs specification @@ -77,12 +82,19 @@ const string LanguageInfer::infer( istream &stream ) return whatEamcs[1]; } - // try the sha-bang specification + // try also the env specification + boost::regex_search(firstLine, + whatEnv, langEnvRegEx, boost::match_default); + + if (whatEnv[1].matched) + return whatEnv[1]; + + // finally try the sha-bang specification boost::regex_search(firstLine, what, langRegEx, boost::match_default); if (what[1].matched) return what[1]; - + return ""; } diff --git a/src/lib/linebuffer.cpp b/src/lib/linebuffer.cpp deleted file mode 100644 index 66d3b4b..0000000 --- a/src/lib/linebuffer.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// C++ Implementation: linebuffer -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2005 -// -// Copyright: See COPYING file that comes with this distribution -// -// -#include "linebuffer.h" - -LineBuffer::LineBuffer() -{ -} - - -LineBuffer::~LineBuffer() -{ -} - -bool LineBuffer::empty() const -{ - return (buffer.str().size() == 0 && post.size() == 0); -} diff --git a/src/lib/linebuffer.h b/src/lib/linebuffer.h index f55115b..e4d64d0 100644 --- a/src/lib/linebuffer.h +++ b/src/lib/linebuffer.h @@ -33,8 +33,8 @@ class LineBuffer PostContents post; // to be generated after the line public: - LineBuffer(); - ~LineBuffer(); + 
LineBuffer() {} + ~LineBuffer() {} void output(const std::string &s) { buffer << s; } void output_post(const std::string &s) { post.insert(s); } @@ -42,7 +42,7 @@ class LineBuffer const std::string getContents() const { return buffer.str(); } const PostContents &getPostContents() const { return post; } - bool empty() const; + bool empty() const { return (buffer.str().size() == 0 && post.size() == 0); } }; typedef boost::shared_ptr<LineBuffer> LineBufferPtr; diff --git a/src/lib/lineoutputgenerator.cpp b/src/lib/lineoutputgenerator.cpp deleted file mode 100644 index bcdb3e0..0000000 --- a/src/lib/lineoutputgenerator.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// -// C++ Implementation: lineoutputgenerator -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004 -// -// Copyright: See COPYING file that comes with this distribution -// -// -#include "lineoutputgenerator.h" - -#include <iomanip> -#include "linenumdigit.h" -#include "my_sstream.h" -#include "maingeneratormap.h" -#include "textgenerator.h" -#include "textstyle.h" - -using namespace std; - -static SubstitutionMapping substitutionmapping; - -LineOutputGenerator::LineOutputGenerator(ostream& os, - TextStyle *anch, bool genref, const string &prefix, - const std::string &line_pref): - OutputGenerator(os, line_pref), generate_ref(genref && ! 
anch->empty()), - anchor(anch), anchor_line_prefix(prefix), line_num(1) -{ - line_num_generator = generatormap->getGenerator("linenum"); -} - - -LineOutputGenerator::~LineOutputGenerator() -{ -} - - -void LineOutputGenerator::generate_line(const string &line) -{ - generate_line_info(); - ++line_num; - OutputGenerator::generate_line(line); -} - -void LineOutputGenerator::reset() -{ - OutputGenerator::reset(); - line_num = 1; -} - -void -LineOutputGenerator::generate_line_info() -{ - ostringstream sout; - - sout << std::setw (line_num_digit) << std::setfill ('0') - << line_num << ":"; - - string line_str = line_num_generator->generateEntire(sout.str().c_str()); - - if (generate_ref) { - ostringstream line_n; - line_n << anchor_line_prefix << line_num; - sout.str(""); - substitutionmapping["$text"] = line_str; - substitutionmapping["$linenum"] = line_n.str(); - sout << anchor->output(substitutionmapping); - line_str = sout.str(); - } - - output_string(line_str + " "); -} diff --git a/src/lib/lineoutputgenerator.h b/src/lib/lineoutputgenerator.h deleted file mode 100644 index 201abc1..0000000 --- a/src/lib/lineoutputgenerator.h +++ /dev/null @@ -1,48 +0,0 @@ -// -// C++ Interface: lineoutputgenerator -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004 -// -// Copyright: See COPYING file that comes with this distribution -// -// -#ifndef LINEOUTPUTGENERATOR_H -#define LINEOUTPUTGENERATOR_H - -#include <outputgenerator.h> - -class TextGenerator; -class TextStyle; - -/** -a specialized output generator that also generates the line number before each line - -@author Lorenzo Bettini -*/ -class LineOutputGenerator : public OutputGenerator -{ -private: - bool generate_ref; - TextStyle *anchor; - std::string anchor_line_prefix; - unsigned int line_num; - TextGenerator *line_num_generator; - -public: - LineOutputGenerator(std::ostream& os, - TextStyle *anch, bool genref, const std::string &prefix, - const std::string &line_pref); - 
- ~LineOutputGenerator(); - -protected: - virtual void generate_line(const std::string &line); - virtual void reset(); - - virtual void generate_line_info(); -}; - -#endif diff --git a/src/lib/maingeneratormap.cpp b/src/lib/maingeneratormap.cpp deleted file mode 100644 index 8ebb379..0000000 --- a/src/lib/maingeneratormap.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// -// C++ Implementation: maingeneratormap -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004 -// -// Copyright: See COPYING file that comes with this distribution -// -// -#include "maingeneratormap.h" - -GeneratorMap *generatormap; diff --git a/src/lib/maingeneratormap.h b/src/lib/maingeneratormap.h deleted file mode 100644 index f356940..0000000 --- a/src/lib/maingeneratormap.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// C++ Interface: maingeneratormap -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004 -// -// Copyright: See COPYING file that comes with this distribution -// -// -#ifndef MAINGENERATORMAP_H -#define MAINGENERATORMAP_H - -#include "generatormap.h" - -extern GeneratorMap *generatormap; - -#endif diff --git a/src/lib/messages.cc b/src/lib/messages.cc index b5439af..32c1df6 100644 --- a/src/lib/messages.cc +++ b/src/lib/messages.cc @@ -1,5 +1,5 @@ /* -** Copyright (C) 1999, 2000, Lorenzo Bettini <http://www.lorenzobettini.it> +** Copyright (C) 1999-2007, Lorenzo Bettini <http://www.lorenzobettini.it> ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by @@ -27,6 +27,8 @@ #include <stdlib.h> #include "messages.h" +#include "parserinfo.h" +#include "parsestruct.h" static Messages *_messager = 0 ; @@ -65,9 +67,16 @@ void printError( const std::string &s, ostream &stream ) { stream << s << endl; } +void printError( const std::string &s, const ParserInfo *pinfo, ostream &stream ) { + printError(pinfo->filename, 
pinfo->line, s, stream); +} + void printError(const std::string &filename, unsigned int line, const std::string &error, ostream & stream) { - stream << filename << ":" << line << ": " << error << endl; + stream << filename << ":"; + if (line) + stream << line << ": "; + stream << error << endl; } void setMessager( Messages *m ) { @@ -86,9 +95,29 @@ void exitError(const std::string &error) exit(EXIT_FAILURE); } +void exitError(const std::string &error, const ParserInfo *pinfo) +{ + exitError(pinfo->filename, pinfo->line, error); +} + +void exitError(const std::string &error, const ParseStruct *pinfo) +{ + exitError((pinfo->path.size() ? pinfo->path + "/" : "") + pinfo->file_name, pinfo->line, error); +} + +void exitError(const std::string &filename, unsigned int line, const std::string &error) +{ + cerr << PACKAGE << ": "; + printError(filename, line, error); + exit(EXIT_FAILURE); +} + void foundBug(const std::string &error, const std::string &file, int line) { cerr << PACKAGE << ": " << error << ", " << file << ":" << line << endl; + cerr << PACKAGE << ": " << "it looks like you found a bug of this program" << endl; + cerr << PACKAGE << ": " << "could you please send this output and the input file" << endl; + cerr << PACKAGE << ": " << "to the author of this program?" 
<< endl; exit(EXIT_FAILURE); } diff --git a/src/lib/messages.h b/src/lib/messages.h index 833aa72..4fb5f8b 100644 --- a/src/lib/messages.h +++ b/src/lib/messages.h @@ -11,6 +11,9 @@ using std::ostream; using std::cerr; using std::endl; +class ParserInfo; +class ParseStruct; + class Messages { public: /// whether to print anything @@ -66,9 +69,13 @@ void printMessage_noln( const std::string &s, ostream &stream = cerr ) ; void printWarning( const char *s, ostream &stream = cerr ) ; void printError( const char *s, ostream &stream = cerr ) ; void printError( const std::string &s, ostream &stream = cerr ) ; +void printError( const std::string &s, const ParserInfo *pinfo, ostream &stream = cerr ) ; void printError(const std::string &filename, unsigned int line, const std::string &error, ostream & stream = cerr ); void memory_exhausted(); void exitError(const std::string &error); +void exitError(const std::string &error, const ParserInfo *pinfo); +void exitError(const std::string &error, const ParseStruct *pinfo); +void exitError(const std::string &filename, unsigned int line, const std::string &error); void foundBug(const std::string &error, const std::string &file, int line); bool shouldPrint(); diff --git a/src/lib/my_set.h b/src/lib/my_set.h deleted file mode 100644 index c7cce53..0000000 --- a/src/lib/my_set.h +++ /dev/null @@ -1,16 +0,0 @@ -// deal with namespace problems - -#ifndef _MY_SET_H -#define _MY_SET_H - -#include <set> - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_NAMESPACES -using std::set; -#endif - -#endif // _MY_SET_H diff --git a/src/lib/my_string.h b/src/lib/my_string.h deleted file mode 100644 index e22d2c7..0000000 --- a/src/lib/my_string.h +++ /dev/null @@ -1,11 +0,0 @@ -// deal with namespace problems - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif // HAVE_CONFIG_H - -#include <string> - -#ifdef HAVE_NAMESPACES -using std::string; -#endif diff --git a/src/lib/namedsubexpslangelem.cpp b/src/lib/namedsubexpslangelem.cpp 
new file mode 100644 index 0000000..c3a4201 --- /dev/null +++ b/src/lib/namedsubexpslangelem.cpp @@ -0,0 +1,47 @@ +// +// C++ Interface: NamedSubExpsLangElem +// +// Description: represents a regular expression made by many marked groups +// and each marked group represents a different language element +// +// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2007 +// +// Copyright: See COPYING file that comes with this distribution +// +// + +#include "namedsubexpslangelem.h" +#include "stringdef.h" +#include "tostringcollection.h" + +using namespace std; + +NamedSubExpsLangElem::NamedSubExpsLangElem(const ElementNames *names, StringDef *def, + bool ex, bool al) : + StateStartLangElem("named subexps", ex, al), // "named subexps" is a bogus name + elementNames(names), regexpDef(def) +{ +} + +NamedSubExpsLangElem::~NamedSubExpsLangElem() { + if (elementNames) + delete elementNames; + if (regexpDef) + delete regexpDef; +} + +const std::string +NamedSubExpsLangElem::toString() const +{ + string res = StateStartLangElem::toString() + " " + collectionToString(elementNames, ',') + + regexpDef->toString(); + return res; +} + +const std::string +NamedSubExpsLangElem::toStringOriginal() const +{ + string res = StateStartLangElem::toString() + " " + collectionToString(elementNames, ',') + + regexpDef->toStringOriginal(); + return res; +} diff --git a/src/lib/namedsubexpslangelem.h b/src/lib/namedsubexpslangelem.h new file mode 100644 index 0000000..43126a4 --- /dev/null +++ b/src/lib/namedsubexpslangelem.h @@ -0,0 +1,50 @@ +// +// C++ Interface: NamedSubExpsLangElem +// +// Description: represents a regular expression made by many marked groups +// and each marked group represents a different language element +// +// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2007 +// +// Copyright: See COPYING file that comes with this distribution +// +// + +#ifndef NAMEDSUBEXPSLANGELEM_H_ +#define NAMEDSUBEXPSLANGELEM_H_ + +#include "statestartlangelem.h" + 
+#include <list> + +class StringDef; + +typedef std::list<std::string> ElementNames; + +// doublecpp: forward declarations, DO NOT MODIFY +class RegExpStateBuilder; // file: regexpstatebuilder.h +class RegExpStatePointer; // file: regexpstatebuilder.h +// doublecpp: end, DO NOT MODIFY + +class NamedSubExpsLangElem : public StateStartLangElem +{ + const ElementNames *elementNames; + StringDef *regexpDef; +public: + NamedSubExpsLangElem(const ElementNames *names, StringDef *def, bool exit = false, bool all = false); + virtual ~NamedSubExpsLangElem(); + + virtual const std::string toString() const; + + virtual const std::string toStringOriginal() const; + + const ElementNames *getElementNames() const { return elementNames; } + const StringDef *getRegexpDef() const { return regexpDef; } + +// doublecpp: dispatch methods, DO NOT MODIFY +public: +virtual void dispatch_build(RegExpStateBuilder *, RegExpStatePointer state); +// doublecpp: end, DO NOT MODIFY +}; + +#endif /*NAMEDSUBEXPSLANGELEM_H_*/ diff --git a/src/lib/outlangdefparser.yy b/src/lib/outlangdefparser.yy index 4c8a28c..8caf551 100644 --- a/src/lib/outlangdefparser.yy +++ b/src/lib/outlangdefparser.yy @@ -1,6 +1,6 @@ %{ /* - * Copyright (C) 1999-2005 Lorenzo Bettini <http://www.lorenzobettini.it> + * Copyright (C) 1999-2007 Lorenzo Bettini <http://www.lorenzobettini.it> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,8 +24,6 @@ #include <iostream> #include <string> -#include "my_sstream.h" - #include "messages.h" #include "parsestruct.h" #include "outlangdefscanner.h" @@ -235,22 +233,13 @@ translation : REGEXDEF REGEXDEF %% -void -yyerror( const char *s ) -{ - ostringstream str ; - str << outlang_parsestruct->file_name << ":" << outlang_parsestruct->line << ": " << s; // << ", in option declaration"; - printError( str.str(), cerr ) ; - exit(EXIT_FAILURE); -} +extern int outlangdef_lex_destroy (void); -/* void 
-yyerror( const string &s ) +yyerror( const char *s ) { - yyerror(s.c_str()); + exitError(s, outlang_parsestruct); } -*/ TextStylesPtr parse_outlang_def() @@ -262,6 +251,10 @@ parse_outlang_def() outlangdef_parse(); delete outlang_parsestruct; outlang_parsestruct = 0; + + // release scanner memory + outlangdef_lex_destroy (); + return textstyles; } @@ -276,6 +269,10 @@ parse_outlang_def(const char *path, const char *name) outlangdef_parse(); delete outlang_parsestruct; outlang_parsestruct = 0; + + // release scanner memory + outlangdef_lex_destroy (); + return textstyles; } @@ -292,6 +289,10 @@ parse_outlang_def_file(const char *path, const char *name) outlangdef_parse(); delete outlang_parsestruct; outlang_parsestruct = 0; + + // release scanner memory + outlangdef_lex_destroy (); + return textstyles; } diff --git a/src/lib/outputgenerator.cpp b/src/lib/outputgenerator.cpp index c43fccf..abeb06c 100644 --- a/src/lib/outputgenerator.cpp +++ b/src/lib/outputgenerator.cpp @@ -4,59 +4,95 @@ // Description: // // -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004 +// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007 // // Copyright: See COPYING file that comes with this distribution // // #include "outputgenerator.h" +#include <iomanip> +#include "linenumdigit.h" +#include "my_sstream.h" +#include "textgenerator.h" +#include "textstyle.h" + using namespace std; +/// used for line information generation +static SubstitutionMapping substitutionmapping; + OutputGenerator::OutputGenerator(ostream &os, const std::string &pref) : - output(os), line_prefix(pref), alwaysFlush(false) -{ + output(os), line_prefix(pref), alwaysFlush(false), generateLineInfo(false), + line_num(1) { +} + +OutputGenerator::OutputGenerator(ostream& os, TextGenerator *linegen, + TextStyle *anch, bool genref, const string &prefix, + const std::string &line_pref) : + output(os), line_prefix(line_pref), alwaysFlush(false), generateLineInfo(true), + 
generate_ref(genref && ! anch->empty()), anchor(anch), anchor_line_prefix(prefix), line_num(1), + line_num_generator(linegen) { } -OutputGenerator::~OutputGenerator() -{ +OutputGenerator::~OutputGenerator() { } -void -OutputGenerator::output_string(const string &s) -{ +void OutputGenerator::output_string(const string &s) { output << s; if (alwaysFlush) - flush(); + flush(); } -void -OutputGenerator::outputLine(const string &line) -{ - if (line_prefix.size()) - output_string(line_prefix); +void OutputGenerator::outputLine(const string &line) { + if (line_prefix.size()) + output_string(line_prefix); - output_string(line); + output_string(line); } -void -OutputGenerator::generateLine(const string &line) -{ - if (line_prefix.size()) - output_string(line_prefix); +void OutputGenerator::generateLine(const string &line) { + if (line_prefix.size()) + output_string(line_prefix); + + generate_line(line); +} + +void OutputGenerator::generate_line(const string &line) { + if (generateLineInfo) + generate_line_info(); + + ++line_num; - generate_line(line); + output_string(line); } -void -OutputGenerator::generate_line(const string &line) -{ - output_string(line); +void OutputGenerator::flush() { + output << std::flush; } -void -OutputGenerator::flush() -{ - output << std::flush; +void OutputGenerator::reset() { + line_num = 1; } + +void OutputGenerator::generate_line_info() { + ostringstream sout; + + sout << std::setw(line_num_digit)<< std::setfill('0')<< line_num << ":"; + + string line_str = line_num_generator->generateEntire(sout.str().c_str()); + + if (generate_ref) { + ostringstream line_n; + line_n << anchor_line_prefix << line_num; + sout.str(""); + substitutionmapping["$text"] = line_str; + substitutionmapping["$linenum"] = line_n.str(); + sout << anchor->output(substitutionmapping); + line_str = sout.str(); + } + + output_string(line_str + " "); +} + diff --git a/src/lib/outputgenerator.h b/src/lib/outputgenerator.h index fd85058..09ee1dc 100644 --- 
a/src/lib/outputgenerator.h +++ b/src/lib/outputgenerator.h @@ -14,6 +14,9 @@ #include <iostream> +class TextGenerator; +class TextStyle; + /** base class that actually writes the generated output to the output stream @@ -28,11 +31,26 @@ class OutputGenerator std::string line_prefix; /// whether to flush the stream at each output (default = false) bool alwaysFlush; + + /// whether to generate line information + bool generateLineInfo; + + // for line information + + bool generate_ref; + TextStyle *anchor; + std::string anchor_line_prefix; + unsigned int line_num; + TextGenerator *line_num_generator; public: OutputGenerator(std::ostream &os, const std::string &pref); + + OutputGenerator(std::ostream& os, TextGenerator *linegen, + TextStyle *anch, bool genref, const std::string &prefix, + const std::string &line_pref); - virtual ~OutputGenerator(); + ~OutputGenerator(); void setAlwaysFlush(bool b) { alwaysFlush = b; } @@ -61,10 +79,9 @@ class OutputGenerator void flush(); /** - * Resets the generator. 
By default it does nothing, but it can be - * overidden by subclasses + * Resets the generator (i.e., resets line number) */ - virtual void reset() {} + void reset(); protected: /** @@ -73,7 +90,12 @@ class OutputGenerator * * @param s */ - virtual void generate_line(const std::string &s); + void generate_line(const std::string &s); + + /** + * Generates line information + */ + void generate_line_info(); }; #endif diff --git a/src/lib/parserinfo.h b/src/lib/parserinfo.h index 9d6eee3..e2c54af 100644 --- a/src/lib/parserinfo.h +++ b/src/lib/parserinfo.h @@ -24,7 +24,7 @@ struct ParserInfo { std::string filename; // including path unsigned int line; - ParserInfo() {} + ParserInfo() : line(0) {} ParserInfo(const std::string &n) : filename(n) {} void setParserInfo(const std::string &name, unsigned int l) diff --git a/src/lib/parsestyles.h b/src/lib/parsestyles.h index a753917..6de12d1 100644 --- a/src/lib/parsestyles.h +++ b/src/lib/parsestyles.h @@ -19,7 +19,7 @@ class GeneratorFactory; void parseStyles(const std::string &path, const std::string &name, GeneratorFactory *generatorFactory, std::string &bodyBgColor) ; -void parseStyleError(const std::string &error) ; +void parseStyleError(const std::string &error, bool exit = true) ; /// for css style files void parseCssStyles(const std::string &path, const std::string &name, diff --git a/src/lib/readtags.c b/src/lib/readtags.c index 8cc0291..8a58827 100644 --- a/src/lib/readtags.c +++ b/src/lib/readtags.c @@ -1,5 +1,5 @@ /* -* $Id: readtags.c,v 1.7 2007/03/23 18:13:11 bettini Exp $ +* $Id: readtags.c,v 1.8 2007/06/08 10:11:30 bettini Exp $ * * Copyright (c) 1996-2003, Darren Hiebert * diff --git a/src/lib/readtags.h b/src/lib/readtags.h index ce32611..2bf2ccf 100644 --- a/src/lib/readtags.h +++ b/src/lib/readtags.h @@ -1,5 +1,5 @@ /* -* $Id: readtags.h,v 1.7 2007/03/23 18:13:11 bettini Exp $ +* $Id: readtags.h,v 1.8 2007/06/08 10:11:30 bettini Exp $ * * Copyright (c) 1996-2003, Darren Hiebert * diff --git 
a/src/lib/refgeneratormap.cpp b/src/lib/refgeneratormap.cpp deleted file mode 100644 index 8a1e30f..0000000 --- a/src/lib/refgeneratormap.cpp +++ /dev/null @@ -1,214 +0,0 @@ -// -// C++ Implementation: refgeneratormap -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2005 -// -// Copyright: See COPYING file that comes with this distribution -// -// -#include "refgeneratormap.h" -#include "messages.h" -#include "fileinfo.h" -#include "fileutil.h" -#include "mainoutputbuffer.h" -#include "preformatter.h" - -#include <boost/regex.hpp> -#include <list> - -using namespace std; - -bool isTaggable(const string &elem) -{ - return elem.find_first_of(' ') == string::npos; -} - -RefGeneratorMap::RefGeneratorMap(PreFormatter *pf, const string &_ctags_file_name, - const TextStyles::RefTextStyle &r, RefPosition pos) - : GeneratorMap(pf), ctags_file_name(_ctags_file_name), - refstyle(r), refposition(pos) -{ - ctags_file = tagsOpen (ctags_file_name.c_str(), &info); - if (ctags_file == 0) - { - exitError("cannot open tag file: " + ctags_file_name); - } -} - - -RefGeneratorMap::~RefGeneratorMap() -{ - tagsClose(ctags_file); -} - -//#define DEBUGREF -#ifdef DEBUGREF -#include <iostream> -#define DEB(x) cerr << x << endl; -#define DEB2(x) cerr << x ; -#else -#define DEB(x) ; -#define DEB2(x) ; -#endif - -static boost::regex string_or_space_regex("([^[:blank:]]+)|([[:blank:]]+)"); -static SubstitutionMapping ref_substitutionmapping; - -#define SPACE 2 -#define NOT_SPACE 1 - -/* -* separates a line in block of spaces and block of non spaces. -* the stringbuffer tokens stores the pieces seen so far for which no -* entry in the tag file was found. -* -* for each block of non spaces tries to look for an entry in the tag. -* if it finds it flushes the stringbuffer tokens (by passing its contents -* to the parent class implementation of generateString). 
-* -* For instance (notice the spaces among the +) -* "myfield + myfield2 + myfield3 -* if only an entry for myfield2 is found, the we will generate -* 3 blocks: -* "myfield + " -* "myfield2" -* " + myfield3" -*/ -const std::string RefGeneratorMap::generateString(const std::string& elem, const std::string& s, const FileInfo* fileinfo) -{ - if (isNoReference(elem)) - return GeneratorMap::generateString(elem, s, fileinfo); - - buffer.str(""); - ostringstream tokens; - - boost::sregex_iterator i(s.begin(), s.end(), string_or_space_regex); - boost::sregex_iterator j; - while (i != j) { - if ((*i)[SPACE].matched) { - tokens << string((*i)[SPACE].first, (*i)[SPACE].second); - } else { - string not_spaces = string((*i)[NOT_SPACE].first, (*i)[NOT_SPACE].second); - string found_refs = _generateString(elem, not_spaces, fileinfo); - if (found_refs.size()) { - const string &previous = tokens.str(); - if (previous.size()) { - buffer << GeneratorMap::generateString(elem, previous, fileinfo); - tokens.str(""); - } - buffer << found_refs; - } else { - tokens << not_spaces; - } - } - - ++i; - } - - const string &remainder = tokens.str(); - if (remainder.size()) { - buffer << GeneratorMap::generateString(elem, remainder, fileinfo); - } - - return buffer.str(); -} - -struct RefEntry -{ - string filename; - unsigned long linenumber; -}; - -const string RefGeneratorMap::_generateString(const std::string& elem, const std::string& s, const FileInfo* fileinfo) -{ - tagEntry entry; - bool found = false; // whether we found a tag - bool found_anchor = false; // whether we found an anchor - string output; - typedef list<RefEntry> FoundRefList; - FoundRefList foundreflist; - - DEB("inspecting " + s) - - if (tagsFind (ctags_file, &entry, s.c_str(), TAG_FULLMATCH) == TagSuccess) - { - found = true; - do - { - RefEntry refentry; - refentry.filename = entry.file; - if ((refentry.filename == fileinfo->filename || - refentry.filename == fileinfo->input_file_name) && - entry.address.lineNumber == 
fileinfo->line) { - ostringstream gen_info; - // we just found the reference to this very element - // so we must generate an anchor - gen_info << entry.address.lineNumber; - DEB(" found anchor " + gen_info.str()); - ref_substitutionmapping["$text"] = preformatter->preformat(s); - ref_substitutionmapping["$infilename"] = strip_file_path(refentry.filename); - ref_substitutionmapping["$infile"] = refentry.filename; - ref_substitutionmapping["$linenum"] = gen_info.str(); - output = refstyle.anchor.output(ref_substitutionmapping); - found_anchor = true; - break; - } - DEB2(" found " + string(entry.name) + " : "); - DEB(entry.address.lineNumber); - refentry.linenumber = entry.address.lineNumber; - foundreflist.push_back(refentry); - } while (tagsFindNext (ctags_file, &entry) == TagSuccess); - } - - if (found) { - if (! found_anchor) { - ref_substitutionmapping["$text"] = preformatter->preformat(s); - TextStyle *referencestyle = 0; - if ((foundreflist.size()>1 && refposition == INLINE) || refposition == POSTLINE) - referencestyle = &(refstyle.postline_reference); - else if (refposition == POSTDOC) - referencestyle = &(refstyle.postdoc_reference); - else - referencestyle = &(refstyle.inline_reference); - - for (FoundRefList::const_iterator it = foundreflist.begin(); it != foundreflist.end(); ++it) { - ostringstream gen_info; - // we found where this element appears so we generate a reference - // if it's a link in the same file, we use the output_file_name... 
- if (it->filename == fileinfo->filename || it->filename == fileinfo->input_file_name) - gen_info << fileinfo->output_file_name; - else - gen_info << it->filename << fileinfo->output_file_extension; - // ...otherwise we build the referenced file by using the output_file_extension - // in fact, in this case, it probably means that multiple input files have been specified - - ref_substitutionmapping["$outfile"] = gen_info.str(); - ref_substitutionmapping["$infilename"] = strip_file_path(it->filename); - ref_substitutionmapping["$infile"] = it->filename; - - gen_info.str(""); - gen_info << it->linenumber; - ref_substitutionmapping["$linenum"] = gen_info.str(); - output += referencestyle->output(ref_substitutionmapping); - - // if the following is true, it means that there more than one reference - if (foundreflist.size() > 1 || refposition != INLINE) { - output += preformatter->preformat("\n"); - - if (refposition == POSTLINE || refposition == INLINE) { - outputbuffer->output_postline(output); - } else { // (refposition == POSTDOC) - outputbuffer->output_post(output); - } - - output = ""; // no need to modify the current element - } - } - } - } - - return output; -} diff --git a/src/lib/refgeneratormap.h b/src/lib/refgeneratormap.h deleted file mode 100644 index 3db34dc..0000000 --- a/src/lib/refgeneratormap.h +++ /dev/null @@ -1,49 +0,0 @@ -// -// C++ Interface: refgeneratormap -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2005 -// -// Copyright: See COPYING file that comes with this distribution -// -// -#ifndef REFGENERATORMAP_H -#define REFGENERATORMAP_H - -#include <generatormap.h> - -#include "my_sstream.h" -#include "readtags.h" -#include "textstyles.h" - -typedef enum {INLINE=1, POSTLINE, POSTDOC} RefPosition; - -/** -A specialized GeneratorMap that also generates anchors and references, by using ctags information - -@author Lorenzo Bettini -*/ -class RefGeneratorMap : public GeneratorMap -{ -private: - const 
std::string ctags_file_name; - TextStyles::RefTextStyle refstyle; - RefPosition refposition; - tagFile *ctags_file; - tagFileInfo info; - std::ostringstream buffer; - -public: - RefGeneratorMap(PreFormatter *pf, const std::string &_ctags_file_name, - const TextStyles::RefTextStyle &r, RefPosition pos); - - ~RefGeneratorMap(); - -protected: - virtual const std::string generateString(const std::string& elem, const std::string& s, const FileInfo* arg1); - const std::string _generateString(const std::string& elem, const std::string& s, const FileInfo* arg1); -}; - -#endif diff --git a/src/lib/refposition.h b/src/lib/refposition.h new file mode 100644 index 0000000..f80c9e0 --- /dev/null +++ b/src/lib/refposition.h @@ -0,0 +1,8 @@ +#ifndef REFPOSITION_H +#define REFPOSITION_H + +// where a reference must be put + +typedef enum {INLINE=1, POSTLINE, POSTDOC} RefPosition; + +#endif // REFPOSITION_H diff --git a/src/lib/regexpengine.cpp b/src/lib/regexpengine.cpp new file mode 100644 index 0000000..37d377d --- /dev/null +++ b/src/lib/regexpengine.cpp @@ -0,0 +1,265 @@ +// +// C++ Implementation: regexpengine +// +// Description: +// +// +// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#include "regexpengine.h" + +RegExpEngine::~RegExpEngine() { +} + +#include <fstream> +#include <iostream> +#include <stdlib.h> + +#include "textformatter.h" +#include "keys.h" +#include "langdefloader.h" +#include "messages.h" +#include "parserinfo.h" + +// purpose: +// takes the contents of a file and transform to +// syntax highlighted code in html format + +using namespace std; + +typedef enum {FOUND_EOF=0, FOUND_NL, FOUND_END} load_line_ret; + +load_line_ret load_line(std::string& s, std::istream& is) { + s.erase(); + if (is.bad()|| is.eof()) + return FOUND_EOF; + + char c; + while (is.get(c)) { + if (c == '\n') + return FOUND_NL; + if (c != '\r') + s.append(1, c); + } + + return 
FOUND_END; +} + +void RegExpEngine::process_file(const char *file) { + istream *is = 0; + + if (file) { + is = new ifstream(file); + if (!is || ! (*is)) { + cerr << "Error in opening " << file<< " for input" << endl; + exit(1); + } + } else + is = &cin; + + std::string s; + + std::string::const_iterator start, end; + boost::smatch match; + boost::smatch what; + boost::match_flag_type flags; + + // the regexp state we try at the moment. + RegExpStatePtr alternative; + + // the regexp state matching best; + RegExpStatePtr best_alternative; + + initial_state = currentstate; + + fileinfo->line = 1; + + // for selecting the formatter + int index_of_formatter = 0; + // for selecting the subexpression (or the whole expression) + int index_of_subexpression = 0; + + int smallest_prefix = -1; + int biggest_length = -1; + + string prefix; + + load_line_ret ret; + while ((ret = load_line(s, *is)) != FOUND_EOF) { + bool matched = true; + bool found_eol = false; + start = s.begin(); + end = s.end(); + // reset the flags + flags = boost::match_default; + + // always start with the current state + alternative = currentstate; + + while (matched) { + matched = false; + + if (alternative->has_alternative()) { + // this means that the state contains a list of alternative states + // so we must try to match all the states and use the one that matches best + // i.e., with the smallest prefix and the biggest match length + smallest_prefix = -1; + biggest_length = -1; + while (alternative.get()) { + if (boost::regex_search(start, end, match, + alternative->reg_exp, flags)) { + const std::string &match_prefix = match.prefix(); + if (smallest_prefix < 0 || (boost::smatch::size_type)smallest_prefix >= match_prefix.size()) { + if ((boost::smatch::size_type)smallest_prefix > match_prefix.size()|| biggest_length < 0 || (boost::smatch::difference_type)biggest_length < match.length()) { + prefix = match.prefix(); + smallest_prefix = match_prefix.size(); + biggest_length = match.length(); + 
best_alternative = alternative; + matched = true; + // copy it, otherwise the next call will overwrite it + what = match; + } + } + } + alternative = alternative->alternative; + } + + if (matched) { + // store the one that matched best + alternative = best_alternative; + } else { + // reset the original current_state + alternative = currentstate; + } + } else if (boost::regex_search(start, end, what, + alternative->reg_exp, flags)) { + // otherwise, all the alternatives of the state are stored as + // a big alternation, where all alternatives are grouped + prefix = what.prefix(); + matched = true; + } + + if (matched) { + if (alternative->hasMarkedAlternatives) { + // we must inspect all the sub_matches + // to find the subexpression that matched + for (unsigned int i = 1; i < what.size(); ++i) { + if (what[i].matched) { + index_of_formatter = i; + index_of_subexpression = i; + break; // no other match is possible + } + } + } else { + // the alternative state contains only one expression, with + // marked subexpressions, so we must format the whole match + + // the formatter at 0 is the normal formatter + index_of_formatter = 1; + // we select the whole match + index_of_subexpression = 0; + + // this is OK also in the case when allAlternativesCanMatch: + // we consider the whole expression, and all formatters share the + // same exit state, so we can use the first one + } + + // part that possibly did not match + if (prefix.size()) + format(-1, alternative, prefix); + + if (alternative->allAlternativesCanMatch) { + // we must format each subexpression that matched + for (unsigned int i = 1; i < what.size(); ++i) { + if (what[i].matched) { + format(i, alternative, what[i]); + } + } + + // only the last formatter has the correct next state + index_of_formatter = what.size() - 1; + } else { + // format the part that matched + format(index_of_formatter, alternative, + what[index_of_subexpression]); + } + + if 
(alternative->formatters[index_of_formatter]->getNextState()) { + // we must enter another state + enterState(alternative, index_of_formatter); + } else if (alternative->formatters[index_of_formatter]->exit_state_level) { + if (alternative->formatters[index_of_formatter]->exit_all) { + // we must go back to the outermost state + exitAll(); + } else { + // we must get back to exit_state_level states + exitState(alternative->formatters[index_of_formatter]->exit_state_level); + } + } + + // now let's continue with what's left of the original input + start = what[index_of_subexpression].second; + if (!(*start)) { + if (found_eol) + matched = false; // we had already matched end of line + + // we might need to match the eol itself, so let's perform another loop + found_eol = true; + } + + if (what[0].first != what[0].second) { + // we actually consumed something, so the start of the string + // must not be interpreted as the beginning of the line + flags |= boost::match_not_bol; + } + + // we always search for the next match by using the original current state + alternative = currentstate; + } else { + // format the non-matching part as normal + format(-1, alternative, string(start, end)); + matched = false; + } + } + + if (ret == FOUND_NL) + formatter->format_nl("\n"); + + (fileinfo->line)++; + } + + // make sure we flush all the buffered parts + formatter->flush(); + + if (file) + delete is; + + currentstate = initial_state; // reset the initial state +} + +void RegExpEngine::enterState(RegExpStatePtr state, int index) { + states_stack.push(currentstate); + currentstate = state->formatters[index]->getNextState(); +} + +void RegExpEngine::exitState(int level) { + // remove additional levels + for (int l = 1; l < level; ++l) + states_stack.pop(); + + currentstate = states_stack.top(); + states_stack.pop(); +} + +void RegExpEngine::exitAll() { + currentstate = initial_state; + states_stack = stack_of_states(); +} + +void RegExpEngine::format(int index, RegExpStatePtr 
state, const std::string &s) { + formatter->format(state->get_elem(index), s, fileinfo); +} + diff --git a/src/regexpengine.h b/src/lib/regexpengine.h index b2e9976..aefa716 100644 --- a/src/regexpengine.h +++ b/src/lib/regexpengine.h @@ -22,35 +22,34 @@ class TextFormatter; #include "fileinfo.h" /** -the class that actually performs regular expression processing + the class that actually performs regular expression processing -@author Lorenzo Bettini -*/ -class RegExpEngine -{ - protected: + @author Lorenzo Bettini + */ +class RegExpEngine { +protected: RegExpStatePtr currentstate, initial_state; FileInfo *fileinfo; - private: +private: TextFormatter *formatter; typedef std::stack<RegExpStatePtr> stack_of_states; stack_of_states states_stack; - protected: - virtual void enterState(int index); +protected: + virtual void enterState(RegExpStatePtr state, int index); virtual void exitState(int level); virtual void exitAll(); - virtual void format(int index, const std::string &s); + virtual void format(int index, RegExpStatePtr state, const std::string &s); public: - RegExpEngine(RegExpStatePtr v, TextFormatter *pre, FileInfo *f) : - currentstate(v), fileinfo(f), formatter(pre) - {} + RegExpEngine(RegExpStatePtr v, TextFormatter *pre, FileInfo *f) : + currentstate(v), fileinfo(f), formatter(pre) { + } - virtual ~RegExpEngine(); + virtual ~RegExpEngine(); - void process_file(const char *file); + void process_file(const char *file); }; typedef boost::shared_ptr<RegExpEngine> RegExpEnginePtr; diff --git a/src/regexpenginedebug.cpp b/src/lib/regexpenginedebug.cpp index 886af8d..7e8ad78 100644 --- a/src/regexpenginedebug.cpp +++ b/src/lib/regexpenginedebug.cpp @@ -25,11 +25,22 @@ RegExpEngineDebug::~RegExpEngineDebug() { } +void printRegExpState(RegExpStatePtr state) +{ + cout << state->reg_exp; + RegExpStatePtr alternative = state->alternative; + while (alternative.get()) { + cout << "\n" << " " << alternative->reg_exp; + alternative = alternative->alternative; + } +} 
-void RegExpEngineDebug::enterState(int index) +void RegExpEngineDebug::enterState(RegExpStatePtr state, int index) { - cout << "entering: " << currentstate->formatters[index]->getNextState()->reg_exp << endl; - RegExpEngine::enterState(index); + cout << "entering: "; + printRegExpState(state->formatters[index]->getNextState()); + cout << endl; + RegExpEngine::enterState(state, index); //step(); } @@ -44,27 +55,32 @@ void RegExpEngineDebug::exitState(int level) { RegExpEngine::exitState(level); - cout << "exiting " << level << " level(s): " << currentstate->reg_exp << endl; + cout << "exiting " << level << " level(s): "; + printRegExpState(currentstate); + cout << endl; //step(); } void printInfo(const SubExpressionInfo &e) { + if (!e.second.line) + return; // it concerns a subexpressions + cout << e.second.filename << ":" << e.second.line << ": " << e.first << endl; } -void RegExpEngineDebug::format(int index, const std::string& s) +void RegExpEngineDebug::format(int index, RegExpStatePtr state, const std::string& s) { - RegExpEngine::format(index, s); + RegExpEngine::format(index, state, s); if(index >= 0) { unsigned int i = (unsigned int)index; - assert(i <= currentstate->subExpressions.size()); + assert(i <= state->subExpressions.size()); - printInfo(currentstate->subExpressions[i-1]); + printInfo(state->subExpressions[i-1]); } - cout << "formatting: \"" << s << "\" as " << currentstate->get_elem(index) << endl; + cout << "formatting: \"" << s << "\" as " << state->get_elem(index) << endl; step(); } diff --git a/src/regexpenginedebug.h b/src/lib/regexpenginedebug.h index d14a5bd..b673d4f 100644 --- a/src/regexpenginedebug.h +++ b/src/lib/regexpenginedebug.h @@ -32,10 +32,10 @@ protected: /// whether it's an interactive debug session bool interactive; - virtual void enterState(int index); + virtual void enterState(RegExpStatePtr state, int index); virtual void exitAll(); virtual void exitState(int level); - virtual void format(int index, const std::string& s); + 
virtual void format(int index, RegExpStatePtr state, const std::string& s); /** * Waits for a step command (if in interactive mode) diff --git a/src/lib/regexpreprocessor.cpp b/src/lib/regexpreprocessor.cpp index f0f7e5f..516c2e9 100644 --- a/src/lib/regexpreprocessor.cpp +++ b/src/lib/regexpreprocessor.cpp @@ -1,10 +1,10 @@ // -// C++ Implementation: %{MODULE} +// C++ Implementation: RegexPreProcessor // -// Description: +// Description: performs operations or inspections on a string representing +// a valid regular expression // -// -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} +// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007 // // Copyright: See COPYING file that comes with this distribution // @@ -16,71 +16,189 @@ using namespace std; +// IMPORTANT: the following regular expressions assume that the +// regular expression they try to match is a valid regular expression + +// matches character sets in a regular expression const boost::regex char_set_exp("\\[([^\\|]*)\\]"); -RegexPreProcessor::RegexPreProcessor() -{ -} +// substitute a "(" with "(?:" if it's not followed by a ? and not preceeded by \\ char +const boost::regex from("(\\\\\\()|(\\((?!\\?))"); +const string into = "(?1\\\\\\()(?2\\(\\?\\:)"; +// found actual marking parenthesis, i.e., not preceeded by \\ and not followed by ? +const boost::regex paren("(?<!\\\\)\\((?!\\?)"); -RegexPreProcessor::~RegexPreProcessor() -{ -} +// regular expression matching a backreference, e.g., \1 or inside a conditional (?(1)...) +const boost::regex backreference("(\\\\[[:digit:]])|(\\(\\?\\([[:digit:]])"); const string -RegexPreProcessor::preprocess(const string &s) -{ - // substitute a "(" with "(?:" if it's not followed by a ? 
and not preceeded by \\ char - boost::regex from("(\\\\\\()|(\\((?!\\?))"); - string into = "(?1\\\\\\()(?2\\(\\?\\:)"; + subexpressions_info::ERR_OUTER_UNMARKED = "unmarked subexpressions are allowed only inside marked subexpressions"; +const string + subexpressions_info::ERR_NESTED_SUBEXP = "subexpressions of subexpressions are not allowed"; +const string subexpressions_info::ERR_UNBALANCED_PAREN = "unbalanced parenthesis"; +const string + subexpressions_info::ERR_OUTSIDE_SUBEXP = "parts outside marked subexpressions are not allowed"; - return boost::regex_replace(s, from, into, boost::match_default | boost::format_all); +RegexPreProcessor::RegexPreProcessor() { } -const string -_make_nonsensitive(const string &s) -{ - ostringstream result; +RegexPreProcessor::~RegexPreProcessor() { +} + +const string RegexPreProcessor::preprocess(const string &s) { + // substitute a "(" with "(?:" if it's not followed by a ? and not preceeded by \\ char + return boost::regex_replace(s, from, into, + boost::match_default | boost::format_all); +} + +const string _make_nonsensitive(const string &s) { + ostringstream result; - for (string::const_iterator it = s.begin(); it != s.end(); ++it) - if (isalpha(*it)) - result << "[" << (char)toupper(*it) << (char)tolower(*it) << "]"; - else - result << *it; + for (string::const_iterator it = s.begin(); it != s.end(); ++it) + if (isalpha(*it)) + result << "[" << (char)toupper(*it) << (char)tolower(*it) << "]"; + else + result << *it; - return result.str(); + return result.str(); } -const string -RegexPreProcessor::make_nonsensitive(const string &s) -{ - boost::sregex_iterator m1(s.begin(), s.end(), char_set_exp); - boost::sregex_iterator m2; +const string RegexPreProcessor::make_nonsensitive(const string &s) { + boost::sregex_iterator m1(s.begin(), s.end(), char_set_exp); + boost::sregex_iterator m2; - if (m1 == m2) - return _make_nonsensitive(s); + if (m1 == m2) + return _make_nonsensitive(s); - ostringstream buffer; - string prefix; - 
string suffix; + ostringstream buffer; + string prefix; + string suffix; - for (boost::sregex_iterator it = m1; it != m2; ++it) - { - prefix = it->prefix(); - suffix = it->suffix(); + for (boost::sregex_iterator it = m1; it != m2; ++it) { + prefix = it->prefix(); + suffix = it->suffix(); - if (prefix.size()) { - buffer << _make_nonsensitive(prefix); + if (prefix.size()) { + buffer << _make_nonsensitive(prefix); + } + + buffer << (*it)[0]; } - buffer << (*it)[0]; - } + if (suffix.size()) { + buffer << _make_nonsensitive(suffix); + } - if (suffix.size()) { - buffer << _make_nonsensitive(suffix); - } + return buffer.str(); +} - return buffer.str(); +unsigned int RegexPreProcessor::num_of_subexpressions(const string &s) +{ + boost::sregex_iterator m1(s.begin(), s.end(), paren); + boost::sregex_iterator m2; + + int counter = 0; + + for (boost::sregex_iterator it = m1; it != m2; ++it) + { + ++counter; + } + + return counter; } +const subexpressions_strings *RegexPreProcessor::split_marked_subexpressions(const string &s) { + boost::sregex_iterator m1(s.begin(), s.end(), paren); + boost::sregex_iterator m2; + + // we don't need to parse it (we can use the regex) since we assume that + // the regular expression represented by s is made up of only + // marked subexpressions and no nested subexpressions and char outside subexpressions + + subexpressions_strings *split = new subexpressions_strings; + + for (boost::sregex_iterator it = m1; it != m2; ) + { + string prefix = it->prefix(); + if (prefix.size()) + split->push_back("(" + prefix); + + string suffix = it->suffix(); + if (++it == m2) + split->push_back("(" + suffix); + } + + return split; +} + +subexpressions_info RegexPreProcessor::num_of_marked_subexpressions(const string &s) { + subexpressions_info sexps; + + // number of open parenthesis + int open_paren_num = 0; + // whether we're inside a marked subexpressions + bool found_marked_subexp = false; + // len of string + int len = s.size(); + // char we're examining + 
char c; + + for (int i = 0; i < len; ++i) { + c = s[i]; + if (c == '\\' && (i+1) < len && (s[i+1] == '(' || s[i+1] == ')')) { + // skip the escaped paren + ++i; + } else if (c == '(') { + // we found a subexp + ++open_paren_num; + + if ((i+1) < len && s[i+1] == '?') { + if (!found_marked_subexp) { + // outer subexpressions must be marked + sexps.errors = subexpressions_info::ERR_OUTER_UNMARKED; + return sexps; + } + } else { + // it's a marked subexp + if (found_marked_subexp) { + // we don't allow nested subexpressions + sexps.errors = subexpressions_info::ERR_NESTED_SUBEXP; + return sexps; + } + + // we found a marked subexp + found_marked_subexp = true; + ++(sexps.marked); + } + } else if (c == ')') { + if (!open_paren_num) { + // unbalanced parenthesis + sexps.errors = subexpressions_info::ERR_UNBALANCED_PAREN; + return sexps; + } + + --open_paren_num; + + // end of marked subexp + if (!open_paren_num && found_marked_subexp) + found_marked_subexp = false; + } else { + // we don't allow non marked parts + if (!found_marked_subexp) { + sexps.errors = subexpressions_info::ERR_OUTSIDE_SUBEXP; + return sexps; + } + } + } + + // check that all paren are closed + if (open_paren_num) + sexps.errors = subexpressions_info::ERR_UNBALANCED_PAREN; + return sexps; +} + +bool RegexPreProcessor::contains_backreferences(const std::string &s) { + return boost::regex_search(s, backreference); +} diff --git a/src/lib/regexpreprocessor.h b/src/lib/regexpreprocessor.h index 0842c6a..8625023 100644 --- a/src/lib/regexpreprocessor.h +++ b/src/lib/regexpreprocessor.h @@ -1,10 +1,10 @@ // -// C++ Interface: %{MODULE} +// C++ Interface: RegexPreProcessor // -// Description: +// Description: performs operations or inspections on a string representing +// a valid regular expression // -// -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} +// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007 // // Copyright: See COPYING file that comes with this distribution // @@ -13,20 
+13,90 @@ #define REGEXPREPROCESSOR_H #include <string> +#include <list> + +/** + * info about subexpressions + */ +struct subexpressions_info { + /// errors for subexpression checking + const static std::string ERR_OUTER_UNMARKED; + const static std::string ERR_NESTED_SUBEXP; + const static std::string ERR_UNBALANCED_PAREN; + const static std::string ERR_OUTSIDE_SUBEXP; + + /// num of marked subexpressions + unsigned int marked; + /// error specifications, if any + std::string errors; + + subexpressions_info() : marked(0) {} +}; + +/** + * all the marked subexpressions in a list + */ +typedef std::list<std::string> subexpressions_strings; /** -preprocess a regular expression, e.g., transform "()" into "(?:)" + preprocess a regular expression, e.g., transform "()" into "(?:)" -@author Lorenzo Bettini -*/ -class RegexPreProcessor{ + @author Lorenzo Bettini + */ +class RegexPreProcessor { public: RegexPreProcessor(); ~RegexPreProcessor(); + /** + * translates marked subexpressions (...) into non marked subexpressions (?: ) + * @return the translated string + */ static const std::string preprocess(const std::string &s); + + /** + * translates the expression into a case nonsensitive expression, i.e., + * foo is translated into [Ff][Oo][Oo] + * @return the translated string + */ static const std::string make_nonsensitive(const std::string &s); + + /** + * counts the number of marked subexpressions (...) + * @return the number of marked subexpressions + */ + static unsigned int num_of_subexpressions(const std::string &s); + + /** + * check that the expressions is made up of marked subexpressions (...) 
+ * and no nested subexpressions and no char outside subexpressions + * + * @return the struct containing the number of marked subexpressions + * and possible errors + */ + static subexpressions_info num_of_marked_subexpressions(const std::string &s); + + /** + * Splits the marked subexpressions of a regular expression made up of only + * marked subexpressions and no nested subexpressions and char outside subexpressions + * (thus, before calling this, you must make sure that num_of_marked_subexpressions + * did not return an error. + * + * @return the subexpressions in a collection (this is allocated on the heap, so + * it is up to the caller to delete it) + */ + static const subexpressions_strings *split_marked_subexpressions(const std::string &s); + + /** + * Checks whether the passed regular expression string contains + * a backreference (e.g., either \1 or a conditional with a backreference + * (?(1)...) + * + * @return true if the passed regular expression string contains + * a backreference + */ + static bool contains_backreferences(const std::string &s); }; #endif diff --git a/src/lib/regexpstate.cpp b/src/lib/regexpstate.cpp new file mode 100644 index 0000000..af308ef --- /dev/null +++ b/src/lib/regexpstate.cpp @@ -0,0 +1,207 @@ +// +// C++ Implementation: RegExpState +// +// Description: as regular expression state: contains the regular expression to +// match and the formatters for each alternative.
+// +// +// Author: Lorenzo Bettini, http://www.lorenzobettini.it, (C) 1999-2007 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#include "regexpstate.h" + +#include "messages.h" + +#include <stdlib.h> + +using namespace std; + +int RegExpState::global_id = 1; + +RegExpFormatter::RegExpFormatter(const string &el, RegExpStatePtr r, int exit, + bool all) : + elem(el), exit_state_level(exit), exit_all(all), next_state(r) { +} + +void RegExpFormatter::setNextState(RegExpStatePtr r) { + next_state_strong = r; +} + +RegExpStatePtr RegExpFormatter::getNextState() const { + RegExpStatePtr next = next_state.lock(); + if (!next) + return next_state_strong; + + return next; +} + +/** + * Return the formatter associated to the passed index. + * If the index is negative, it returns the default formatter. + * @param index + * @return + */ +const string &RegExpState::get_elem(int index) { + return formatters[(index<0 ? 0 : index)]->elem; +} + +void RegExpState::add_exp(const std::string &s, ParserInfo *parserInfo, + RegExpFormatterPtr f) { + if (alternative.get()) { + alternative->add_exp(s, parserInfo, f); + return; + } else { + if (nextAddMustCreateAnAlternative) { + // a previous operation had recorded the fact that the next + // add_exp should have been performed as a creation of an alternative + add_alternative(s, parserInfo, f); + + // but further addition on the alternative must not create further alternatives + alternative->nextAddMustCreateAnAlternative = false; + return; + } + } + + const string &ex = buffer.str(); + if (ex.size()) + buffer << "|"; + + buffer << s; + + formatters.push_back(f); + subExpressions.push_back(make_pair(s, *parserInfo)); +} + +void RegExpState::add_exp(const std::string &s, ParserInfo *parserInfo, + const format_vector &f) { + if (alternative.get()) { + alternative->add_exp(s, parserInfo, f); + return; + } + + RegExpState *myalternative; + + if (buffer.str()== "") { + // we must not add an alternative: this state 
is the alternative + myalternative = this; + } else { + // we must isolate this regexp in an alternative + alternative = RegExpStatePtr(new RegExpState); + + myalternative = alternative.get(); + } + + // the default formatter could be already set, and so we must + // transmit it right now, otherwise the alternative will never have it. + if (formatters[0].get()) { + myalternative->set_default_formatter(formatters[0]); + } + + // another add exp on the alternative must create another alternative + myalternative->nextAddMustCreateAnAlternative = true; + + myalternative->buffer << s; + + // if the default formatter is not set, reserve space for it + if (myalternative->formatters[0].get()) + ; + + std::copy(f.begin(), f.end(), back_inserter(myalternative->formatters)); + + // add a subexpression for each formatter (just to be consistent with + // other cases). + for (format_vector::const_iterator it = f.begin(); it != f.end(); ++it) + myalternative->subExpressions.push_back(make_pair(s, *parserInfo)); +} + +void RegExpState::add_alternative(const std::string &s, ParserInfo *parserInfo, + RegExpFormatterPtr f) { + if (!alternative.get()) { + if (buffer.str()== "") { + // we must not add an alternative: this state is the alternative + add_exp(s, parserInfo, f); + + // but the next add should be put in an alternative + nextAddMustCreateAnAlternative = true; + } else { + alternative = RegExpStatePtr(new RegExpState); + + // the default formatter could be already set, and so we must + // transmit it right now, otherwise the alternative will never have it. 
+ if (formatters[0].get()) { + alternative->set_default_formatter(formatters[0]); + } + + // forward to the alternative + alternative->add_exp(s, parserInfo, f); + + // another add exp on the alternative must create another alternative + alternative->nextAddMustCreateAnAlternative = true; + } + } else { + alternative->add_alternative(s, parserInfo, f); + } +} + +void RegExpState::setHasMarkedAlternatives() { + if (alternative.get()) { + alternative->setHasMarkedAlternatives(); + } else { + hasMarkedAlternatives = true; + } +} + +void RegExpState::setAllAlternativesCanMatch() { + if (alternative.get()) { + alternative->setAllAlternativesCanMatch(); + } else { + allAlternativesCanMatch = true; + } +} + +void RegExpState::add_subexp_formatter(RegExpFormatterPtr f) { + if (alternative.get()) { + alternative->add_subexp_formatter(f); + return; + } + + formatters.push_back(f); + + // FIXME: insert a bogus subexpression + subExpressions.push_back(make_pair("", ParserInfo())); +} + +void RegExpState::freeze() throw(boost::bad_expression) { + const string &buffered = buffer.str(); + try { + reg_exp.assign(buffered); + + // call freeze also on alternative + if (alternative.get()) { + alternative->freeze(); + } + } catch (boost::bad_expression &e) { + printError("bad expression: " + buffered); + throw; + } +} + +void RegExpState::set_default_formatter(RegExpFormatterPtr f) { + formatters[0] = f; + + if (alternative.get()) + alternative->set_default_formatter(f); +} + +RegExpFormatterPtr RegExpState::getLastFormatter() const { + if (alternative.get()) + return alternative->getLastFormatter(); + + return formatters[formatters.size()-1]; +} + +bool RegExpState::has_alternative() const { + return (alternative.get()); +} diff --git a/src/lib/regexpstate.h b/src/lib/regexpstate.h new file mode 100644 index 0000000..00085f4 --- /dev/null +++ b/src/lib/regexpstate.h @@ -0,0 +1,191 @@ +// +// C++ Interface: RegExpState +// +// Description: as regular expression state: contains the 
regular expression to +// match and the formatters for each alternative. +// +// +// Author: Lorenzo Bettini, http://www.lorenzobettini.it, (C) 1999-2007 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#ifndef REGEXPSTATE_H +#define REGEXPSTATE_H + +#include <boost/regex.hpp> +#include <boost/shared_ptr.hpp> +#include <boost/weak_ptr.hpp> +#include <deque> +#include <vector> +#include "my_sstream.h" +#include "parserinfo.h" + +struct RegExpState; + +typedef boost::shared_ptr<RegExpState> RegExpStatePtr; +typedef boost::weak_ptr<RegExpState> RegExpStatePtrW; + +struct RegExpFormatter +{ + const std::string elem; // the element represented + int exit_state_level; // how many states we must leave + bool exit_all; + + RegExpFormatter(const std::string &el, RegExpStatePtr r = RegExpStatePtr(), int exit = 0, bool all = false); + + void setNextState(RegExpStatePtr r); + RegExpStatePtr getNextState() const; + + private: + RegExpStatePtrW next_state; + RegExpStatePtr next_state_strong; + /* + FIXME + the next_state is a weak pointer when there's a "nested" situation. + This allows to avoid cycles, that otherwise would prevent memory from + being correctly freed. 
+ */ +}; + +typedef boost::shared_ptr<RegExpFormatter> RegExpFormatterPtr; +typedef std::deque<RegExpFormatterPtr> format_vector; +typedef std::pair<std::string, ParserInfo> SubExpressionInfo; +typedef std::vector<SubExpressionInfo> SubExpressions; + +/** +class representing a state for the regular expression engine + +@author Lorenzo Bettini + */ +struct RegExpState +{ + static int global_id; + const int id; // the identifier of the state + + /// the regular expression (with all the alternatives) for this state + boost::regex reg_exp; + + /// for each alternative keep the parser info + SubExpressions subExpressions; + + /// the formatters (one for each alternative) + format_vector formatters; + + /// where to buffer the expression strings (added with add_exp) + /// when freeze is called these will be used to create the reg_exp + std::ostringstream buffer; + + /// if the regular expression is not matched try with this alternative + /// state (this is used to split regular expressions where an alternative + /// has a back reference, since back references are limited to 9) + RegExpStatePtr alternative; + + /** + * Records that the fact that if an add_exp is invoked on this object, + * then an alternative must be created and the exp should be inserted there + */ + bool nextAddMustCreateAnAlternative; + + /** + * Means that this state has a regular expression made up of (possible) + * many marked subexpressions each of one is an alternative, e.g., + * (foo)|(#)|... + * + * This is crucial since, when formatting, we need to inspect each sub_match + * of match_result to find out which one matched (and so, which formatter to use). + */ + bool hasMarkedAlternatives; + + /** + * Means that this state has a regular expression made up of marked subexpressions + * where all of them can match, e.g., + * (class)([[:blank:]]*)([[:alnum:]]+) + * + * This is crucial since, when formatting, we need to inspect each sub_match of + * match_result to find out all those that matched. 
This is different from the + * case of hasMarkedAlternatives: in that case only one can match + */ + bool allAlternativesCanMatch; + + RegExpState() : + id(global_id++), formatters(1), alternative(RegExpStatePtr()), + nextAddMustCreateAnAlternative(false), + hasMarkedAlternatives(false), + allAlternativesCanMatch(false) {} + + const std::string &get_elem(int index = -1); + + /** + * Adds the formatter for the given regular expression (and the file info + * of the original language definition file). + * + * The expression is only buffered (i.e., the regular expression is not built + * untile freeze is called) + * + * @param s the regular expression string + * @param parserInfo + * @param f + */ + void add_exp(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f); + + /** + * Adds the formatters for the given regular expression (and the file info + * of the original language definition file). Each formatter of the passed + * vector is related to the corresponding marked subexpression of the passed expression. + * + * The expression is only buffered (i.e., the regular expression is not built + * untile freeze is called) + * + * @param s the regular expression string + * @param parserInfo + * @param f + */ + void add_exp(const std::string &s, ParserInfo *parserInfo, const format_vector &f); + + /** + * Basically the same as add_exp, but + * 1. creates the alternative RegExpState + * 2. 
forwards all the operations to it from now on + */ + void add_alternative(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f); + + /** + * Sets that we added a marked subexpression + */ + void setHasMarkedAlternatives(); + + /** + * Sets that we added a subexpression where all alternatives can match + */ + void setAllAlternativesCanMatch(); + + /** + * Adds the formatter for a (marked) subexpression + * + * @param f + */ + void add_subexp_formatter(RegExpFormatterPtr f); + + /** + * Actually build the regular expression from the buffered strings + */ + void freeze() throw(boost::bad_expression); + + /** + * The default formatter in case the regular expression is not matched + */ + void set_default_formatter(RegExpFormatterPtr f); + + /** + * @return the last formatter + */ + RegExpFormatterPtr getLastFormatter() const; + + /** + * @return whether this state has an alternative + */ + bool has_alternative() const; +}; + +#endif diff --git a/src/lib/regexpstatebuilder.H b/src/lib/regexpstatebuilder.H index 5e3fcb1..c47ddc6 100644 --- a/src/lib/regexpstatebuilder.H +++ b/src/lib/regexpstatebuilder.H @@ -1,10 +1,10 @@ // -// C++ Interface: %{MODULE} +// C++ Interface: RegExpStateBuilder // -// Description: +// Description: Builds the RegExpStates starting from all the language elements. 
+// // -// -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} +// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2007 // // Copyright: See COPYING file that comes with this distribution // @@ -34,6 +34,7 @@ build a RegExpState starting from language definitions class DelimitedLangElem; // file: delimitedlangelem.h class LangElem; // file: langelem.h class LangElems; // file: langelems.h +class NamedSubExpsLangElem; // file: namedsubexpslangelem.h class RegExpStatePointer; // file: regexpstatebuilder.h class StateLangElem; // file: statelangelem.h class StateStartLangElem; // file: statestartlangelem.h @@ -66,8 +67,10 @@ virtual void build(DelimitedLangElem * elem, RegExpStatePointer state); #line 50 "regexpstatebuilder.h" virtual void build(StateStartLangElem * elem, RegExpStatePointer state); #line 51 "regexpstatebuilder.h" -virtual void build(LangElem * elem, RegExpStatePointer state); +virtual void build(NamedSubExpsLangElem * elem, RegExpStatePointer state); #line 52 "regexpstatebuilder.h" +virtual void build(LangElem * elem, RegExpStatePointer state); +#line 53 "regexpstatebuilder.h" virtual void build(LangElems * elems, RegExpStatePointer state); public: void _forward_build(DelimitedLangElem * elem, RegExpStatePointer state) @@ -85,6 +88,11 @@ void _forward_build(LangElems * elems, RegExpStatePointer state) build(elems, state); }; +void _forward_build(NamedSubExpsLangElem * elem, RegExpStatePointer state) +{ + build(elem, state); +}; + void _forward_build(StateLangElem * elem, RegExpStatePointer state) { build(elem, state); @@ -103,7 +111,7 @@ void _forward_build(StringListLangElem * elem, RegExpStatePointer state) protected: virtual void build_DB(LangElem * elem, RegExpStatePointer state); virtual void build_DB(LangElems * elems, RegExpStatePointer state); -#line 52 "regexpstatebuilder.h" +#line 53 "regexpstatebuilder.h" // doublecpp: end, DO NOT MODIFY diff --git a/src/lib/regexpstatebuilder.cpp b/src/lib/regexpstatebuilder.cpp index 
f98e37f..aeac068 100644 --- a/src/lib/regexpstatebuilder.cpp +++ b/src/lib/regexpstatebuilder.cpp @@ -1,10 +1,10 @@ // -// C++ Implementation: %{MODULE} +// C++ Implementation: regexpstatebuilder.cpp // -// Description: +// Description: Builds the regexp automaton // // -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} +// Author: Lorenzo Bettini, 2007, http://www.lorenzobettini.it // // Copyright: See COPYING file that comes with this distribution // @@ -17,6 +17,7 @@ #include "statelangelem.h" #include "stringlistlangelem.h" #include "delimitedlangelem.h" +#include "namedsubexpslangelem.h" #include "regexpstate.h" #include "stringdef.h" #include "tostringcollection.h" @@ -27,17 +28,22 @@ using namespace std; static const string buildex(const string &s); -static const string buildex_pre(const string &s); -void add_exp(RegExpStatePtr state, const string &orig, const string &exp, ParserInfo *parserInfo, RegExpFormatterPtr f) +void add_exp(RegExpStatePtr state, const string &exp, ParserInfo *parserInfo, RegExpFormatterPtr f) { - try { - state->add_exp(exp, parserInfo, f); - } catch (boost::bad_expression &e) { - exitError("wrong original expression: " + orig); - } -} + unsigned int numOfSubexpressions = RegexPreProcessor::num_of_subexpressions(exp); + if (numOfSubexpressions) { + // for marked subexpressions we must not use buildex, otherwise we might change + // the subexpressions indexes (e.g., for backreferences) + state->add_alternative(exp, parserInfo, f); + } else { + state->add_exp(buildex(exp), parserInfo, f); + + // record that we (manually) added an explicit marked subexpression + state->setHasMarkedAlternatives(); + } +} /** * Definitely associate the regular expression to this state @@ -64,6 +70,22 @@ RegExpStateBuilder::~RegExpStateBuilder() { } +void setFormatterExitLevel(StateStartLangElem *elem, RegExpFormatterPtr formatter) { + bool exit_all = elem->exitAll(); + bool exit = elem->doExit(); + + /* + only act on the exit state (if any exist statement is 
defined) + */ + if (exit_all) { + formatter->exit_state_level = 1; + formatter->exit_all = true; + } + + if (exit) + formatter->exit_state_level = 1; +} + RegExpStatePtr RegExpStateBuilder::build(LangElems *elems) { @@ -93,11 +115,11 @@ RegExpStateBuilder::build(LangElems *elems, RegExpStatePointer state) // try to find out where the problem is... RegExpStatePtr temp_state(new RegExpState()); for (LangElems::const_iterator it = elems->begin(); it != elems->end(); ++it) { - build(*it, temp_state); + build_DB(*it, temp_state); try { temp_state->freeze(); } catch (boost::bad_expression &e) { - exitError("problem in this expression: " + (*it)->toString()); + exitError("problem in this expression: " + (*it)->toStringOriginal(), *it); } } } else { @@ -111,23 +133,24 @@ RegExpStateBuilder::build(LangElem *elem, RegExpStatePointer state) } /** - * Build a subexpression starting from s + * Build a non-marking group (i.e., (? ... ) starting from s * @param s * @return */ -const string buildex(const string &s) +const string non_marking_group(const string &s) { - return "(" + s + ")"; + return "(?:" + s + ")"; } + /** - * Build a subexpression starting from s, after preprocessing s + * Build a subexpression starting from s * @param s * @return */ -const string buildex_pre(const string &s) +const string buildex(const string &s) { - return buildex(RegexPreProcessor::preprocess(s)); + return "(" + s + ")"; } /** @@ -185,17 +208,59 @@ RegExpStateBuilder::build(StringListLangElem *elem, RegExpStatePointer state) if (!elem->isCaseSensitive()) stringdef = RegexPreProcessor::make_nonsensitive(stringdef); - string exp_string = buildex_pre(stringdef); + string exp_string = non_marking_group(stringdef); if (isToIsolate) exp_string = buildex_isolated(exp_string); RegExpFormatterPtr formatter(new RegExpFormatter(name)); - add_exp(state, exp_string, buildex_pre(exp_string), elem, formatter); + add_exp(state, exp_string, elem, formatter); build(static_cast<StateStartLangElem *>(elem), 
state); } /** + * Case of a list of language elements, each representing a + * marked subexpression + * @param elem + * @param state + */ +void +RegExpStateBuilder::build(NamedSubExpsLangElem *elem, RegExpStatePointer state) +{ + const ElementNames *elems = elem->getElementNames(); + const StringDef *regexp = elem->getRegexpDef(); + format_vector formatters; + const string &regexp_string = regexp->toString(); + + // first check that the number of marked subexpressions is the same of + // the specified element names + subexpressions_info sexps = RegexPreProcessor::num_of_marked_subexpressions(regexp_string); + + if (sexps.errors.size()) { + exitError(sexps.errors, elem); + } + + if (sexps.marked != elems->size()) { + exitError("number of marked subexpressions does not match number of elements", elem); + } + + // for each named group build a formatter, that corresponds to that element + for (ElementNames::const_iterator it = elems->begin(); it != elems->end(); ++it) { + RegExpFormatterPtr formatter = RegExpFormatterPtr(new RegExpFormatter(*it)); + // each formatter will share the same exit level, since it represents the + // same matched regexp + setFormatterExitLevel(elem, formatter); + formatters.push_back(formatter); + } + + // now add all the formatters for this element + state->add_exp(regexp_string, elem, formatters); + + // record that all the subexpressions can match + state->setAllAlternativesCanMatch(); +} + +/** * Case of a delimited element * @param elem * @param state @@ -242,10 +307,10 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state) "<(?:[^<>])*>" */ if (!escape) { - exp_string = start_string + "([^" + + exp_string = start_string + non_marking_group("[^" + start_string + (end_string != start_string ?
end_string : "") + - "])*" + end_string; + "]") + "*" + end_string; } else { /* in case of a specified escape character it will use it for the @@ -255,12 +320,12 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state) <(?:[^\\<\\>]|\\.)*> */ - exp_string = start_string + "([^" + + exp_string = start_string + non_marking_group("[^" + escape_string + start_string + (end_string != start_string ? escape_string + end_string : "") + - "]|"+ escape_string + "." + - ")*" + end_string; + "]|"+ escape_string + ".") + + "*" + end_string; } } else { /* @@ -300,7 +365,7 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state) 1 + (elem->doExit() ? 1 : 0), elem->exitAll())); if (end) - add_exp(inner, end_string, buildex_pre(end_string), elem, exit); + add_exp(inner, end_string, elem, exit); else inner->add_exp(buildex("\\z"), elem, exit); @@ -313,7 +378,7 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state) (\*\])|(\\.) */ if (escape) { - add_exp(inner, escape_string, buildex_pre(escape_string + "."), + add_exp(inner, escape_string + ".", elem, RegExpFormatterPtr(new RegExpFormatter(name))); } @@ -339,16 +404,17 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state) if (elem->isNested()) { RegExpFormatterPtr nested(new RegExpFormatter(name, inner)); nested_formatters.push_back(nested); - add_exp(inner, start_string, buildex_pre(start_string), elem, nested); + add_exp(inner, start_string, elem, nested); } } if (inner) { - freeze_state(inner); + if (!freeze_state(inner)) + foundBug("bug in expression parsing", __FILE__, __LINE__);; } RegExpFormatterPtr formatter(new RegExpFormatter(name, inner)); - add_exp(state, exp_string, buildex_pre(exp_string), elem, formatter); + add_exp(state, exp_string, elem, formatter); build(static_cast<StateStartLangElem *>(elem), state); } @@ -363,19 +429,7 @@ RegExpStateBuilder::build(StateStartLangElem *elem, RegExpStatePointer state) { 
RegExpFormatterPtr formatter = state->getLastFormatter(); - bool exit_all = elem->exitAll(); - bool exit = elem->doExit(); - - /* - only act on the exit state (if any exist statement is defined) - */ - if (exit_all) { - formatter->exit_state_level = 1; - formatter->exit_all = true; - } - - if (exit) - formatter->exit_state_level = 1; + setFormatterExitLevel(elem, formatter); } /** diff --git a/src/lib/regexpstatebuilder.h b/src/lib/regexpstatebuilder.h index 3fc05fb..f3ed3ff 100644 --- a/src/lib/regexpstatebuilder.h +++ b/src/lib/regexpstatebuilder.h @@ -1,10 +1,10 @@ // -// C++ Interface: %{MODULE} +// C++ Interface: RegExpStateBuilder // -// Description: +// Description: Builds the RegExpStates starting from all the language elements. +// // -// -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} +// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2007 // // Copyright: See COPYING file that comes with this distribution // @@ -48,6 +48,7 @@ class RegExpStateBuilder void (StringListLangElem *elem, RegExpStatePointer state); void (DelimitedLangElem *elem, RegExpStatePointer state); void (StateStartLangElem *elem, RegExpStatePointer state); + void (NamedSubExpsLangElem *elem, RegExpStatePointer state); void (LangElem *elem, RegExpStatePointer state); void (LangElems *elems, RegExpStatePointer state); endbranches diff --git a/src/lib/regexpstatebuilder_dbtab.cc b/src/lib/regexpstatebuilder_dbtab.cc index 9dd404c..56b58c5 100644 --- a/src/lib/regexpstatebuilder_dbtab.cc +++ b/src/lib/regexpstatebuilder_dbtab.cc @@ -4,6 +4,8 @@ #include "statelangelem.h" +#include "namedsubexpslangelem.h" + #include "delimitedlangelem.h" #include "langelems.h" @@ -43,6 +45,12 @@ LangElems::dispatch_build(RegExpStateBuilder *RegExpStateBuilder_o, RegExpStateP } void +NamedSubExpsLangElem::dispatch_build(RegExpStateBuilder *RegExpStateBuilder_o, RegExpStatePointer state) +{ + RegExpStateBuilder_o->_forward_build(this, state); +} + +void 
StateLangElem::dispatch_build(RegExpStateBuilder *RegExpStateBuilder_o, RegExpStatePointer state) { RegExpStateBuilder_o->_forward_build(this, state); diff --git a/src/lib/regexpstateprinter.cpp b/src/lib/regexpstateprinter.cpp index 9a56b91..be5abb2 100644 --- a/src/lib/regexpstateprinter.cpp +++ b/src/lib/regexpstateprinter.cpp @@ -10,6 +10,7 @@ // // #include "regexpstateprinter.h" +#include "regexpreprocessor.h" #include <iostream> @@ -33,21 +34,54 @@ void RegExpStatePrinter::printRegExpState(RegExpStatePtr state) { do_indent; cout << " STATE " << state->id << endl; + do_indent; + cout << " regexp " << state->reg_exp << + (state->has_alternative() ? " (has alternatives)" : "") << endl; inc_indent; - int i = 0; - for (format_vector::const_iterator it = state->formatters.begin(); - it != state->formatters.end(); ++it) - { - do_indent; - cout << i << ": " << (*it)->elem << " " - << (i > 0 ? state->subExpressions[i-1].first : ""); - printRegExpFormatter(*it); - ++i; + unsigned int i = 0; + if (state->allAlternativesCanMatch) { + // print the default formatter + do_indent; + cout << i << ": " << state->formatters[0]->elem << " "; + printRegExpFormatter(state->formatters[0]); + + // we need to get all the subexpressions + const subexpressions_strings *split = RegexPreProcessor::split_marked_subexpressions(state->reg_exp.str()); + + i = 1; + for (subexpressions_strings::const_iterator it = split->begin(); it != split->end(); ++it) { + do_indent; + cout << i << ": " << state->formatters[i]->elem << " " + << *it ; + ++i; + if (i < state->formatters.size()) + cout << endl; + } + + // and print only the last state which has all the next state + // and exit level information + do_indent; + printRegExpFormatter(state->formatters[i-1]); + + delete split; + } else { + for (format_vector::const_iterator it = state->formatters.begin(); + it != state->formatters.end(); ++it) + { + do_indent; + cout << i << ": " << (*it)->elem << " " + << (i > 0 ? 
state->subExpressions[i-1].first : ""); + printRegExpFormatter(*it); + ++i; + } } dec_indent; + + if (state->alternative.get()) + printRegExpState(state->alternative); } void RegExpStatePrinter::printRegExpFormatter(RegExpFormatterPtr formatter) diff --git a/src/lib/statelangelem.cpp b/src/lib/statelangelem.cpp index 254b015..13d4c05 100644 --- a/src/lib/statelangelem.cpp +++ b/src/lib/statelangelem.cpp @@ -38,3 +38,11 @@ StateLangElem::toString() const return res; } +const std::string +StateLangElem::toStringOriginal() const +{ + string res = statestartlangelem->toString(); + if (langelems) + res += "\n" + langelems->toStringOriginal(); + return res; +} diff --git a/src/lib/statelangelem.h b/src/lib/statelangelem.h index 003b651..762d647 100644 --- a/src/lib/statelangelem.h +++ b/src/lib/statelangelem.h @@ -48,6 +48,8 @@ public: virtual const std::string toString() const; + virtual const std::string toStringOriginal() const; + StateStartLangElem *getStateStart() const { return statestartlangelem; } bool isState() const { return state; } LangElems *getElems() const { return langelems; } diff --git a/src/lib/stringdef.cpp b/src/lib/stringdef.cpp deleted file mode 100644 index e1ba02f..0000000 --- a/src/lib/stringdef.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// C++ Implementation: %{MODULE} -// -// Description: -// -// -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} -// -// Copyright: See COPYING file that comes with this distribution -// -// -#include "stringdef.h" - -StringDef::StringDef(const char *s) : - stringdef(s) -{ -} - -StringDef::StringDef(const std::string &s) : - stringdef(s) -{ -} - -StringDef::~StringDef() -{ -} - - diff --git a/src/lib/stringdef.h b/src/lib/stringdef.h index 48ec110..483307e 100644 --- a/src/lib/stringdef.h +++ b/src/lib/stringdef.h @@ -1,10 +1,10 @@ // -// C++ Interface: %{MODULE} +// C++ Interface: StringDef // -// Description: +// Description: a string definition that is used by all the language elements. 
// // -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} +// Author: Lorenzo Bettini, 1999-2007 <http://www.lorenzobettini.it> // // Copyright: See COPYING file that comes with this distribution // @@ -16,34 +16,55 @@ #include <list> /** -represent a string for a language element + represent a string for a language element -@author Lorenzo Bettini -*/ -class StringDef -{ - private: + @author Lorenzo Bettini + */ +class StringDef { +private: + /// the actual content std::string stringdef; - - public: - StringDef(const char *s); - StringDef(const std::string &s); - - ~StringDef(); - - const std::string toString() const { return stringdef; } - static StringDef *concat(const StringDef *s1, const StringDef *s2) - { return new StringDef(s1->stringdef + s2->stringdef); } + /// the original representation (without any preprocessing) + std::string orig; + +public: + /** + * constructs a StringDef and store also the original representation + * @param s the actual content + * @param o the original representation + */ + StringDef(const std::string &s, const std::string &o) : + stringdef(s), orig(o) { + } + + StringDef(const std::string &s) : + stringdef(s) { + } + + const std::string toString() const { + return stringdef; + } + + /** + * return the original representation (this is useful for printing errors) + * @return the original representation + */ + const std::string toStringOriginal() const { + return orig; + } + + static StringDef *concat(const StringDef *s1, const StringDef *s2) { + return new StringDef(s1->stringdef + s2->stringdef); + } }; typedef std::list<StringDef *> StringDefsBase; -class StringDefs : public StringDefsBase -{ - public: +class StringDefs : public StringDefsBase { +public: ~StringDefs() { - for (StringDefsBase::iterator it = begin(); it != end(); ++it) - delete *it; + for (StringDefsBase::iterator it = begin(); it != end(); ++it) + delete *it; } }; diff --git a/src/lib/stringlistlangelem.cpp b/src/lib/stringlistlangelem.cpp index 8f0e266..59f5be6 100644 
--- a/src/lib/stringlistlangelem.cpp +++ b/src/lib/stringlistlangelem.cpp @@ -30,7 +30,13 @@ StringListLangElem::~StringListLangElem() const std::string StringListLangElem::toString() const { - string res = StateStartLangElem::toString() + " " + toStringCollection<StringDefs>(alternatives);; + string res = StateStartLangElem::toString() + " " + toStringCollection<StringDefs>(alternatives); return res; } +const std::string +StringListLangElem::toStringOriginal() const +{ + string res = StateStartLangElem::toString() + " " + toStringOriginalCollection<StringDefs>(alternatives); + return res; +} diff --git a/src/lib/stringlistlangelem.h b/src/lib/stringlistlangelem.h index 7b3fc8e..90a3baa 100644 --- a/src/lib/stringlistlangelem.h +++ b/src/lib/stringlistlangelem.h @@ -38,6 +38,8 @@ public: virtual ~StringListLangElem(); virtual const std::string toString() const; + + virtual const std::string toStringOriginal() const; StringDefs *getAlternatives() const { return alternatives; } bool isCaseSensitive() const { return !nonsensitive; } diff --git a/src/lib/stylecssparser.yy b/src/lib/stylecssparser.yy index 86b8659..8ba3697 100644 --- a/src/lib/stylecssparser.yy +++ b/src/lib/stylecssparser.yy @@ -23,8 +23,6 @@ #include <iostream> #include <string> -#include "my_sstream.h" - #include "generatorfactory.h" #include "colors.h" #include "keys.h" @@ -51,6 +49,7 @@ static string bodyBgColor; extern int stylecsssc_lex() ; extern FILE *stylecsssc_in ; +extern int stylecsssc_lex_destroy (void); /// the global pointer to style constant for a specific element static StyleConstantsPtr currentStyleConstants; @@ -79,11 +78,11 @@ static string currentBGColor; stylefile : { /* allow empty files */ } | statements ; - + statements : statements statement | statement ; - + statement : option ; @@ -109,11 +108,11 @@ option : keylist // check whether it's the body specification if (Utils::tolower(key) == "body") { updateBgColor(currentBGColor); - + // notice that for text style specification 
for the body, the background // is assumed for the entire document and not for the normal text // following the semantics of css - + // avoid adding an empty style definition for normal if (currentColor != "" || currentStyleConstants->size()) { if (!generatorFactory->createGenerator(NORMAL, currentColor, "", currentStyleConstants)) { @@ -205,22 +204,24 @@ void parseCssStyles(const string &path, const string &name, GeneratorFactory *ge printMessage_noln( "Parsing ", cerr ) ; printMessage_noln (current_file, cerr); printMessage( " file ...", cerr ) ; - + bodyBgColor = ""; - + yyparse() ; - + bodyBgColor_ = bodyBgColor; - + printMessage( "Parsing done!", cerr ) ; fclose(stylecsssc_in); + + // release scanner memory + stylecsssc_lex_destroy(); } void yyerror( char *s ) { parseStyleError(s); - exit(EXIT_FAILURE); } void updateBgColor(const std::string &c) diff --git a/src/lib/stylecssscanner.ll b/src/lib/stylecssscanner.ll index 1806907..b1a6ae3 100644 --- a/src/lib/stylecssscanner.ll +++ b/src/lib/stylecssscanner.ll @@ -43,7 +43,6 @@ extern int line ; #define DEB2(s,s2) #endif - %} %option prefix="stylecsssc_" @@ -207,16 +206,6 @@ STRING \"[^\"\n]*\" DEB2("CSS PROPERTIES discarding", yytext); } -<<EOF>> { - DEB("reached EOF of the style file"); - - DEB("freeing scanner memory"); - /* For non-reentrant C scanner only. */ - yy_delete_buffer(YY_CURRENT_BUFFER); - - yyterminate(); -} - <INITIAL>. 
{ return yytext[0] ; } %% diff --git a/src/lib/styleparser.yy b/src/lib/styleparser.yy index a203137..7f000b6 100644 --- a/src/lib/styleparser.yy +++ b/src/lib/styleparser.yy @@ -23,8 +23,6 @@ #include <iostream> #include <string> -#include "my_sstream.h" - #include "generatorfactory.h" #include "colors.h" #include "keys.h" @@ -42,6 +40,7 @@ int line = 1 ; extern int stylesc_lex() ; extern FILE *stylesc_in ; +extern int stylesc_lex_destroy (void); static string bodyBgColor; @@ -74,15 +73,15 @@ static GeneratorFactory *generatorFactory; stylefile : { /* allow empty files */ } | statements ; - + statements : statements statement | statement ; - + statement : option | bodybgcolor ; - + option : keylist color bgcolor { printSequence( 1ドル ) ; @@ -164,7 +163,7 @@ parseStyles(const string &path, const string &name, GeneratorFactory *genFactory string &bodyBgColor_) { generatorFactory = genFactory; - + // opens the file for yylex stylesc_in = open_data_file_stream(path, name); @@ -173,29 +172,33 @@ parseStyles(const string &path, const string &name, GeneratorFactory *genFactory printMessage_noln( "Parsing ", cerr ) ; printMessage_noln (current_file, cerr); printMessage( " file ...", cerr ) ; - + bodyBgColor = ""; - + yyparse() ; - + bodyBgColor_ = bodyBgColor; - + printMessage( "Parsing done!", cerr ) ; fclose(stylesc_in); + + // free scanner memory + stylesc_lex_destroy(); } void yyerror( char *s ) { parseStyleError(s); - exit(EXIT_FAILURE); } -void parseStyleError(const std::string &error) +void parseStyleError(const std::string &error, bool exit) { - ostringstream str ; - str << current_file << ":" << line << ": " << error; - printError( str.str(), cerr ) ; + if (exit) + exitError(current_file, line, error); + else { + printError(current_file, line, error); + } } void updateBgColor(const std::string *c) @@ -204,7 +207,7 @@ void updateBgColor(const std::string *c) yyerror("bgcolor already defined"); else bodyBgColor = *c; - + // we don't need it anymore delete c; } diff 
--git a/src/lib/stylescanner.ll b/src/lib/stylescanner.ll index c5b5c19..0baf33f 100644 --- a/src/lib/stylescanner.ll +++ b/src/lib/stylescanner.ll @@ -60,7 +60,7 @@ IDE [a-zA-Z_]([a-zA-Z0-9_])* STRING \"[^\"\n]*\" -%s COMMENT_STATE STRING_STATE +%s COMMENT_STATE STRING_STATE %% @@ -76,7 +76,7 @@ STRING \"[^\"\n]*\" <INITIAL>\#[a-fA-F0-9]{6} { const std::string wrong = yytext ; - parseStyleError("use of direct colors has changed"); + parseStyleError("use of direct colors has changed", false); parseStyleError("use double quoted syntax: \"" + wrong + "\" instead of " + wrong); exit(EXIT_FAILURE); return COLOR ; @@ -108,16 +108,6 @@ STRING \"[^\"\n]*\" \n { ++line ; } -<<EOF>> { - DEB("reached EOF of the style file"); - - DEB("freeing scanner memory"); - /* For non-reentrant C scanner only. */ - yy_delete_buffer(YY_CURRENT_BUFFER); - - yyterminate(); -} - <INITIAL>. { /* anything else will generate a parsing error */ return yytext[0] ; } %% diff --git a/src/lib/test_langinfer.cpp b/src/lib/test_langinfer.cpp index b683cf3..a4c8994 100644 --- a/src/lib/test_langinfer.cpp +++ b/src/lib/test_langinfer.cpp @@ -43,6 +43,10 @@ main() testInfer("#!/bin/sh\n# -*- tcl, as specified in Emacs -*-", "tcl"); + // try with the env specification + testInfer("#! /usr/bin/env python", "python"); + testInfer("#! /bin/env perl", "perl"); + // just a small check for tolower string s = "Lisp"; Utils::toLower(s); diff --git a/src/lib/test_regexpreprocessor_main.cpp b/src/lib/test_regexpreprocessor_main.cpp new file mode 100644 index 0000000..daa5a9d --- /dev/null +++ b/src/lib/test_regexpreprocessor_main.cpp @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2007 Lorenzo Bettini <http://www.lorenzobettini.it> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* + * This program is part of GNU source-highlight + * + * This tests regex preprocessing + */ + +#include <iostream> +#include <boost/regex.hpp> +#include <algorithm> + +#include "asserttest.h" +#include "regexpreprocessor.h" + +using namespace std; + +// variable to test results for all the tests +int result = 0; + +void testPreprocess(const string &original, const string &expected) { + cout << "original : " << original << endl; + + const string &preprocessed = RegexPreProcessor::preprocess(original); + + cout << "preprocessed : " << preprocessed << endl; + result += assertEquals(expected, preprocessed); +} + +void testMakeNonSensitive(const string &original, const string &expected) { + cout << "original : " << original << endl; + + const string &preprocessed = RegexPreProcessor::make_nonsensitive(original); + + cout << "preprocessed : " << preprocessed << endl; + result += assertEquals(expected, preprocessed); +} + +void testOnlyNumOfMarkedSubexpressions(const string &original, + unsigned int expected) { + cout << "original : " << original << endl; + + unsigned int found = RegexPreProcessor::num_of_subexpressions(original); + + cout << "found : " << found << endl; + result += assertEquals(expected, found); +} + +void testNumOfMarkedSubexpressions(const string &original, + unsigned int expected, const string &error = "") { + cout << "original : " << original << endl; + + subexpressions_info + sexp = RegexPreProcessor::num_of_marked_subexpressions(original); + unsigned int found = sexp.marked; + + cout << 
"found : " << found << endl; + if (sexp.errors.size()) + cout << "error : " << sexp.errors << endl; + result += assertEquals(expected, found); + result += assertEquals(error, sexp.errors); +} + +void testBackReference(const string &original, bool expected) { + cout << "original : " << original << endl; + + bool found = RegexPreProcessor::contains_backreferences(original); + + cout << "found : " << found << endl; + result += assertEquals(expected, found); +} + +void testSplit(const string &original, const subexpressions_strings &expected) { + const subexpressions_strings *split; + split = RegexPreProcessor::split_marked_subexpressions(original); + cout << "split : "; + std::copy(split->begin(), split->end(), std::ostream_iterator<string>(cout)); + cout << endl; + if (!std::equal(split->begin(), split->end(), expected.begin())) { + ++result; + cout << "are not equal!" << endl; + cout << "expected : "; + std::copy(expected.begin(), expected.end(), + std::ostream_iterator<string>(cout)); + } +} + +int main() { + cout << boolalpha; + + testPreprocess("simple", "simple"); + testPreprocess("(inside)", "(?:inside)"); + testPreprocess("(dou(b)le)", "(?:dou(?:b)le)"); + + testMakeNonSensitive("foo", "[Ff][Oo][Oo]"); + + testOnlyNumOfMarkedSubexpressions("none", 0); + testOnlyNumOfMarkedSubexpressions("just (one)", 1); + testOnlyNumOfMarkedSubexpressions("(3 of (them)) just (one)", 3); + + testOnlyNumOfMarkedSubexpressions("none \\(", 0); + testOnlyNumOfMarkedSubexpressions("(?: again) none \\(", 0); + + testNumOfMarkedSubexpressions("none", 0, + subexpressions_info::ERR_OUTSIDE_SUBEXP); + testNumOfMarkedSubexpressions("just (one)", 0, + subexpressions_info::ERR_OUTSIDE_SUBEXP); + testNumOfMarkedSubexpressions("(3 of (them)) just (one)", 1, + subexpressions_info::ERR_NESTED_SUBEXP); + + testNumOfMarkedSubexpressions("none \\(", 0, + subexpressions_info::ERR_OUTSIDE_SUBEXP); + testNumOfMarkedSubexpressions("(?: again) none \\(", 0, + subexpressions_info::ERR_OUTER_UNMARKED); + 
+ testNumOfMarkedSubexpressions("(just one)", 1); + testNumOfMarkedSubexpressions("(just one (?:some) and unmarked)", 1); + testNumOfMarkedSubexpressions("(just one \\( and escapes)", 1); + testNumOfMarkedSubexpressions("(just one \\( and \\) escapes)", 1); + testNumOfMarkedSubexpressions("(one) ", 1, + subexpressions_info::ERR_OUTSIDE_SUBEXP); + + testNumOfMarkedSubexpressions("(one", 1, + subexpressions_info::ERR_UNBALANCED_PAREN); + testNumOfMarkedSubexpressions("(one))", 1, + subexpressions_info::ERR_UNBALANCED_PAREN); + + testNumOfMarkedSubexpressions("(one)(two)((?:three)*)", 3); + testNumOfMarkedSubexpressions("(one) (two)", 1, + subexpressions_info::ERR_OUTSIDE_SUBEXP); + + subexpressions_strings expected; + expected.push_back("(this)"); + expected.push_back("(is)"); + expected.push_back("(one)"); + testSplit("(this)(is)(one)", expected); + + expected.clear(); + expected.push_back("(this)"); + expected.push_back("(contains \\( some \\) other parenthesis)"); + expected.push_back("(and (?:non marked) ones)"); + testSplit("(this)(contains \\( some \\) other parenthesis)(and (?:non marked) ones)", expected); + + testBackReference("this does not contain any", false); + testBackReference("this does contain \1円 one", true); + testBackReference("and also this one (?(2)...) does", true); + testBackReference("while this one (?(foo)...) 
does NOT does", false); + + return result; +} diff --git a/src/lib/textformatter.cpp b/src/lib/textformatter.cpp index bf681fb..cf87460 100644 --- a/src/lib/textformatter.cpp +++ b/src/lib/textformatter.cpp @@ -4,42 +4,334 @@ // Description: // // -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004 +// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007 // // Copyright: See COPYING file that comes with this distribution // // + #include "textformatter.h" +#include "textgenerator.h" +#include "preformatter.h" +#include "mainoutputbuffer.h" +#include "messages.h" +#include "fileutil.h" +#include "fileinfo.h" -#include "maingeneratormap.h" +#include <boost/regex.hpp> +#include <list> using namespace std; -TextFormatter::TextFormatter() -{ +bool isTaggable(const string &elem) { + return elem.find_first_of(' ')== string::npos; +} + +static boost::regex string_or_space_regex("([^[:blank:]]+)|([[:blank:]]+)"); +static SubstitutionMapping ref_substitutionmapping; + +#define SPACE 2 +#define NOT_SPACE 1 + +TextFormatter::TextFormatter(PreFormatter *pf) : + default_generator(0), preformatter(pf), noOptimizations(false), + generateReferences(false), ctags_file(0) { +} + +TextFormatter::TextFormatter(PreFormatter *pf, const string &_ctags_file_name, + const TextStyles::RefTextStyle &r, RefPosition pos) : + default_generator(0), preformatter(pf), noOptimizations(false), + generateReferences(true), ctags_file_name(_ctags_file_name), + refstyle(r), refposition(pos) { + ctags_file = tagsOpen(ctags_file_name.c_str(), &info); + if (ctags_file == 0) { + exitError("cannot open tag file: " + ctags_file_name); + } +} + +TextFormatter::~TextFormatter() { + for (MapType::const_iterator it = textformatter.begin(); it != textformatter.end(); ++it) + delete it->second; + + if (ctags_file) + tagsClose(ctags_file); +} + +void TextFormatter::setDefaultGenerator(TextGenerator *gen) { + default_generator = gen; } -TextFormatter::~TextFormatter() -{ +void 
TextFormatter::addGenerator(const std::string &elem, TextGenerator *gen) { + textformatter[elem] = gen; } -void -TextFormatter::format(const string &elem, const string &text, const FileInfo *p) -{ - if (! text.size()) - return; +TextGenerator *TextFormatter::hasGenerator(const string &elem) { + MapType::const_iterator it = textformatter.find(elem); + if (it == textformatter.end()) + return 0; + + return it->second; +} + +TextGenerator *TextFormatter::getGenerator(const string &elem) { + MapType::const_iterator it = textformatter.find(elem); + if (it == textformatter.end()) { + // create a copy of the prototype and substitute the style. + TextGenerator *missing = new TextGenerator(*default_generator); + missing->subst_style(elem); + + textformatter[elem] = missing; + return missing; + } - generatormap->generateEntire(elem, text, p); + return it->second; } -void -TextFormatter::format_nl(const string &text) -{ - generatormap->generateNL(text); +void TextFormatter::addNoReference(const std::string &elem) { + noreferences.insert(elem); +} + +bool TextFormatter::isNoReference(const std::string &elem) const { + return (noreferences.find(elem) != noreferences.end()); +} + +const string TextFormatter::generateString(const std::string &elem, + const std::string &s, const FileInfo *p) { + if (!generateReferences || isNoReference(elem)) { + return generateStringNoRef(elem, s); + } else { + return generateStringAndRef(elem, s, p); + } +} + +const string TextFormatter::generateStringNoRef(const std::string &elem, + const std::string &s) { + return getGenerator(elem)->generateEntire(preformatter->preformat(s)); +} + +void TextFormatter::generateEntire(const std::string &elem, + const std::string &s, const FileInfo *p) { + if (noOptimizations) { + // we generate the element right now, since during debugging + // we want to be very responsive + if (s.size()) + output(generateString(elem, s, p)); + + return; + } + + // otherwise we optmize output generation: delay formatting a 
specific + // element until we deal with another element; this way strings that belong + // to the same element are formatted using only one tag: e.g., + // <comment>/* mycomment */</comment> + // instead of + // <comment>/*</comment><comment>mycomment</comment><comment>*/</comment> + if (elem == current_elem) { + elem_buffer << s; + } else { + // first format the buffered string + const string toformat = elem_buffer.str(); + if (toformat.size()) + output(generateString(current_elem, toformat, p)); + + // then start a new buffer + elem_buffer.str(""); + elem_buffer << s; + current_elem = elem; + current_file_info = p; + } +} + +void TextFormatter::generateNL(const std::string &text) { + // first format the buffered string + flush(); + + string preformat_text = preformatter->preformat(text); + + if (preformat_text == text) + preformat_text = "\n"; + + outputbuffer->output_ln(preformat_text); +} + +void TextFormatter::flush() { + const string &remainder = elem_buffer.str(); + if (remainder.size()) { + output(generateString(current_elem, remainder, current_file_info)); + elem_buffer.str(""); + current_elem = ""; + // each line is handled separately + } +} + +void TextFormatter::output(const string &s) { + outputbuffer->output(s); +} + +//#define DEBUGREF +#ifdef DEBUGREF +#include <iostream> +#define DEB(x) cerr << x << endl; +#define DEB2(x) cerr << x ; +#else +#define DEB(x) ; +#define DEB2(x) ; +#endif + +/* + * separates a line in block of spaces and block of non spaces. + * the stringbuffer tokens stores the pieces seen so far for which no + * entry in the tag file was found. + * + * for each block of non spaces tries to look for an entry in the tag. + * if it finds it flushes the stringbuffer tokens (by passing its contents + * to the parent class implementation of generateString). 
+ * + * For instance (notice the spaces among the +) + * "myfield + myfield2 + myfield3 + * if only an entry for myfield2 is found, the we will generate + * 3 blocks: + * "myfield + " + * "myfield2" + * " + myfield3" + */ +const std::string TextFormatter::generateStringAndRef(const std::string& elem, + const std::string& s, const FileInfo* fileinfo) { + buffer.str(""); + ostringstream tokens; + + boost::sregex_iterator i(s.begin(), s.end(), string_or_space_regex); + boost::sregex_iterator j; + while (i != j) { + if ((*i)[SPACE].matched) { + tokens << string((*i)[SPACE].first, (*i)[SPACE].second); + } else { + string not_spaces = string((*i)[NOT_SPACE].first, (*i)[NOT_SPACE].second); + string found_refs = generateRefInfo(elem, not_spaces, fileinfo); + if (found_refs.size()) { + const string &previous = tokens.str(); + if (previous.size()) { + buffer << generateStringNoRef(elem, previous); + tokens.str(""); + } + buffer << found_refs; + } else { + tokens << not_spaces; + } + } + + ++i; + } + + const string &remainder = tokens.str(); + if (remainder.size()) { + buffer << generateStringNoRef(elem, remainder); + } + + return buffer.str(); +} + +struct RefEntry { + string filename; + unsigned long linenumber; +}; + +const string TextFormatter::generateRefInfo(const std::string& elem, + const std::string& s, const FileInfo* fileinfo) { + tagEntry entry; + bool found = false; // whether we found a tag + bool found_anchor = false; // whether we found an anchor + string output; + typedef list<RefEntry> FoundRefList; + FoundRefList foundreflist; + + DEB("inspecting " + s) + + if (tagsFind(ctags_file, &entry, s.c_str(), TAG_FULLMATCH)== TagSuccess) { + found = true; + do { + RefEntry refentry; + refentry.filename = entry.file; + if ((refentry.filename == fileinfo->filename || + refentry.filename == fileinfo->input_file_name) &&entry.address.lineNumber == fileinfo->line) { + ostringstream gen_info; + // we just found the reference to this very element + // so we must generate 
an anchor + gen_info << entry.address.lineNumber;DEB(" found anchor " + gen_info.str()); + ref_substitutionmapping["$text"] = preformatter->preformat(s); + ref_substitutionmapping["$infilename"] = strip_file_path(refentry.filename); + ref_substitutionmapping["$infile"] = refentry.filename; + ref_substitutionmapping["$linenum"] = gen_info.str(); + output = refstyle.anchor.output(ref_substitutionmapping); + found_anchor = true; + break; + } + + DEB2(" found " + string(entry.name) + " : ");DEB(entry.address.lineNumber); + + refentry.linenumber = entry.address.lineNumber; + foundreflist.push_back(refentry); + } while (tagsFindNext(ctags_file, &entry)== TagSuccess); + } + + if (found) { + if (! found_anchor) { + ref_substitutionmapping["$text"] = preformatter->preformat(s); + TextStyle *referencestyle = 0; + if ((foundreflist.size()>1 && refposition == INLINE) || refposition == POSTLINE) + referencestyle = &(refstyle.postline_reference); + else if (refposition == POSTDOC) + referencestyle = &(refstyle.postdoc_reference); + else + referencestyle = &(refstyle.inline_reference); + + for (FoundRefList::const_iterator it = foundreflist.begin(); it != foundreflist.end(); ++it) { + ostringstream gen_info; + // we found where this element appears so we generate a reference + // if it's a link in the same file, we use the output_file_name... 
+ if (it->filename == fileinfo->filename || it->filename == fileinfo->input_file_name) + gen_info << fileinfo->output_file_name; + else + gen_info << it->filename << fileinfo->output_file_extension; + // ...otherwise we build the referenced file by using the output_file_extension + // in fact, in this case, it probably means that multiple input files have been specified + + ref_substitutionmapping["$outfile"] = gen_info.str(); + ref_substitutionmapping["$infilename"] = strip_file_path(it->filename); + ref_substitutionmapping["$infile"] = it->filename; + + gen_info.str(""); + gen_info << it->linenumber; + ref_substitutionmapping["$linenum"] = gen_info.str(); + output += referencestyle->output(ref_substitutionmapping); + + // if the following is true, it means that there more than one reference + if (foundreflist.size()> 1 || refposition != INLINE) { + output += preformatter->preformat("\n"); + + if (refposition == POSTLINE || refposition == INLINE) { + outputbuffer->output_postline(output); + } else { // (refposition == POSTDOC) + outputbuffer->output_post(output); + } + + output = ""; // no need to modify the current element + } + } + } + } + + return output; +} + +void TextFormatter::format(const string &elem, const string &text, + const FileInfo *p) { + if (! 
text.size()) + return; + + generateEntire(elem, text, p); } -void -TextFormatter::flush() -{ - generatormap->flush(); +void TextFormatter::format_nl(const string &text) { + generateNL(text); } diff --git a/src/lib/textformatter.h b/src/lib/textformatter.h index 3d6230c..ec0bf6e 100644 --- a/src/lib/textformatter.h +++ b/src/lib/textformatter.h @@ -9,33 +9,119 @@ // Copyright: See COPYING file that comes with this distribution // // -#ifndef TEXTFORMATTER_H -#define TEXTFORMATTER_H +#ifndef GENERATORMAP_H +#define GENERATORMAP_H +#include <map> #include <string> -#include <boost/shared_ptr.hpp> +#include <set> +#include "my_sstream.h" +#include "readtags.h" +#include "textstyles.h" +#include "refposition.h" + +class TextGenerator; +class PreFormatter; +class FileInfo; /** -format text + * Formats the elements of a source file, using a map of generators; + * associate a generator for each program element, e.g., keyword, string, etc. + * + * @author Lorenzo Bettini + */ +class TextFormatter { +protected: + typedef std::map<std::string, TextGenerator *> MapType; + typedef std::set<std::string> NoRefType; + MapType textformatter; + /// those elements for which no reference info is generated + NoRefType noreferences; + TextGenerator *default_generator; + PreFormatter *preformatter; + /// where we buffer strings for the current elem + std::ostringstream elem_buffer; + /// the element that is currently buffered + std::string current_elem; + /// concerns the element currently buffered + const FileInfo *current_file_info; + /// whether to turn off optimizazionts (such as buffering), default: false + bool noOptimizations; -@author Lorenzo Bettini -*/ + bool generateReferences; -class FileInfo; + // for references + + const std::string ctags_file_name; + TextStyles::RefTextStyle refstyle; + RefPosition refposition; + tagFile *ctags_file; + tagFileInfo info; + std::ostringstream buffer; + + const std::string generateString(const std::string &elem, + const std::string &s, const 
FileInfo *); + + const std::string generateStringAndRef(const std::string &elem, + const std::string &s, const FileInfo *); + + const std::string generateStringNoRef(const std::string &elem, + const std::string &s); + + /** + * Generates the reference information + */ + const std::string generateRefInfo(const std::string& elem, + const std::string& s, const FileInfo* arg1); + + /** + * Actually performs the output through an OutputBuffer + */ + void output(const std::string &s); + + void generateEntire(const std::string &elem, const std::string &s, + const FileInfo *); + void generateNL(const std::string &s); -class TextFormatter -{ public: - TextFormatter(); + TextFormatter(PreFormatter *); - ~TextFormatter(); + TextFormatter(PreFormatter *pf, const std::string &_ctags_file_name, + const TextStyles::RefTextStyle &r, RefPosition pos); - void format(const std::string &elem, const std::string &text, - const FileInfo *); - void format_nl(const std::string &text = "\n"); - void flush(); -}; + ~TextFormatter(); + + /** + * Returns the generator for the specific element name or null if + * there's no generator for the element + * @param elem + * @return + */ + TextGenerator *hasGenerator(const std::string &elem); + + /** + * Retrieves the generator for a specific element; if it doesn't find it, + * it creates a generator for that element, using the default generator + * (i.e., the one for "normal" element) + * @param elem + * @return + */ + TextGenerator *getGenerator(const std::string &elem); + void addGenerator(const std::string &elem, TextGenerator *gen); + void addNoReference(const std::string &elem); + bool isNoReference(const std::string &elem) const; -typedef boost::shared_ptr<TextFormatter> TextFormatterPtr; + void setDefaultGenerator(TextGenerator *g); + + void flush(); + + void setNoOptimizations(bool n) { + noOptimizations = n; + } + + void format(const std::string &elem, const std::string &text, + const FileInfo *); + void format_nl(const std::string &text = 
"\n"); +}; #endif diff --git a/src/lib/tostringcollection.h b/src/lib/tostringcollection.h index 99b753d..a4b20f6 100644 --- a/src/lib/tostringcollection.h +++ b/src/lib/tostringcollection.h @@ -1,10 +1,8 @@ // -// C++ Interface: %{MODULE} +// Description: given a collection generates a string representation // -// Description: // -// -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} +// Author: Lorenzo Bettini, 1999-2007 <http://www.lorenzobettini.it> // // Copyright: See COPYING file that comes with this distribution // @@ -16,6 +14,13 @@ #include <string> #include "my_sstream.h" +/** + * Converts a collection of objects with method toString into a string, + * using the passed separator to separate the elements. + * + * @param collection + * @param sep + */ template <class T> const std::string toStringCollection(const T *collection, char sep = ' ') { @@ -32,4 +37,51 @@ const std::string toStringCollection(const T *collection, char sep = ' ') return buf.str(); } +/** + * Converts a collection of objects with method toStringOriginal into a string, + * using the passed separator to separate the elements. + * + * @param collection + * @param sep + */ +template <class T> +const std::string toStringOriginalCollection(const T *collection, char sep = ' ') +{ + std::ostringstream buf; + + for (typename T::const_iterator it = collection->begin(); + it != collection->end(); ) + { + buf << (*it)->toStringOriginal(); + if (++it != collection->end()) + buf << sep; + } + + return buf.str(); +} + +/** + * Converts a collection of objects into a string, + * using the passed separator to separate the elements. 
+ * + * @param collection + * @param sep + */ +template <class T> +const std::string collectionToString(const T *collection, char sep = ' ') +{ + std::ostringstream buf; + + for (typename T::const_iterator it = collection->begin(); + it != collection->end(); ) + { + buf << (*it); + if (++it != collection->end()) + buf << sep; + } + + return buf.str(); +} + + #endif // TOSTRINGCOLLECTION_H diff --git a/src/log.lang b/src/log.lang index af1611e..6a7b946 100644 --- a/src/log.lang +++ b/src/log.lang @@ -39,9 +39,7 @@ ip = $ip string = "root","failure" -state normal = '(port|pid)[[:blank:]]' begin - port = '[[:digit:]]+' exit -end +(normal,port) = `((?:port|pid)[[:blank:]])([[:digit:]]+)` state normal start '[[:blank:]](?=(IN|OUT)=)' begin state normal = '(IN|OUT|PROTO)=(?=[^[:blank:]]+)' begin diff --git a/src/logtalk.lang b/src/logtalk.lang index d9b5405..b2c03ff 100644 --- a/src/logtalk.lang +++ b/src/logtalk.lang @@ -55,14 +55,14 @@ variable = '\<[A-Z_][A-Za-z0-9_]*' cbracket = "{|}" -preproc = '^[[:blank:]]*:-[[:blank:]](ca(lls|tegory) | p(ublic|r(ot(ocol|ected)|ivate)) | e(ncoding|xports) | in(fo|itialization) | alias | d(ynamic|iscontiguous) | meta_predicate | m(etapredicate|od(e|ule)|ultifile) | o(bject|p) | use(s|_module))(?=\()', - '^[[:blank:]]*:-[[:blank:]](end_(category|object|protocol) | dynamic)\.' +preproc = '^[[:blank:]]*:-[[:blank:]](ca(lls|tegory)|p(ublic|r(ot(ocol|ected)|ivate))|e(ncoding|xports)|in(fo|itialization)|alias|d(ynamic|iscontiguous)|meta_predicate|m(etapredicate|od(e|ule)|ultifile)|o(bject|p)|use(s|_module))(?=\()', + '^[[:blank:]]*:-[[:blank:]](end_(category|object|protocol)|dynamic)\.' preproc = '\<(extends|i(nstantiates|mp(lements|orts))|specializes)(?=\()' normal = '\<[a-z][A-Za-z0-9_]*' -number = '0\'[A-Za-z0-9] | 0b[0-1]+ | 0o[0-7]+ | 0x[0-9a-fA-F]+ | [0-9]+(\.[0-9]+)?([eE]([-+])?[0-9]+)?' +number = '0\'[A-Za-z0-9]|0b[0-1]+|0o[0-7]+|0x[0-9a-fA-F]+|[0-9]+(\.[0-9]+)?([eE]([-+])?[0-9]+)?' 
symbol = "::", "^^", ">>", "<<", "/\\", "\\/", "\\", diff --git a/src/outlang.map b/src/outlang.map index b8907e5..9a0ced6 100644 --- a/src/outlang.map +++ b/src/outlang.map @@ -16,4 +16,5 @@ latexcolor = latexcolor.outlang latexcolor-doc = latexcolordoc.outlang texinfo = texinfo.outlang javadoc = javadoc.outlang -docbook = docbook.outlang
\ No newline at end of file +docbook = docbook.outlang +docbook-doc = docbookdoc.outlang
\ No newline at end of file diff --git a/src/perl.lang b/src/perl.lang index 8c3672b..00d1011 100644 --- a/src/perl.lang +++ b/src/perl.lang @@ -1,19 +1,117 @@ preproc = "import" +# these might be unreadable but I don't know how else to do that... +regexp = 's\{(\\\}|[^}])*\}\{(\\\}|[^}])*\}[ixsmogce]*' +regexp = 's\((\\\)|[^)])*\)\((\\\)|[^)])*\)[ixsmogce]*' +regexp = 's\[(\\\]|[^\]])*\]\[(\\\]|[^\]])*\][ixsmogce]*' +regexp = 's<.*><.*>[ixsmogce]*' + +# the last (lookahead) expression is used to deal with # used +# as a delimiter. +# otherwise, with a line such as +# s#foo\###; # my comment +# the # of the comment would be matched as the closing delimiter +regexp = `s([^[:alnum:][:blank:]]).*\1.*\1[ixsmogce]*(?=[[:blank:]]*(\)|;))` + +# this is to deal with cases where the delimiters for the first and the +# second part are not the same (and spaces are allowed between the first +# closing and the second opening) +regexp = `s([^[:alnum:][:blank:]]).*\1[[:blank:]]*([^[:alnum:][:blank:]]).*\2[ixsmogce]*(?=[[:blank:]]*(\)|;))` + include "script_comment.lang" include "number.lang" +# this won't work if # has something (non blank) before +vardef comment_in_exp = '[[:blank:]]+#.*' + +vardef var_in_exp = '\$([[:word:]]+|\{[[:word:]]+\})' + +# this is to highlight correctly regular expressions +# (and don't mix them with { } code blocks) +environment keyword = '(m|qr)(?=\{)' begin + environment regexp = '\{' begin + comment = $comment_in_exp + variable = $var_in_exp + regexp = '\\\{|\\\}' + regexp = "}" exitall + end +end + +# repeat for other non alpha numerical chars +environment keyword = '(m|qr)(?=#)' begin + environment regexp = '#' begin + comment = $comment_in_exp + variable = $var_in_exp + regexp = '\\#' + regexp = "#" exitall + end +end + +environment keyword = '(m|qr)(?=\|)' begin + environment regexp = '\|' begin + comment = $comment_in_exp + variable = $var_in_exp + regexp = '\\\|' + regexp = "\|" exitall + end +end + +environment keyword = '(m|qr)(?=@)' begin + 
environment regexp = '@' begin + comment = $comment_in_exp + variable = $var_in_exp + regexp = '\\@' + regexp = "@" exitall + end +end + +environment keyword = '(m|qr)(?=<)' begin + environment regexp = '<' begin + comment = $comment_in_exp + variable = $var_in_exp + regexp = '\\<|\\>' + regexp = ">" exitall + end +end + +environment keyword = '(m|qr)(?=\[)' begin + environment regexp = '\[' begin + comment = $comment_in_exp + variable = $var_in_exp + regexp = '\\]' + regexp = "]" exitall + end +end + +environment keyword = '(m|qr)(?=\\)' begin + environment regexp = '\\' begin + comment = $comment_in_exp + variable = $var_in_exp + regexp = '\\\\' + regexp = "\\" exitall + end +end + +environment keyword = '(m|qr)(?=/)' begin + environment regexp = '/' begin + comment = $comment_in_exp + variable = $var_in_exp + regexp = '\\/' + regexp = "/" exitall + end +end + string delim "\"" "\"" escape "\\" string delim "'" "'" escape "\\" string delim "<" ">" string = '[[:word:]]*/[^\n]*/[[:word:]]*' -keyword = 
"chomp|chop|chr|crypt|hex|index|lc|lcfirst|length|oct|ord|pack|q|qq|reverse|rindex|sprintf|substr|tr|uc|ucfirst|m|s|g|qw|abs|atan2|cos|exp|hex|int|log|oct|rand|sin|sqrt|srand|my|local|our|delete|each|exists|keys|values|pack|read|syscall|sysread|syswrite|unpack|vec|undef|unless|return|length|grep|sort|caller|continue|dump|eval|exit|goto|last|next|redo|sub|wantarray|pop|push|shift|splice|unshift|split|switch|join|defined|foreach|last|chop|chomp|bless|dbmclose|dbmopen|ref|tie|tied|untie|while|next|map|eq|die|cmp|lc|uc|and|do|if|else|elsif|for|use|require|package|import|chdir|chmod|chown|chroot|fcntl|glob|ioctl|link|lstat|mkdir|open|opendir|readlink|rename|rmdir|stat|symlink|umask|unlink|utime|binmode|close|closedir|dbmclose|dbmopen|die|eof|fileno|flock|format|getc|print|printf|read|readdir|rewinddir|seek|seekdir|select|syscall|sysread|sysseek|syswrite|tell|telldir|truncate|warn|write|alarm|exec|fork|getpgrp|getppid|getュpriority|kill|pipe|qx|setpgrp|setpriority|sleep|system|times|wait|waitpid" +keyword = 
"chomp|chop|chr|crypt|hex|i|index|lc|lcfirst|length|oct|ord|pack|q|qq|reverse|rindex|sprintf|substr|tr|uc|ucfirst|m|s|g|qw|abs|atan2|cos|exp|hex|int|log|oct|rand|sin|sqrt|srand|my|local|our|delete|each|exists|keys|values|pack|read|syscall|sysread|syswrite|unpack|vec|undef|unless|return|length|grep|sort|caller|continue|dump|eval|exit|goto|last|next|redo|sub|wantarray|pop|push|shift|splice|unshift|split|switch|join|defined|foreach|last|chop|chomp|bless|dbmclose|dbmopen|ref|tie|tied|untie|while|next|map|eq|die|cmp|lc|uc|and|do|if|else|elsif|for|use|require|package|import|chdir|chmod|chown|chroot|fcntl|glob|ioctl|link|lstat|mkdir|open|opendir|readlink|rename|rmdir|stat|symlink|umask|unlink|utime|binmode|close|closedir|dbmclose|dbmopen|die|eof|fileno|flock|format|getc|print|printf|read|readdir|rewinddir|seek|seekdir|select|syscall|sysread|sysseek|syswrite|tell|telldir|truncate|warn|write|alarm|exec|fork|getpgrp|getppid|getpriority|kill|pipe|qx|setpgrp|setpriority|sleep|system|times|x|wait|waitpid" comment delim '^\=(?:head1|head2|item)' '\=cut' multiline -type = '(?:\$[#]?|@|%)[[:word:]]+' +variable = '(?:\$[#]?|@|%)[/[:word:]]+' include "symbols.lang" diff --git a/src/postscript.lang b/src/postscript.lang index b0842c6..0ffeaf3 100644 --- a/src/postscript.lang +++ b/src/postscript.lang @@ -31,6 +31,10 @@ keyword = "abs|add|aload|anchorsearch|and|arc|arcn|arct|arcto|array|ashow|astore variable = $ID +environment string delim "(" ")" multiline nested begin + specialchar = '\\.' 
+end + comment start "%" include "number.lang" diff --git a/src/regexpengine.cpp b/src/regexpengine.cpp deleted file mode 100644 index fda8409..0000000 --- a/src/regexpengine.cpp +++ /dev/null @@ -1,177 +0,0 @@ -// -// C++ Implementation: regexpengine -// -// Description: -// -// -// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004 -// -// Copyright: See COPYING file that comes with this distribution -// -// -#include "regexpengine.h" - - -RegExpEngine::~RegExpEngine() -{ -} - -#include <fstream> -#include <iostream> -#include <stdlib.h> - -#include "maingeneratormap.h" -#include "keys.h" -#include "langdefloader.h" -#include "messages.h" -#include "textformatter.h" -#include "parserinfo.h" - -// purpose: -// takes the contents of a file and transform to -// syntax highlighted code in html format - -using namespace std; - -typedef enum { FOUND_EOF=0, FOUND_NL, FOUND_END } load_line_ret; - -load_line_ret -load_line(std::string& s, std::istream& is) -{ - s.erase(); - if (is.bad() || is.eof()) - return FOUND_EOF; - - char c; - while (is.get(c)) - { - if (c == '\n') - return FOUND_NL; - if (c != '\r') - s.append(1, c); - } - - return FOUND_END; -} - -void -RegExpEngine::process_file(const char *file) -{ - istream *is = 0; - - if (file) - { - is = new ifstream(file); - if (!is || ! 
(*is)) - { - cerr << "Error in opening " << file - << " for input" << endl ; - exit(1) ; - } - } - else - is = &cin; - - std::string s; - - std::string::const_iterator start, end; - boost::match_results<std::string::const_iterator> what; - boost::match_flag_type flags; - - initial_state = currentstate; - - fileinfo->line = 1; - - load_line_ret ret; - while ((ret = load_line(s, *is)) != FOUND_EOF) - { - bool matched = true; - bool found_eol = false; - start = s.begin(); - end = s.end(); - flags = boost::match_default; - - while (matched) { - if (boost::regex_search(start, end, what, currentstate->reg_exp, flags)) - { - string prefix = what.prefix(); - if (prefix.size()) - format(-1, prefix); - - for (unsigned int i = 1; i < what.size(); ++i) { - if (what[i].matched) { - format(i, string(what[i].first, what[i].second)); - if (currentstate->formatters[i]->getNextState()) { - enterState(i); - } else if (currentstate->formatters[i]->exit_state_level) { - if (currentstate->formatters[i]->exit_all) { - exitAll(); - } else { - exitState(currentstate->formatters[i]->exit_state_level); - } - } - start = what[i].second; - if (!(*start)) { - if (found_eol) - matched = false; // we had already matched end of line - found_eol = true; - } - break; // no other match is possible - } - } - - if (what[0].first != what[0].second) // matched more than 0 - flags |= boost::match_not_bol; - } - else - { - format(-1, string(start, end)); - matched = false; - } - } - - if (ret == FOUND_NL) - formatter->format_nl("\n"); - - (fileinfo->line)++; - } - - formatter->flush(); - - if (file) - delete is; - - currentstate = initial_state; // reset the initial state -} - -void -RegExpEngine::enterState(int index) -{ - states_stack.push(currentstate); - currentstate = currentstate->formatters[index]->getNextState(); -} - -void -RegExpEngine::exitState(int level) -{ - // remove additional levels - for (int l = 1; l < level; ++l) - states_stack.pop(); - - currentstate = states_stack.top(); - 
states_stack.pop(); -} - -void -RegExpEngine::exitAll() -{ - currentstate = initial_state; - states_stack = stack_of_states(); -} - -void -RegExpEngine::format(int index, const std::string &s) -{ - formatter->format(currentstate->get_elem(index), s, fileinfo); -} - diff --git a/src/regexpstate.cpp b/src/regexpstate.cpp deleted file mode 100644 index 997db08..0000000 --- a/src/regexpstate.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// -// C++ Implementation: %{MODULE} -// -// Description: -// -// -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} -// -// Copyright: See COPYING file that comes with this distribution -// -// -#include "regexpstate.h" - -#include "keys.h" -#include "messages.h" - -#include <stdlib.h> - -using namespace std; - -int RegExpState::global_id = 1; - -RegExpFormatter::RegExpFormatter(const string &el, RegExpStatePtr r, int exit, bool all) : - elem(el), exit_state_level(exit), exit_all(all), next_state(r) -{ -} - -void RegExpFormatter::setNextState(RegExpStatePtr r) -{ - next_state_strong = r; -} - -RegExpStatePtr RegExpFormatter::getNextState() const -{ - RegExpStatePtr next = next_state.lock(); - if (!next) - return next_state_strong; - - return next; -} - -/** - * Return the formatter associated to the passed index. - * If the index is negative, it returns the default formatter. - * @param index - * @return - */ -const string & -RegExpState::get_elem(int index) -{ - return formatters[(index<0 ? 
0 : index)]->elem; -} - -void -RegExpState::add_exp(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f) -{ - const string &ex = buffer.str(); - if (ex.size()) - buffer << "|"; - - buffer << s; - - formatters.push_back(f); - subExpressions.push_back(make_pair(s, *parserInfo)); -} - -void -RegExpState::freeze() -{ - const string &buffered = buffer.str(); - try { - reg_exp.assign(buffered); - } catch (boost::bad_expression &e) { - printError("bad expression: " + buffered); - throw; - } -} - -void -RegExpState::set_default_formatter(RegExpFormatterPtr f) -{ - formatters[0] = f; -} diff --git a/src/regexpstate.h b/src/regexpstate.h deleted file mode 100644 index f38ea51..0000000 --- a/src/regexpstate.h +++ /dev/null @@ -1,78 +0,0 @@ -// -// C++ Interface: %{MODULE} -// -// Description: -// -// -// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} -// -// Copyright: See COPYING file that comes with this distribution -// -// -#ifndef REGEXPSTATE_H -#define REGEXPSTATE_H - -#include <boost/regex.hpp> -#include <boost/shared_ptr.hpp> -#include <boost/weak_ptr.hpp> -#include <deque> -#include <vector> -#include "my_sstream.h" -#include "parserinfo.h" - -struct RegExpState; - -typedef boost::shared_ptr<RegExpState> RegExpStatePtr; -typedef boost::weak_ptr<RegExpState> RegExpStatePtrW; - -struct RegExpFormatter -{ - const std::string elem; // the element represented - int exit_state_level; // how many states we must leave - bool exit_all; - - RegExpFormatter(const std::string &el, RegExpStatePtr r = RegExpStatePtr(), int exit = 0, bool all = false); - - void setNextState(RegExpStatePtr r); - RegExpStatePtr getNextState() const; - - private: - RegExpStatePtrW next_state; - RegExpStatePtr next_state_strong; - /* - FIXME - the next_state is a weak pointer when there's a "nested" situation. - This allows to avoid cycles, that otherwise would prevent memory from - being correctly freed. 
- */ -}; - -typedef boost::shared_ptr<RegExpFormatter> RegExpFormatterPtr; -typedef std::deque<RegExpFormatterPtr> format_vector; -typedef std::pair<std::string, ParserInfo> SubExpressionInfo; -typedef std::vector<SubExpressionInfo> SubExpressions; - -/** -class representing a state for the regular expression engine - -@author Lorenzo Bettini - */ -struct RegExpState -{ - static int global_id; - const int id; // the identifier of the state - boost::regex reg_exp; - SubExpressions subExpressions; - format_vector formatters; - std::ostringstream buffer; - - RegExpState() : id(global_id++), formatters(1) {} - - const std::string &get_elem(int index = -1); - void add_exp(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f); - void freeze(); - void set_default_formatter(RegExpFormatterPtr f); - RegExpFormatterPtr getLastFormatter() const { return formatters[formatters.size()-1];} -}; - -#endif diff --git a/src/ruby.lang b/src/ruby.lang index 711fb7e..dcf64b7 100644 --- a/src/ruby.lang +++ b/src/ruby.lang @@ -7,7 +7,8 @@ include "number.lang" string delim "\"" "\"" escape "\\" string delim "'" "'" escape "\\" string delim "<" ">" -string = '[[:word:]]*/[^\n]*/[[:word:]]*' +regexp = '/[^\n]*/' +(symbol,regexp) = `(%r)(\{(?:\\\}|#\{[[:alnum:]]+\}|[^}])*\})` keyword = "alias|begin|BEGIN|break|case|defined|do|else|elsif|end|END|ensure|for|if|in|include|loop|next|raise|redo|rescue|retry|return|super|then|undef|unless|until|when|while|yield|false|nil|self|true|__FILE__|__LINE__|and|not|or|def|class|module|catch|fail|load|throw" @@ -15,9 +16,17 @@ comment delim '(^\=begin)' '^(\=end)' multiline type = '(\$[#]?|@@|@)([[:word:]]+|\'|\"|/)' +# don't highlight ? and ! 
as symbols if they are part of a method call +normal = '[[:alnum:]]+(\?|!)' + include "symbols.lang" +# for variable interpolation, #{ is not a comment +(symbol,cbracket) = `(#)(\{)` + cbracket = "{|}" -include "function.lang" +# no function highlighting for Ruby, since a method invocation +# can be written even without parenthesis +# include "function.lang" diff --git a/src/startapp.cc b/src/startapp.cc index a025801..5409637 100644 --- a/src/startapp.cc +++ b/src/startapp.cc @@ -1,5 +1,5 @@ /* - * Copyright (C) 1999-2005 Lorenzo Bettini, www.lorenzobettini.it + * Copyright (C) 1999-2007 Lorenzo Bettini, www.lorenzobettini.it * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -41,11 +41,10 @@ #include "parsestyles.h" #include "generatorfactory.h" -#include "textformatter.h" #include "srcuntabifier.h" #include "chartranslator.h" #include "langdefloader.h" -#include "lineoutputgenerator.h" +#include "outputgenerator.h" #include "langmap.h" #include "regexpengine.h" #include "regexpenginedebug.h" @@ -54,6 +53,7 @@ #include "outlangdefparserfun.h" #include "fileinfo.h" #include "stopwatch.h" +#include "textformatter.h" #include "languageinfer.h" @@ -72,10 +72,9 @@ ostream* sout; #include "envmapper.h" #endif // BUILD_AS_CGI - unsigned int line_num_digit = 0; // num of digits to represent line number -gengetopt_args_info args_info ; // command line structure +gengetopt_args_info args_info; // command line structure static void print_cgi_header(); static void run_ctags(const string &cmd); @@ -84,668 +83,621 @@ static void run_ctags(const string &cmd); * Print progress status information (provided --quiet is not specified) * @param message */ -static void progressInfo(const string &message) -{ - if (args_info.quiet_given) - return; +static void progressInfo(const string &message) { + if (args_info.quiet_given) + return; - cerr << message; + cerr << message; } 
StartApp::StartApp() : - docgenerator(0), formatter(0), preformatter(0), - langmap(new LangMap), outlangmap(new LangMap), generator_factory(0), - entire_doc (0), verbose (0), cssUrl (0), - use_css (0), is_cgi (0), gen_version(true), - generate_line_num(false), generate_ref(false) -{ + docgenerator(0), preformatter(0), langmap(new LangMap), + outlangmap(new LangMap), generator_factory(0), entire_doc(0), + verbose(0), cssUrl(0), use_css(0), is_cgi(0), gen_version(true), + generate_line_num(false), generate_ref(false) { } -StartApp::~StartApp() -{ - // cout << "destroying StartApp..." << endl; - cmdline_parser_free(&args_info); - - if (formatter) - delete formatter; +StartApp::~StartApp() { + // cout << "destroying StartApp..." << endl; + cmdline_parser_free(&args_info); - if (preformatter) - delete preformatter; + if (preformatter) + delete preformatter; - if (docgenerator) - delete docgenerator; + if (docgenerator) + delete docgenerator; - if (generator_factory) - delete generator_factory; + if (generator_factory) + delete generator_factory; } -int -StartApp::start(int argc, char * argv[]) -{ - char *docTitle; - char *docHeader; // the buffer with the header - char *docFooter; // the buffer with the footer - const char *header_fileName = 0; - const char *footer_fileName = 0; - unsigned i; - int v; - int tabSpaces = 0; +int StartApp::start(int argc, char * argv[]) { + char *docTitle; + char *docHeader; // the buffer with the header + char *docFooter; // the buffer with the footer + const char *header_fileName = 0; + const char *footer_fileName = 0; + unsigned i; + int v; + int tabSpaces = 0; #ifdef BUILD_AS_CGI - // map environment to parameters if used as CGI - char **temp_argv; - temp_argv = map_environment(&argc, argv); - is_cgi = temp_argv != argv; - argv = temp_argv; + // map environment to parameters if used as CGI + char **temp_argv; + temp_argv = map_environment(&argc, argv); + is_cgi = temp_argv != argv; + argv = temp_argv; #endif // BUILD_AS_CGI + if ((v = 
cmdline_parser(argc, argv, &args_info)) != 0) + // run the command-line parser; a non-zero result means the user gave bad arguments + return EXIT_FAILURE; + + if (args_info.version_given) { + print_version(); + print_copyright(); + return EXIT_SUCCESS; + } - if((v = cmdline_parser(argc, argv, &args_info)) != 0) - // calls cmdline parser. The user gived bag args if it doesn't return -1 - return EXIT_FAILURE; + if (args_info.help_given) { + cout << "GNU "; + cmdline_parser_print_help(); + print_reportbugs(); + return EXIT_SUCCESS; + } - if (args_info.version_given) - { - print_version (); - print_copyright (); - return EXIT_SUCCESS; + gen_version = (args_info.gen_version_flag != 0); + + /* initialization of global symbols */ + inputFileName = outputFileName = 0; + sout = 0; + docTitle = 0; + docHeader = 0; + docFooter = 0; + + docTitle = args_info.title_arg; + header_fileName = args_info.header_arg; + footer_fileName = args_info.footer_arg; + verbose = args_info.verbose_given; + const string style_file = args_info.style_file_arg; + + if (args_info.tab_given > 0) + tabSpaces = args_info.tab_arg; + + if (header_fileName) + docHeader = read_file(header_fileName); + + if (footer_fileName) + docFooter = read_file(footer_fileName); + + cssUrl = args_info.css_arg; + use_css = ( cssUrl != 0 ); + + entire_doc =(! args_info.no_doc_given) &&( args_info.doc_given || (docTitle != 0) || use_css ); + + string inputFileName; + if (args_info.input_given) + inputFileName = args_info.input_arg; + + string outputFileName; + if (inputFileName.size()&& ! 
is_cgi && args_info.output_given) + outputFileName = args_info.output_arg; + + bool generate_to_stdout =(args_info.output_arg && + strcmp (args_info.output_arg, "STDOUT") == 0); + + if (verbose) + setMessager(new DefaultMessages); + + printMessage( PACKAGE); + printMessage( VERSION); + printMessage(argv[0]); + + if (verbose) { + printMessage("command line arguments: "); + for (int i = 0; i < argc; ++i) { + printMessage(argv[i]); + } } - if (args_info.help_given) - { - cout << "GNU "; - cmdline_parser_print_help (); - print_reportbugs (); - return EXIT_SUCCESS; + /* + the starting default path to search for files is computed at + run-time: it is + the path of the binary + ".." + RELATIVEDATADIR + this should make the package relocatable (i.e., not stuck + with a fixed installation directory). + Of course, the GNU standards for installation directories + should be followed, but this is not a problem if you use + configure and make install features. + If no path is specified in the running program we go back to + the absolute datadir. + */ + // this is defined in fileutil.cc + string prefix_dir = get_file_path(argv[0]); + if (prefix_dir.size()) + start_path = get_file_path(argv[0])+ RELATIVEDATADIR; + else + start_path = ABSOLUTEDATADIR; + + if (args_info.data_dir_given) + data_dir = args_info.data_dir_arg; + + if (args_info.show_regex_given) { + if (LangDefLoader::show_regex(data_dir, args_info.show_regex_arg)) { + return (EXIT_SUCCESS); + } + + return (EXIT_FAILURE); } - gen_version = (args_info.gen_version_flag != 0); + if (args_info.check_lang_given) { + cout << "checking " << args_info.check_lang_arg << "... 
"; + if (LangDefLoader::check_lang_def(data_dir, args_info.check_lang_arg)) { + cout << "OK" << endl; + return (EXIT_SUCCESS); + } - /* initialization of global symbols */ - inputFileName = outputFileName = 0 ; - sout = 0 ; - docTitle = 0 ; - docHeader = 0 ; - docFooter = 0 ; + return (EXIT_FAILURE); + } + + if (args_info.check_outlang_given) { + cout << "checking " << args_info.check_outlang_arg << "... "; + textstyles = parse_outlang_def(data_dir.c_str(), + args_info.check_outlang_arg); + cout << "OK" << endl; + return (EXIT_SUCCESS); + } + + if (args_info.show_lang_elements_given) { + // we simply print all the language elements defined in the + // language definition file + if (LangDefLoader::show_lang_elements(data_dir, + args_info.show_lang_elements_arg)) + return EXIT_SUCCESS; + + return EXIT_FAILURE; + } + + string lang_map = args_info.lang_map_arg; + assert(lang_map.size()); + if (! args_info.lang_def_given) + langmap = LangMapPtr(new LangMap(data_dir, lang_map)); - docTitle = args_info.title_arg ; - header_fileName = args_info.header_arg ; - footer_fileName = args_info.footer_arg ; - verbose = args_info.verbose_given ; - const string style_file = args_info.style_file_arg; + string outlang_map = args_info.outlang_map_arg; + assert(outlang_map.size()); + if (! 
args_info.outlang_def_given) + outlangmap = LangMapPtr(new LangMap(data_dir, outlang_map)); - if ( args_info.tab_given > 0 ) - tabSpaces = args_info.tab_arg ; + if (args_info.lang_list_given) { + cout << "Supported languages (file extensions)\nand associated language definition files\n\n"; + langmap->print(); + return (EXIT_SUCCESS); + } + + if (args_info.outlang_list_given) { + cout << "Supported output languages\nand associated language definition files\n\n"; + outlangmap->print(); + return (EXIT_SUCCESS); + } - if (header_fileName) - docHeader = read_file (header_fileName); + outputbuffer = new OutputBuffer; + // when debugging, always flush the output + outputbuffer->setAlwaysFlush(args_info.debug_langdef_given); - if (footer_fileName) - docFooter = read_file (footer_fileName); + string title; + string doc_header; + string doc_footer; + string css_url; - cssUrl = args_info.css_arg ; - use_css = ( cssUrl != 0 ) ; + if (docTitle) + title = docTitle; + if ((! docTitle) && inputFileName.size()) + title = inputFileName; + if (docHeader) + doc_header = docHeader; + if (docFooter) + doc_footer = docFooter; + if (cssUrl) + css_url = cssUrl; - entire_doc = - (! args_info.no_doc_given) && - ( args_info.doc_given || (docTitle != 0) || use_css ) ; + if (args_info.line_number_ref_given) + args_info.line_number_given = args_info.line_number_ref_given; - string inputFileName; - if (args_info.input_given) - inputFileName = args_info.input_arg ; + string outlangfile; - string outputFileName; - if ( inputFileName.size() && ! is_cgi && args_info.output_given) - outputFileName = args_info.output_arg ; + if (! 
args_info.outlang_def_given) { + string out_format = args_info.out_format_arg; - bool generate_to_stdout = - (args_info.output_arg && - strcmp (args_info.output_arg, "STDOUT") == 0); + if (use_css) + out_format += "-css"; - if ( verbose ) - setMessager( new DefaultMessages ) ; + if (entire_doc) + out_format += "-doc"; - printMessage( PACKAGE ) ; - printMessage( VERSION ) ; - printMessage( argv[0] ) ; + outlangfile = outlangmap->get_file(out_format); - if (verbose) { - printMessage("command line arguments: "); - for (int i = 0; i < argc; ++i) { - printMessage(argv[i]); + if (! outlangfile.size()) { + cerr << PACKAGE << ": "; + cerr << "output language " << out_format<< " not handled" << endl; + return EXIT_FAILURE; + } + } else { + outlangfile = args_info.outlang_def_arg; } - } - - /* - the starting default path to search for files is computed at - run-time: it is - the path of the binary + ".." + RELATIVEDATADIR - this should make the package relocable (i.e., not stuck - with a fixed installation directory). - Of course, the GNU standards for installation directories - should be followed, but this is not a problem if you use - configure and make install features. - If no path is specified in the running program we go back to - the absolute datadir. - */ - // this is defined in fileutil.cc - string prefix_dir = get_file_path(argv[0]); - if (prefix_dir.size()) - start_path = get_file_path(argv[0]) + RELATIVEDATADIR; - else - start_path = ABSOLUTEDATADIR; - - if (args_info.data_dir_given) - data_dir = args_info.data_dir_arg; - - if (args_info.show_regex_given) { - if (LangDefLoader::show_regex(data_dir, args_info.show_regex_arg)) { - return(EXIT_SUCCESS); + + textstyles = parse_outlang_def(data_dir.c_str(), outlangfile.c_str()); + + if (! textstyles->file_extension.size() && ! outputFileName.size()) { + cerr << PACKAGE << ": "; + cerr << "empty file extension in output language file " <<outlangfile << endl; + return EXIT_FAILURE; + } + + const string ext = "." 
+ textstyles->file_extension; + + RefPosition refposition; + if (strcmp(args_info.gen_references_arg, "inline")==0) + refposition = INLINE; + else if (strcmp(args_info.gen_references_arg, "postline")==0) + refposition = POSTLINE; + else if (strcmp(args_info.gen_references_arg, "postdoc")==0) + refposition = POSTDOC; + else { + cerr << PACKAGE << ": "; + cerr << "Bug: unhandled reference position " <<args_info.gen_references_arg << endl; + return EXIT_FAILURE; } - return (EXIT_FAILURE); - } + if (args_info.gen_references_given && strlen(args_info.ctags_arg)> 0) { + string ctags_cmd = args_info.ctags_arg; + + if (inputFileName.size()) { + ctags_cmd += " "; + ctags_cmd += inputFileName; + } else if (args_info.inputs_num) { + for (i = 0; i < (args_info.inputs_num); ++i) { + ctags_cmd += " "; + ctags_cmd += args_info.inputs[i]; + } + } - if (args_info.check_lang_given) { - cout << "checking " << args_info.check_lang_arg << "... "; - if (LangDefLoader::check_lang_def(data_dir, args_info.check_lang_arg)) { - cout << "OK" << endl; - return(EXIT_SUCCESS); + run_ctags(ctags_cmd); } - return (EXIT_FAILURE); - } - - if (args_info.check_outlang_given) { - cout << "checking " << args_info.check_outlang_arg << "... "; - textstyles = parse_outlang_def(data_dir.c_str(), args_info.check_outlang_arg); - cout << "OK" << endl; - return (EXIT_SUCCESS); - } - - if (args_info.show_lang_elements_given) { - // we simply printe all the language elements defined in the - // language definition file - if (LangDefLoader::show_lang_elements(data_dir, args_info.show_lang_elements_arg)) - return EXIT_SUCCESS; - - return EXIT_FAILURE; - } - - string lang_map = args_info.lang_map_arg; - assert(lang_map.size()); - if (! args_info.lang_def_given) - langmap = LangMapPtr(new LangMap(data_dir, lang_map)); - - string outlang_map = args_info.outlang_map_arg; - assert(outlang_map.size()); - if (! 
args_info.outlang_def_given) - outlangmap = LangMapPtr(new LangMap(data_dir, outlang_map)); - - if (args_info.lang_list_given) { - cout << "Supported languages (file extensions)\nand associated language definition files\n\n"; - langmap->print(); - return (EXIT_SUCCESS); - } - - if (args_info.outlang_list_given) { - cout << "Supported output languages\nand associated language definition files\n\n"; - outlangmap->print(); - return (EXIT_SUCCESS); - } - - outputbuffer = new OutputBuffer; - // when debugging, always flush the output - outputbuffer->setAlwaysFlush( args_info.debug_langdef_given ); - - string title; - string doc_header; - string doc_footer; - string css_url; - - if (docTitle) - title = docTitle; - if ((! docTitle) && inputFileName.size()) - title = inputFileName; - if (docHeader) - doc_header = docHeader; - if (docFooter) - doc_footer = docFooter; - if (cssUrl) - css_url = cssUrl; - - if (args_info.line_number_ref_given) - args_info.line_number_given = args_info.line_number_ref_given; - - string outlangfile; - - if (! args_info.outlang_def_given) { - string out_format = args_info.out_format_arg; - - if (use_css) - out_format += "-css"; - - if (entire_doc) - out_format += "-doc"; - - outlangfile = outlangmap->get_file(out_format); - - if (! 
outlangfile.size()) { - cerr << PACKAGE << ": "; - cerr << "output language " << out_format - << " not handled" << endl; - return EXIT_FAILURE ; + if (tabSpaces) + preformatter = new Untabifier (tabSpaces); + else if (args_info.line_number_given) + preformatter = new Untabifier(8); + else + preformatter = new PreFormatter(); + + PreFormatterPtr chartranslator(textstyles->charTranslator); + preformatter->setFormatter(chartranslator); + + string background_color; + + generator_factory =new GeneratorFactory(textstyles, preformatter, + args_info.gen_references_given, + args_info.ctags_file_arg, + refposition, args_info.debug_langdef_given); + + if (args_info.style_css_file_given) { + parseCssStyles(data_dir, args_info.style_css_file_arg, + generator_factory, background_color); + } else { + parseStyles(data_dir, style_file, generator_factory, background_color); } - } else { - outlangfile = args_info.outlang_def_arg; - } - - textstyles = parse_outlang_def(data_dir.c_str(), outlangfile.c_str()); - - if (! textstyles->file_extension.size() && ! outputFileName.size()) { - cerr << PACKAGE << ": "; - cerr << "empty file extension in output language file " << - outlangfile << endl; - return EXIT_FAILURE ; - } - - const string ext = "." 
+ textstyles->file_extension; - - RefPosition refposition; - if (strcmp(args_info.gen_references_arg, "inline")==0) - refposition = INLINE; - else if (strcmp(args_info.gen_references_arg, "postline")==0) - refposition = POSTLINE; - else if (strcmp(args_info.gen_references_arg, "postdoc")==0) - refposition = POSTDOC; - else { - cerr << PACKAGE << ": "; - cerr << "Bug: unhandled reference position " << - args_info.gen_references_arg << endl; - return EXIT_FAILURE ; - } - - if (args_info.gen_references_given && strlen(args_info.ctags_arg) > 0) { - string ctags_cmd = args_info.ctags_arg; - if (inputFileName.size()) { - ctags_cmd += " "; - ctags_cmd += inputFileName; - } else if (args_info.inputs_num) { - for ( i = 0 ; i < (args_info.inputs_num) ; ++i ) { - ctags_cmd += " "; - ctags_cmd += args_info.inputs[i]; - } + generator_factory->addDefaultGenerator(); + + if (background_color != "") + background_color = generator_factory->preprocessColor(background_color); + + docgenerator = new DocGenerator(title, inputFileName, + doc_header, doc_footer, + css_url, background_color, entire_doc, + textstyles->docTemplate.toStringBegin(), + textstyles->docTemplate.toStringEnd());; + + if (is_cgi) + print_cgi_header(); + + // let's start the translation :-) + + generate_line_num =(args_info.line_number_given || args_info.line_number_ref_given); + generate_ref = args_info.line_number_ref_given; + + if (args_info.lang_def_arg) + lang_file = args_info.lang_def_arg; + + int result= EXIT_SUCCESS; + + if (args_info.src_lang_given) + source_language = args_info.src_lang_arg; + + // if a stopwatch is created, when it is deleted (automatically + // since we're using a shared pointer, it will print the + // elapsed seconds. + boost::shared_ptr<StopWatch> stopwatch; + if (args_info.statistics_given) + stopwatch = boost::shared_ptr<StopWatch>(new StopWatch); + + // first the --input file + if (! args_info.inputs_num) { + result = processFile(inputFileName, (generate_to_stdout ? 
"" : outputFileName), ext); } - run_ctags(ctags_cmd); - } - - formatter = new TextFormatter; - - if (tabSpaces) - preformatter = new Untabifier (tabSpaces); - else if (args_info.line_number_given) - preformatter = new Untabifier(8); - else - preformatter = new PreFormatter(); - - PreFormatterPtr chartranslator(textstyles->charTranslator); - preformatter->setFormatter(chartranslator); - - string background_color; - - generator_factory = - new GeneratorFactory(textstyles, preformatter, - args_info.gen_references_given, - args_info.ctags_file_arg, - refposition, args_info.debug_langdef_given); - - if (args_info.style_css_file_given) { - parseCssStyles(data_dir, args_info.style_css_file_arg, generator_factory, background_color); - } else { - parseStyles(data_dir, style_file, generator_factory, background_color); - } - - generator_factory->addDefaultGenerator(); - - if (background_color != "") - background_color = generator_factory->preprocessColor( background_color ); - - docgenerator = new DocGenerator(title, inputFileName, - doc_header, doc_footer, - css_url, background_color, entire_doc, - textstyles->docTemplate.toStringBegin(), - textstyles->docTemplate.toStringEnd());; - - if ( is_cgi ) - print_cgi_header() ; - - // let's start the translation :-) - - generate_line_num = - (args_info.line_number_given || args_info.line_number_ref_given); - generate_ref = args_info.line_number_ref_given; - - if (args_info.lang_def_arg) - lang_file = args_info.lang_def_arg; - - int result = EXIT_SUCCESS; - - if (args_info.src_lang_given) - source_language = args_info.src_lang_arg; - - // if a stopwatch is created, when it is deleted (automatically - // since we're using a shared pointer, it will print the - // elapsed seconds. - boost::shared_ptr<StopWatch> stopwatch; - if (args_info.statistics_given) - stopwatch = boost::shared_ptr<StopWatch>(new StopWatch); - - // first the --input file - if ( ! args_info.inputs_num ) { - result = processFile(inputFileName, (generate_to_stdout ? 
"" : outputFileName), ext) ; - } - - // let's process other files, if there are any - if ( args_info.inputs_num && !is_cgi ) { - for ( i = 0 ; i < (args_info.inputs_num) ; ++i ) { - progressInfo(string("Processing ") + args_info.inputs[i] + " ... "); - const string &outputFileName = createOutputFileName (args_info.inputs[i], - args_info.output_dir_arg, ext); - result = processFile - ( args_info.inputs[i], - (generate_to_stdout ? "" : outputFileName), - ext) ; - if (result == EXIT_FAILURE) - break; - progressInfo("created " + outputFileName + "\n"); + // let's process other files, if there are any + if (args_info.inputs_num && !is_cgi) { + for (i = 0; i < (args_info.inputs_num); ++i) { + progressInfo(string("Processing ")+ args_info.inputs[i] + " ... "); + const string &outputFileName = createOutputFileName( + args_info.inputs[i], args_info.output_dir_arg, ext); + result = processFile(args_info.inputs[i], (generate_to_stdout ? "" : outputFileName), ext); + if (result == EXIT_FAILURE) + break; + progressInfo("created " + outputFileName + "\n"); + } } - } - delete outputbuffer; - outputbuffer = 0; + delete outputbuffer; + outputbuffer = 0; - return (result); + return (result); } -void -StartApp::print_copyright() -{ - int i; +void StartApp::print_copyright() { + int i; - for (i = 1; i <= copyright_text_length; ++i) - cout << copyright_text[i] << endl;; + for (i = 1; i <= copyright_text_length; ++i) + cout << copyright_text[i] << endl; + ; } -void -StartApp::print_reportbugs() -{ - int i; +void StartApp::print_reportbugs() { + int i; - for (i = 1; i <= reportbugs_text_length; ++i) - cout << reportbugs_text[i] << endl; + for (i = 1; i <= reportbugs_text_length; ++i) + cout << reportbugs_text[i] << endl; } -void -StartApp::print_version() -{ - cout << "GNU " << PACKAGE << " " << VERSION << endl; +void StartApp::print_version() { + cout << "GNU " << PACKAGE << " " << VERSION << endl; } int process_file(const char *file, TextFormatter *pre, const string &path, - const 
string &lang_file, FileInfo *fileinfo, bool verbose) -{ - RegExpStatePtr initial_state = LangDefLoader::get_lang_def(path, lang_file); - - try{ - printMessage("Processing " + string((file ? file : "standard input")) + " with regex"); - printMessage("Using language definition " + lang_file); - RegExpEnginePtr engine; - if (args_info.debug_langdef_given) { - RegExpEngineDebug *debugEngine = new RegExpEngineDebug(initial_state, pre, fileinfo); - debugEngine->setInteractive( strcmp(args_info.debug_langdef_arg, "interactive" ) == 0); - engine = RegExpEnginePtr(debugEngine); - } else { - engine = RegExpEnginePtr(new RegExpEngine(initial_state, pre, fileinfo)); + const string &lang_file, FileInfo *fileinfo, bool verbose) { + RegExpStatePtr initial_state = LangDefLoader::get_lang_def(path, lang_file); + + try { + printMessage("Processing " + string((file ? file : "standard input")) + " with regex"); + printMessage("Using language definition " + lang_file); + RegExpEnginePtr engine; + if (args_info.debug_langdef_given) { + RegExpEngineDebug *debugEngine = new RegExpEngineDebug(initial_state, pre, fileinfo); + debugEngine->setInteractive( strcmp(args_info.debug_langdef_arg, "interactive" ) == 0); + engine = RegExpEnginePtr(debugEngine); + } else { + engine = RegExpEnginePtr(new RegExpEngine(initial_state, pre, fileinfo)); + } + engine->process_file(file); } - engine->process_file(file); - } - catch(...) - { - exitError("error during regex processing"); - } - return 0; + catch(...) 
+ { + exitError("error during regex processing"); + } + return 0; } -string StartApp::inferLang(const string &inputFileName) -{ - printMessage("inferring input language...", cerr); - if (!inputFileName.size()) { - cerr << PACKAGE << ": "; - cerr << "missing feature: language inference requires input file" << endl; - return ""; - } +string StartApp::inferLang(const string &inputFileName) { + printMessage("inferring input language...", cerr); + if (!inputFileName.size()) { + cerr << PACKAGE << ": "; + cerr << "missing feature: language inference requires input file" << endl; + return ""; + } - LanguageInfer languageInfer; + LanguageInfer languageInfer; - const string &result = languageInfer.infer(inputFileName); - if (result.size()) { - printMessage( "inferred input language: " + result, cerr ) ; + const string &result = languageInfer.infer(inputFileName); + if (result.size()) { + printMessage("inferred input language: " + result, cerr); - // OK now map it into a .lang file - string mapped_lang = langmap->get_file(result); + // OK now map it into a .lang file + string mapped_lang = langmap->get_file(result); - if (!mapped_lang.size()) { - // try the lower version - mapped_lang = langmap->get_file(Utils::tolower(result)); - } + if (!mapped_lang.size()) { + // try the lower version + mapped_lang = langmap->get_file(Utils::tolower(result)); + } - if (mapped_lang.size()) { - return mapped_lang; + if (mapped_lang.size()) { + return mapped_lang; + } + } else { + printMessage("couldn't infer input language", cerr); } - } else { - printMessage( "couldn't infer input language", cerr ) ; - } - return ""; + return ""; } -int -StartApp::processFile(const string &inputFileName, const string &outputFileName, const string &file_extension) -{ - FILE *in = 0; - bool deleteOStream = false ; - bool langSpecFound = false; - - if ( outputFileName.size() ) { - sout = new ofstream(outputFileName.c_str()) ; - if ( ! 
(*sout) ) { - cerr << "Error in creating " << outputFileName << " for output" << endl ; - return EXIT_FAILURE ; +int StartApp::processFile(const string &inputFileName, + const string &outputFileName, const string &file_extension) { + FILE *in = 0; + bool deleteOStream = false; + bool langSpecFound = false; + + if (outputFileName.size()) { + sout = new ofstream(outputFileName.c_str()); + if (! (*sout)) { + cerr << "Error in creating " << outputFileName << " for output" << endl; + return EXIT_FAILURE; + } + deleteOStream = true; + printMessage("output file: " + inputFileName); } - deleteOStream = true; - printMessage("output file: " + inputFileName); - } - if (inputFileName.size()) - { - unsigned int lines = get_line_count (inputFileName); - printMessage("input file: " + inputFileName); - - line_num_digit = 0; - while (lines) - { - ++line_num_digit; - lines /= 10; + if (inputFileName.size()) { + unsigned int lines = get_line_count(inputFileName); + printMessage("input file: " + inputFileName); + + line_num_digit = 0; + while (lines) { + ++line_num_digit; + lines /= 10; } + } else + line_num_digit = 5; + // if we read from stdin, we can't read the file in advance and + // check how many lines of code it contains. In this case set + // the number of digit for the line number to 5. + + /* + * Use default values for any options not provided + */ + if (sout == 0) { + sout = &cout; } - else - line_num_digit = 5; - // if we read from stdin, we can't read the file in advance and - // check how many lines of code it contains. In this case set - // the number of digit for the line number to 5. - - /* - * Use default values for any options not provided - */ - if (sout == 0) { - sout = &cout; - } - - if (in == 0) { - ; /* Well stdin already points to stdin so, .... */ - } - - OutputGenerator *outputgenerator = 0; - - if (generate_line_num) - outputgenerator = - new LineOutputGenerator(*sout, - &(textstyles->refstyle.anchor), generate_ref, - (args_info.line_number_ref_given ? 
args_info.line_number_ref_arg : ""), - textstyles->line_prefix); - else - outputgenerator = new OutputGenerator(*sout, textstyles->line_prefix); - - // when debugging, always flush the output - outputgenerator->setAlwaysFlush( args_info.debug_langdef_given ); - - outputbuffer->setOutputGenerator(outputgenerator); - - docgenerator->set_gen_version (gen_version); - - printMessage( "translating source code... ", cerr ) ; - - string langfile = lang_file; - - if (args_info.infer_lang_given) { - langfile = inferLang(inputFileName); - - if (langfile.size()) - langSpecFound = true; - } - - // language inference has the precedence (if it succeeds) - if (!langfile.size() && !langSpecFound) { - // find the language definition file associated to a language - if (source_language.size()) { - langfile = langmap->get_file(source_language); - if (! langfile.size()) - { - if (! args_info.failsafe_given) - { - cerr << PACKAGE << ": "; - cerr << "source language " << source_language - << " not handled" << endl; - return EXIT_FAILURE ; - } + + if (in == 0) { + ; /* Well stdin already points to stdin so, .... */ + } + + OutputGenerator *outputgenerator = 0; + + if (generate_line_num) + outputgenerator =new OutputGenerator(*sout, generator_factory->getTextFormatter()->getGenerator("linenum"), + &(textstyles->refstyle.anchor), generate_ref, + (args_info.line_number_ref_given ? args_info.line_number_ref_arg : ""), + textstyles->line_prefix); + else + outputgenerator = new OutputGenerator(*sout, textstyles->line_prefix); + + // when debugging, always flush the output + outputgenerator->setAlwaysFlush(args_info.debug_langdef_given); + + outputbuffer->setOutputGenerator(outputgenerator); + + docgenerator->set_gen_version(gen_version); + + printMessage("translating source code... 
", cerr); + + string langfile = lang_file; + + if (args_info.infer_lang_given) { + langfile = inferLang(inputFileName); + + if (langfile.size()) + langSpecFound = true; + } + + // language inference has the precedence (if it succeeds) + if (!langfile.size() && !langSpecFound) { + // find the language definition file associated to a language + if (source_language.size()) { + langfile = langmap->get_file(source_language); + if (! langfile.size()) { + if (! args_info.failsafe_given) { + cerr << PACKAGE << ": "; + cerr << "source language " << source_language<< " not handled" << endl; + return EXIT_FAILURE; + } + } else + langSpecFound = true; + } else { + if (! inputFileName.size()) { + if (! args_info.failsafe_given) { + cerr << PACKAGE << ": "; + cerr << "when using stdin, please specify a source language"<< endl; + return EXIT_FAILURE; + } + } + + string file_ext = get_file_extension(inputFileName); + + if (file_ext != "") + langfile = langmap->get_file(file_ext); + + if (langfile.size()) + langSpecFound = true; } - else + } else langSpecFound = true; - } else { - if (! inputFileName.size()) - { - if (! 
args_info.failsafe_given) - { - cerr << PACKAGE << ": "; - cerr << "when using stdin, please specify a source language" - << endl; - return EXIT_FAILURE ; - } - } - string file_ext = get_file_extension (inputFileName); + // language inference is always performed, if the other attempts failed + // if --infer-lang was specified at command line, then the inference + // has already been performed, otherwise we perform it now + if (!langSpecFound && !args_info.infer_lang_given) { + langfile = inferLang(inputFileName); - if (file_ext != "") - langfile = langmap->get_file(file_ext); + if (langfile.size()) + langSpecFound = true; + } - if (langfile.size()) + if (!langSpecFound && args_info.failsafe_given) { + // OK we use default.lang + langfile = "default.lang"; langSpecFound = true; } - } - else - langSpecFound = true; - - // language inference is always performed, if the other attempts failed - // if --infer-lang was specified at command line, then the inference - // has already been performed, otherwise we perform it now - if (!langSpecFound && !args_info.infer_lang_given) { - langfile = inferLang(inputFileName); - - if (langfile.size()) - langSpecFound = true; - } - - if (!langSpecFound && args_info.failsafe_given) { - // OK we use default.lang - langfile = "default.lang"; - langSpecFound = true; - } - - if (langSpecFound) - { - docgenerator->generate_start_doc (); - - const string &i_file_name = get_input_file_name(inputFileName); - const char *input_file_name = (i_file_name.size() ? i_file_name.c_str() : 0); - - FileInfo fileinfo(i_file_name, outputFileName); - process_file(input_file_name, formatter, data_dir, langfile, - &fileinfo, verbose); - - outputbuffer->flush(); - - docgenerator->generate_end_doc (); - - printMessage( "done !", cerr ) ; - } else { - cerr << PACKAGE << ": "; - cerr << "unknown input language for " - << (inputFileName.size() ? 
inputFileName : "(stdin)") << endl; - return EXIT_FAILURE ; - } - - /* - else // we're in failsafe mode so we simply copy the file to the output - { - istream *input; - if(! inputFileName.size()) - input = &cin; - else - input = open_file_istream_or_error(inputFileName); - *sout << input->rdbuf(); + if (langSpecFound) { + docgenerator->generate_start_doc(); + + const string &i_file_name = get_input_file_name(inputFileName); + const char *input_file_name = (i_file_name.size() ? i_file_name.c_str() : 0); - if (input != &cin) - delete input; - } - */ + FileInfo fileinfo(i_file_name, outputFileName); + process_file(input_file_name, generator_factory->getTextFormatter(), + data_dir, langfile, &fileinfo, verbose); - sout->flush (); + outputbuffer->flush(); - if ( deleteOStream ) - delete sout ; + docgenerator->generate_end_doc(); + + printMessage("done !", cerr); + } else { + cerr << PACKAGE << ": "; + cerr << "unknown input language for "<< (inputFileName.size() ? inputFileName : "(stdin)") << endl; + return EXIT_FAILURE; + } - delete outputgenerator; + /* + else // we're in failsafe mode so we simply copy the file to the output + { + istream *input; + if(! 
inputFileName.size()) + input = &cin; + else + input = open_file_istream_or_error(inputFileName); - return EXIT_SUCCESS; + *sout << input->rdbuf(); + + if (input != &cin) + delete input; + } + */ + + sout->flush(); + + if (deleteOStream) + delete sout; + + delete outputgenerator; + + return EXIT_SUCCESS; } -void run_ctags(const string &cmd) -{ - printMessage("Running ctags: " + cmd); +void run_ctags(const string &cmd) { + printMessage("Running ctags: " + cmd); - int res = system(cmd.c_str()); + int res = system(cmd.c_str()); - if (res != 0) { - exitError("error running ctags"); - } + if (res != 0) { + exitError("error running ctags"); + } } -void -print_cgi_header() -{ - printf( "Content-type: text/html\n" ) ; - printf( "\n" ) ; +void print_cgi_header() { + printf("Content-type: text/html\n"); + printf("\n"); } diff --git a/src/startapp.h b/src/startapp.h index f98327b..8c83240 100644 --- a/src/startapp.h +++ b/src/startapp.h @@ -23,7 +23,6 @@ #include "langmap.h" #include "textstyles.h" -class TextFormatter; class PreFormatter; class DocGenerator; class GeneratorFactory; @@ -33,7 +32,6 @@ class StartApp protected: char *inputFileName, *outputFileName; /* what we're reading */ DocGenerator *docgenerator; - TextFormatter *formatter; PreFormatter *preformatter; LangMapPtr langmap; LangMapPtr outlangmap; diff --git a/src/texinfo.style b/src/texinfo.style index 3adcf37..9d15cd7 100644 --- a/src/texinfo.style +++ b/src/texinfo.style @@ -1,6 +1,7 @@ keyword, type b ; variable f, i ; string f ; +regexp f ; comment nf, i, noref ; preproc b ; diff --git a/src/xhtml_notfixed.outlang b/src/xhtml_notfixed.outlang new file mode 100644 index 0000000..d5a8eb0 --- /dev/null +++ b/src/xhtml_notfixed.outlang @@ -0,0 +1,16 @@ +include "xhtml_common.outlang" + +fixed "<span style=\"font-family: monospace;\">$text</span>" + +doctemplate +"<!-- Generator: $additional --> +$header" +"$footer" +end + +translations +"\n" "<br />\n" +" " " " +'^ ' " " # a space at the beginning of a line 
+"\t" " " +end diff --git a/src/xhtmltable.outlang b/src/xhtmltable.outlang index 217b09d..ec73fc1 100644 --- a/src/xhtmltable.outlang +++ b/src/xhtmltable.outlang @@ -1,14 +1,7 @@ include "xhtml_common.outlang" doctemplate -"<table style=\"background-color: $docbgcolor\"> -<tbody> -<tr><td> -<pre><tt>" -"</tt></pre> -</td></tr> -</tbody> -</table> -" +"<table style=\"background-color: $docbgcolor\"><tbody><tr><td><pre><tt>" +"</tt></pre></td></tr></tbody></table>" end |