src-highlite.git - src-highlite

index : src-highlite.git

src-highlite

summary refs log tree commit diff

path: root/src

diff options

Diffstat (limited to 'src')

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

120

-rw-r--r--

src/check-regexp_cmd.c

293

-rw-r--r--

src/check-regexp_cmd.ggo

-rw-r--r--

src/check-regexp_cmd.h

-rw-r--r--

src/cmdline.c (renamed from src/lib/cmdline.c)

-rw-r--r--

src/cmdline.ggo (renamed from src/lib/cmdline.ggo)

-rw-r--r--

src/cmdline.h (renamed from src/lib/cmdline.h)

-rw-r--r--

src/cmdlineargs.h (renamed from src/lib/cmdlineargs.h)

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

src/docbookdoc.outlang

-rw-r--r--

src/esc.style

-rw-r--r--

src/html.lang

-rw-r--r--

src/html_notfixed.outlang

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

src/lib/delimitedlangelem.cpp

-rw-r--r--

src/lib/delimitedlangelem.h

-rw-r--r--

src/lib/fileutil.cc

-rw-r--r--

src/lib/generatorfactory.cc

180

-rw-r--r--

src/lib/generatorfactory.h

-rw-r--r--

src/lib/generatormap.cpp

154

-rw-r--r--

src/lib/generatormap.h

-rw-r--r--

src/lib/keys.h

-rw-r--r--

src/lib/langdefparser.yy

-rw-r--r--

src/lib/langdefscanner.ll

-rw-r--r--

src/lib/langelem.cpp

-rw-r--r--

src/lib/langelem.h

-rw-r--r--

src/lib/langelems.cpp

-rw-r--r--

src/lib/langelems.h

-rw-r--r--

src/lib/languageinfer.cpp

-rw-r--r--

src/lib/linebuffer.cpp

-rw-r--r--

src/lib/linebuffer.h

-rw-r--r--

src/lib/lineoutputgenerator.cpp

-rw-r--r--

src/lib/lineoutputgenerator.h

-rw-r--r--

src/lib/maingeneratormap.cpp

-rw-r--r--

src/lib/maingeneratormap.h

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

src/lib/namedsubexpslangelem.cpp

-rw-r--r--

src/lib/namedsubexpslangelem.h

-rw-r--r--

src/lib/outlangdefparser.yy

-rw-r--r--

src/lib/outputgenerator.cpp

-rw-r--r--

src/lib/outputgenerator.h

-rw-r--r--

src/lib/parserinfo.h

-rw-r--r--

src/lib/parsestyles.h

-rw-r--r--

src/lib/readtags.c

-rw-r--r--

src/lib/readtags.h

-rw-r--r--

src/lib/refgeneratormap.cpp

214

-rw-r--r--

src/lib/refgeneratormap.h

-rw-r--r--

src/lib/refposition.h

-rw-r--r--

src/lib/regexpengine.cpp

265

-rw-r--r--

src/lib/regexpengine.h (renamed from src/regexpengine.h)

-rw-r--r--

src/lib/regexpenginedebug.cpp (renamed from src/regexpenginedebug.cpp)

-rw-r--r--

src/lib/regexpenginedebug.h (renamed from src/regexpenginedebug.h)

-rw-r--r--

src/lib/regexpreprocessor.cpp

214

-rw-r--r--

src/lib/regexpreprocessor.h

-rw-r--r--

src/lib/regexpstate.cpp

207

-rw-r--r--

src/lib/regexpstate.h

191

-rw-r--r--

src/lib/regexpstatebuilder.H

-rw-r--r--

src/lib/regexpstatebuilder.cpp

142

-rw-r--r--

src/lib/regexpstatebuilder.h

-rw-r--r--

src/lib/regexpstatebuilder_dbtab.cc

-rw-r--r--

src/lib/regexpstateprinter.cpp

-rw-r--r--

src/lib/statelangelem.cpp

-rw-r--r--

src/lib/statelangelem.h

-rw-r--r--

src/lib/stringdef.cpp

-rw-r--r--

src/lib/stringdef.h

-rw-r--r--

src/lib/stringlistlangelem.cpp

-rw-r--r--

src/lib/stringlistlangelem.h

-rw-r--r--

src/lib/stylecssparser.yy

-rw-r--r--

src/lib/stylecssscanner.ll

-rw-r--r--

src/lib/styleparser.yy

-rw-r--r--

src/lib/stylescanner.ll

-rw-r--r--

src/lib/test_langinfer.cpp

-rw-r--r--

src/lib/test_regexpreprocessor_main.cpp

167

-rw-r--r--

src/lib/textformatter.cpp

332

-rw-r--r--

src/lib/textformatter.h

120

-rw-r--r--

src/lib/tostringcollection.h

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

102

-rw-r--r--

src/postscript.lang

-rw-r--r--

src/regexpengine.cpp

177

-rw-r--r--

-rw-r--r--

-rw-r--r--

-rw-r--r--

1104

-rw-r--r--

src/startapp.h

-rw-r--r--

src/texinfo.style

-rw-r--r--

src/xhtml_notfixed.outlang

-rw-r--r--

src/xhtmltable.outlang

104 files changed, 3603 insertions, 2248 deletions

diff --git a/src/.cvsignore b/src/.cvsignore
index ab0c685..b5caf54 100644
--- a/src/.cvsignore
+++ b/src/.cvsignore

@@ -26,3 +26,4 @@ cpp2html.cc.html

cpp2html.h.html

src-hilite-lesspipe.sh

libsh.a

+check-regexp \ No newline at end of file

diff --git a/src/Makefile.am b/src/Makefile.am
index e6ca190..ffed6b7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am

@@ -1,4 +1,4 @@

# This file is free software; as a special exception the author gives

# unlimited permission to copy and/or distribute it, with or without

@@ -8,10 +8,24 @@

# WITHOUT ANY WARRANTY, to the extent permitted by law; without even the

# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

-bin_PROGRAMS = source-highlight

+SUFFIXES = .ggo

+if NO_GENGETOPT

+.ggo.c:

+ touch $@

+else

+check-regexp_cmd.c: $(srcdir)/check-regexp_cmd.ggo

+ $(GENGETOPT) --input $(srcdir)/check-regexp_cmd.ggo --unamed-opts -F check-regexp_cmd

+.ggo.c:

+ $(GENGETOPT) --input $< --unamed-opts --no-handle-help --no-handle-version

+endif

+bin_PROGRAMS = source-highlight check-regexp

bin_SCRIPTS = java2html cpp2html src-hilite-lesspipe.sh

-noinst_LTLIBRARIES = libsh.la

-libsh_la_LIBADD = $(BOOST_REGEX_LIB) $(LDADD) lib/libcommon.la

EXTRA_PROGRAMS = source-highlight-cgi

SRCHILITE = $(top_builddir)/src/source-highlight$(EXEEXT)

@@ -24,8 +38,10 @@ AM_CPPFLAGS = -I$(top_srcdir)/gl -I$(top_builddir)/gl

LDADD = @LEXLIB@ $(top_builddir)/gl/libgnu.la

-source_highlight_LDADD = libsh.la

-source_highlight_cgi_LDADD = libsh.la

+source_highlight_LDADD = $(BOOST_REGEX_LIB) $(LDADD) lib/libcommon.la

+source_highlight_cgi_LDADD = $(source_highlight_LDADD)

+check_regexp_LDADD = $(BOOST_REGEX_LIB) $(top_builddir)/gl/libgnu.la

SUBDIRS = \

lib \

@@ -35,12 +51,11 @@ DIST_SUBDIRS = \

lib \

includes

-libsh_la_SOURCES = source-highlight.cc regexpengine.cpp regexpstate.cpp regexpenginedebug.cpp

# for most rules, we use one file per line. `diffs' are more clear this way

-source_highlight_SOURCES = startapp.cc

+source_highlight_SOURCES = cmdline.c startapp.cc source-highlight.cc

+check_regexp_SOURCES = check-regexp_cmd.c check-regexp.cpp

-source_highlight_cgi_SOURCES = startapp-cgi.cc envmapper.c

+source_highlight_cgi_SOURCES = cmdline.c startapp-cgi.cc envmapper.c

# files that we don't want automake/autoconf to touch ever.

# just stick them in the distribution as-is

@@ -81,10 +96,12 @@ xhtml_common.outlang \

xhtmlcss.outlang \

xhtmldoc.outlang \

xhtml.outlang \

+xhtml_notfixed.outlang \

xhtmltable.outlang \

texinfo.outlang \

javadoc.outlang \

-docbook.outlang

+docbook.outlang \

+docbookdoc.outlang

STYLEFILES = texinfo.style esc.style

@@ -111,4 +128,4 @@ tags:

.PHONY: tags

-noinst_HEADERS = regexpengine.h regexpstate.h asserttest.h regexpenginedebug.h

+noinst_HEADERS = asserttest.h cmdline.h cmdlineargs.h cmdline.ggo check-regexp_cmd.h check-regexp_cmd.ggo

diff --git a/src/asserttest.h b/src/asserttest.h
index 7b54235..c3275a2 100644
--- a/src/asserttest.h
+++ b/src/asserttest.h

@@ -15,8 +15,9 @@

#include <iostream>

#include <stdlib.h>

+template <typename T>

int

-assertEquals(const std::string &expected, const std::string &actual)

+assertEquals(T expected, T actual)

{

if (expected != actual) {

std::cerr << "assertEquals failed" << std::endl;

@@ -30,7 +31,7 @@ assertEquals(const std::string &expected, const std::string &actual)

}

int

-assertEquals(bool expected, bool actual)

+assertEquals(const std::string &expected, const std::string &actual)

{

if (expected != actual) {

std::cerr << "assertEquals failed" << std::endl;

diff --git a/src/changelog.lang b/src/changelog.lang
index 828bf26..0a1fb37 100644
--- a/src/changelog.lang
+++ b/src/changelog.lang

@@ -3,14 +3,6 @@ state date start '[[:digit:]]{2,4}-?[[:digit:]]{2}-?[[:digit:]]{2}' begin

name = '([[:word:]]|[[:punct:]])+'

end

-state symbol start '^(?:[\t]+|[[:space:]]+)\*[[:space:]]+' begin

- state file start '[^:]+\:' begin

- normal start '.'

- end

-end

+(normal,symbol,normal,file)= `(^[[:blank:]]+)(\*)([[:blank:]]+)((?:[^:]+\:)?)`

+(normal,file)= `(^[[:blank:]]+)((?:[^:]+\:)?)`

-state normal start '^(?:[\t]+|[[:space:]]+)' begin

- state file start '[^:]+\:' begin

- normal start '.'

- end

-end

diff --git a/src/check-regexp.cpp b/src/check-regexp.cpp
new file mode 100644
index 0000000..1808381
--- /dev/null
+++ b/src/check-regexp.cpp

@@ -0,0 +1,120 @@

+/*

+ *

+ * This program is free software; you can redistribute it and/or modify

+ * it under the terms of the GNU General Public License as published by

+ * the Free Software Foundation; either version 2 of the License, or

+ * (at your option) any later version.

+ *

+ * This program is distributed in the hope that it will be useful,

+ * but WITHOUT ANY WARRANTY; without even the implied warranty of

+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+ * GNU General Public License for more details.

+ *

+ * You should have received a copy of the GNU General Public License

+ * along with this program; if not, write to the Free Software

+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

+ *

+ */

+/*

+ * This program is part of GNU source-highlight simply to

+ * check a regular expression against a given expression

+ */

+/*

+ * some examples:

+ *

+ ./check-regexp "([^[:alnum:]]+)[^[:blank:]]*(1�~)[^[:blank:]]*(1�~)" "|w|\$|e|d| ^w^w^ ?a?b?"

+ trying to match: |w|$|e|d| ^w^w^ ?a?b?

+ against : ([^[:alnum:]]+)[^[:blank:]]*(1�~)[^[:blank:]]*(1�~)

+ what[0]: |w|$|e|d|

+ what[1]: |

+ what[2]: |

+ what[3]: |

+ prefix:

+ what[0]: ^w^w^

+ what[1]: ^

+ what[2]: ^

+ what[3]: ^

+ prefix:

+ what[0]: ?a?b?

+ what[1]: ?

+ what[2]: ?

+ what[3]: ?

+ total number of matches: 3

+ *

+ */

+#include <cstdlib>

+#include <boost/regex.hpp>

+#include <iostream>

+#include "check-regexp_cmd.h"

+using namespace std;

+int main(int argc, char * argv[]) {

+ gengetopt_args_info args_info; // command line structure

+ if (cmdline_parser(argc, argv, &args_info)!= 0)

+ // calls cmdline parser. The user gave bad args if it doesn't return 0

+ return EXIT_FAILURE;

+ if (args_info.inputs_num < 2) {

+ cerr << "Syntax: check-regexp <regular expression> <expressions...>" << endl;

+ exit(EXIT_FAILURE);

+ }

+ boost::regex regex(args_info.inputs[0]);

+ for (unsigned int i = 1; i < args_info.inputs_num; ++i) {

+ string tomatch = args_info.inputs[i];

+ std::string::const_iterator start, end;

+ boost::match_results<std::string::const_iterator> what;

+ boost::match_flag_type flags;

+ start = tomatch.begin();

+ end = tomatch.end();

+ flags = boost::match_default;

+ cout << "\nsearching : " << tomatch << endl;

+ cout << "for the regexp : " << regex << endl;

+ int num_of_matches = 0;

+ while (boost::regex_search(start, end, what, regex, flags)) {

+ string prefix = what.prefix();

+ if (prefix.size())

+ cout << "prefix: " << prefix << endl;

+ cout << "what[0]: " << what[0] << endl;

+ for (unsigned int i = 1; i < what.size(); ++i) {

+ if (what[i].matched) {

+ cout << " what[" << i << "]: "<< what[i] << endl;

+ cout << " length: " << what[i].length()<< endl;

+ }

+ string suffix = what.suffix();

+ if (suffix.size())

+ cout << "suffix: " << suffix << endl;

+ // update search position:

+ start = what[0].second;

+ // update flags:

+ flags |= boost::match_prev_avail;

+ flags |= boost::match_not_bob;

+ ++num_of_matches;

+ }

+ cout << "total number of matches: " << num_of_matches << endl;

+ }

diff --git a/src/check-regexp_cmd.c b/src/check-regexp_cmd.c
new file mode 100644
index 0000000..f20ab59
--- /dev/null
+++ b/src/check-regexp_cmd.c

@@ -0,0 +1,293 @@

+/*

+ File autogenerated by gengetopt version 2.19.2

+ generated with the following command:

+ /home/bettini/usr/local/bin/gengetopt --input ./check-regexp_cmd.ggo --unamed-opts -F check-regexp_cmd

+ The developers of gengetopt consider the fixed text that goes in all

+ gengetopt output files to be in the public domain:

+ we make no copyright claims on it.

+*/

+/* If we use autoconf. */

+#ifdef HAVE_CONFIG_H

+#include "config.h"

+#endif

+#include <stdio.h>

+#include <stdlib.h>

+#include <string.h>

+#include "getopt.h"

+#include "check-regexp_cmd.h"

+const char *gengetopt_args_info_purpose = "Tries to match the regular expression passed as the first argument\nagainst the strings passed as remaining arguments";

+const char *gengetopt_args_info_usage = "Usage: check-regexp 'regular expression' 'string1' 'string2' ...";

+const char *gengetopt_args_info_description = "";

+const char *gengetopt_args_info_help[] = {

+ " -h, --help Print help and exit",

+ " -V, --version Print version and exit",

+ 0

+};

+static

+void clear_given (struct gengetopt_args_info *args_info);

+static

+void clear_args (struct gengetopt_args_info *args_info);

+static int

+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required, const char *additional_error);

+static char *

+gengetopt_strdup (const char *s);

+static

+void clear_given (struct gengetopt_args_info *args_info)

+ args_info->help_given = 0 ;

+ args_info->version_given = 0 ;

+static

+void clear_args (struct gengetopt_args_info *args_info)

+static

+void init_args_info(struct gengetopt_args_info *args_info)

+ args_info->help_help = gengetopt_args_info_help[0] ;

+ args_info->version_help = gengetopt_args_info_help[1] ;

+void

+cmdline_parser_print_version (void)

+ printf ("%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);

+void

+cmdline_parser_print_help (void)

+ int i = 0;

+ cmdline_parser_print_version ();

+ if (strlen(gengetopt_args_info_purpose) > 0)

+ printf("\n%s\n", gengetopt_args_info_purpose);

+ printf("\n%s\n\n", gengetopt_args_info_usage);

+ if (strlen(gengetopt_args_info_description) > 0)

+ printf("%s\n", gengetopt_args_info_description);

+ while (gengetopt_args_info_help[i])

+ printf("%s\n", gengetopt_args_info_help[i++]);

+void

+cmdline_parser_init (struct gengetopt_args_info *args_info)

+ clear_given (args_info);

+ clear_args (args_info);

+ init_args_info (args_info);

+ args_info->inputs = NULL;

+ args_info->inputs_num = 0;

+static void

+cmdline_parser_release (struct gengetopt_args_info *args_info)

+ unsigned int i;

+ for (i = 0; i < args_info->inputs_num; ++i)

+ free (args_info->inputs [i]);

+ if (args_info->inputs_num)

+ free (args_info->inputs);

+ clear_given (args_info);

+int

+cmdline_parser_file_save(const char *filename, struct gengetopt_args_info *args_info)

+ FILE *outfile;

+ int i = 0;

+ outfile = fopen(filename, "w");

+ if (!outfile)

+ {

+ fprintf (stderr, "%s: cannot open file for writing: %s\n", CMDLINE_PARSER_PACKAGE, filename);

+ return EXIT_FAILURE;

+ }

+ if (args_info->help_given) {

+ fprintf(outfile, "%s\n", "help");

+ }

+ if (args_info->version_given) {

+ fprintf(outfile, "%s\n", "version");

+ }

+ fclose (outfile);

+ i = EXIT_SUCCESS;

+ return i;

+void

+cmdline_parser_free (struct gengetopt_args_info *args_info)

+ cmdline_parser_release (args_info);

+/* gengetopt_strdup() */

+/* strdup.c replacement of strdup, which is not standard */

+char *

+gengetopt_strdup (const char *s)

+ char *result = NULL;

+ if (!s)

+ return result;

+ result = (char*)malloc(strlen(s) + 1);

+ if (result == (char*)0)

+ return (char*)0;

+ strcpy(result, s);

+ return result;

+int

+cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)

+ return cmdline_parser2 (argc, argv, args_info, 0, 1, 1);

+int

+cmdline_parser2 (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)

+ int result;

+ result = cmdline_parser_internal (argc, argv, args_info, override, initialize, check_required, NULL);

+ if (result == EXIT_FAILURE)

+ {

+ cmdline_parser_free (args_info);

+ exit (EXIT_FAILURE);

+ }

+ return result;

+int

+cmdline_parser_required (struct gengetopt_args_info *args_info, const char *prog_name)

+ return EXIT_SUCCESS;

+int

+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required, const char *additional_error)

+ int c; /* Character of the parsed option. */

+ int error = 0;

+ struct gengetopt_args_info local_args_info;

+ if (initialize)

+ cmdline_parser_init (args_info);

+ cmdline_parser_init (&local_args_info);

+ optarg = 0;

+ optind = 0;

+ opterr = 1;

+ optopt = '?';

+ while (1)

+ {

+ int option_index = 0;

+ char *stop_char;

+ static struct option long_options[] = {

+ { "help", 0, NULL, 'h' },

+ { "version", 0, NULL, 'V' },

+ { NULL, 0, NULL, 0 }

+ };

+ stop_char = 0;

+ c = getopt_long (argc, argv, "hV", long_options, &option_index);

+ if (c == -1) break; /* Exit from `while (1)' loop. */

+ switch (c)

+ {

+ case 'h': /* Print help and exit. */

+ cmdline_parser_print_help ();

+ cmdline_parser_free (&local_args_info);

+ exit (EXIT_SUCCESS);

+ case 'V': /* Print version and exit. */

+ cmdline_parser_print_version ();

+ cmdline_parser_free (&local_args_info);

+ exit (EXIT_SUCCESS);

+ case 0: /* Long option with no short option */

+ case '?': /* Invalid option. */

+ /* `getopt_long' already printed an error message. */

+ goto failure;

+ default: /* bug: option not considered. */

+ fprintf (stderr, "%s: option unknown: %c%s\n", CMDLINE_PARSER_PACKAGE, c, (additional_error ? additional_error : ""));

+ abort ();

+ } /* switch */

+ } /* while */

+ cmdline_parser_release (&local_args_info);

+ if ( error )

+ return (EXIT_FAILURE);

+ if (optind < argc)

+ {

+ int i = 0 ;

+ int found_prog_name = 0;

+ /* whether program name, i.e., argv[0], is in the remaining args

+ (this may happen with some implementations of getopt,

+ but surely not with the one included by gengetopt) */

+ i = optind;

+ while (i < argc)

+ if (argv[i++] == argv[0]) {

+ found_prog_name = 1;

+ break;

+ }

+ i = 0;

+ args_info->inputs_num = argc - optind - found_prog_name;

+ args_info->inputs =

+ (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ;

+ while (optind < argc)

+ if (argv[optind++] != argv[0])

+ args_info->inputs[ i++ ] = gengetopt_strdup (argv[optind-1]) ;

+ }

+ return 0;

+failure:

+ cmdline_parser_release (&local_args_info);

+ return (EXIT_FAILURE);

diff --git a/src/check-regexp_cmd.ggo b/src/check-regexp_cmd.ggo
new file mode 100644
index 0000000..6ddefb2
--- /dev/null
+++ b/src/check-regexp_cmd.ggo

@@ -0,0 +1,12 @@

+# This file is used by gengetopt to generate a command line args parser

+# GNU gengetopt can be found at

+# http://www.gnu.org/software/gengetopt

+package "check-regexp (GNU Source-highlight)"

+purpose "Tries to match the regular expression passed as the first argument

+against the strings passed as remaining arguments"

+usage "check-regexp 'regular expression' 'string1' 'string2' ..."

diff --git a/src/check-regexp_cmd.h b/src/check-regexp_cmd.h
new file mode 100644
index 0000000..424ef12
--- /dev/null
+++ b/src/check-regexp_cmd.h

@@ -0,0 +1,62 @@

+/* check-regexp_cmd.h */

+/* File autogenerated by gengetopt version 2.19.2 */

+#ifndef CHECK_REGEXP_CMD_H

+#define CHECK_REGEXP_CMD_H

+/* If we use autoconf. */

+#ifdef HAVE_CONFIG_H

+#include "config.h"

+#endif

+#ifdef __cplusplus

+extern "C" {

+#endif /* __cplusplus */

+#ifndef CMDLINE_PARSER_PACKAGE

+#define CMDLINE_PARSER_PACKAGE "check-regexp (GNU Source-highlight)"

+#endif

+#ifndef CMDLINE_PARSER_VERSION

+#define CMDLINE_PARSER_VERSION VERSION

+#endif

+struct gengetopt_args_info

+ const char *help_help; /* Print help and exit help description. */

+ const char *version_help; /* Print version and exit help description. */

+ int help_given ; /* Whether help was given. */

+ int version_given ; /* Whether version was given. */

+ char **inputs ; /* unamed options */

+ unsigned inputs_num ; /* unamed options number */

+} ;

+extern const char *gengetopt_args_info_purpose;

+extern const char *gengetopt_args_info_usage;

+extern const char *gengetopt_args_info_help[];

+int cmdline_parser (int argc, char * const *argv,

+ struct gengetopt_args_info *args_info);

+int cmdline_parser2 (int argc, char * const *argv,

+ struct gengetopt_args_info *args_info,

+ int override, int initialize, int check_required);

+int cmdline_parser_file_save(const char *filename,

+ struct gengetopt_args_info *args_info);

+void cmdline_parser_print_help(void);

+void cmdline_parser_print_version(void);

+void cmdline_parser_init (struct gengetopt_args_info *args_info);

+void cmdline_parser_free (struct gengetopt_args_info *args_info);

+int cmdline_parser_required (struct gengetopt_args_info *args_info,

+ const char *prog_name);

+#ifdef __cplusplus

+#endif /* __cplusplus */

+#endif /* CHECK_REGEXP_CMD_H */

diff --git a/src/lib/cmdline.c b/src/cmdline.c
index a471a23..fdc0f2c 100644
--- a/src/lib/cmdline.c
+++ b/src/cmdline.c

@@ -1,5 +1,5 @@

- File autogenerated by gengetopt version 2.19.1

+ File autogenerated by gengetopt version 2.19.2

generated with the following command:

/home/bettini/usr/local/bin/gengetopt --input cmdline.ggo --unamed-opts --no-handle-help --no-handle-version

diff --git a/src/lib/cmdline.ggo b/src/cmdline.ggo
index e855554..25bac44 100644
--- a/src/lib/cmdline.ggo
+++ b/src/cmdline.ggo

@@ -1,37 +1,9 @@

# This file is used by gengetopt to generate a command line args parser

# GNU gengetopt can be found at

# http://www.gnu.org/software/gengetopt

-# Specification file format:

-# This file consist in lines of sentences with the following format:

-# option <long> <short> <desc> <argtype> <required>

-# option <long> <short> <desc> flag <onoff>

-# option <long> <short> <desc> no

-# ... # ...

-# Where:

-# <packname> = Double quoted string with upper and lower case chars, digits,

-# '-' and '.'. No spaces allowed.

-# <version> = Double quoted string with upper and lower case chars, digits,

-# '-' and '.'. No spaces allowed.

-# <long> = Double quoted string with upper and lower case chars, digits,

-# '-' and '.'. No spaces allowed.

-# <short> = A single upper or lower case char, or a digit.

-# <desc> = String with upper and lower case chars, digits, '-', '.' and

-# spaces. First character must be no space.

-# <argtype> = string, int, short, long, float, double, longdouble or longlong.

-# <required> = yes or no.

-# <onoff> = on or off.

-# Comments begins with '#' in any place of the line and ends in the

-# end of line.

-# The third form of option is used if the option does not take an argument;

-# it must not be required.

purpose "Highlight the syntax of a source file (e.g. Java) into a specific format (e.g. HTML)"

# Options

diff --git a/src/lib/cmdline.h b/src/cmdline.h
index cc28f14..3a0b158 100644
--- a/src/lib/cmdline.h
+++ b/src/cmdline.h

@@ -1,6 +1,6 @@

/* cmdline.h */

-/* File autogenerated by gengetopt version 2.19.1 */

+/* File autogenerated by gengetopt version 2.19.2 */

#ifndef CMDLINE_H

#define CMDLINE_H

diff --git a/src/lib/cmdlineargs.h b/src/cmdlineargs.h
index 10ffe45..10ffe45 100644
--- a/src/lib/cmdlineargs.h
+++ b/src/cmdlineargs.h

diff --git a/src/cpp.lang b/src/cpp.lang
index 8a59c08..90a5e87 100644
--- a/src/cpp.lang
+++ b/src/cpp.lang

@@ -13,6 +13,9 @@ include "number.lang"

include "c_string.lang"

+(keyword,normal,type) =

+ `(\<(?:class|struct|typename))([[:blank:]]+)([[:alnum:]]+)`

keyword = "__asm|__cdecl|__declspec|__export|__far16",

"__fastcall|__fortran|__import",

"__pascal|__rtti|__stdcall|_asm|_cdecl",

diff --git a/src/default.css b/src/default.css
index e08e98b..52a320b 100644
--- a/src/default.css
+++ b/src/default.css

@@ -3,6 +3,7 @@ body { background-color: white; }

.keyword { color: blue; font-weight: bold; }

.type { color: darkgreen; }

.string { color: red; font-family: monospace; }

+.regexp { color: orange; }

.specialchar { color: pink; font-family: monospace; }

.comment { color: brown; font-style: italic; }

.number { color: purple; }

diff --git a/src/default.style b/src/default.style
index 5ade28d..14d5117 100644
--- a/src/default.style
+++ b/src/default.style

@@ -3,6 +3,7 @@ bgcolor "white"; // the background color for documents

keyword blue b ; // for language keywords

type darkgreen ; // for basic types

string red f ; // for strings and chars

+regexp orange f ; // for regular expressions

specialchar pink f ; // for special chars, e.g., \n, \t, \\

comment brown i, noref; // for comments

number purple ; // for literal numbers

diff --git a/src/docbook.outlang b/src/docbook.outlang
index 923eae0..0ed7a62 100644
--- a/src/docbook.outlang
+++ b/src/docbook.outlang

@@ -6,6 +6,11 @@ extension "xml"

bold "<emphasis role=\"strong\">$text</emphasis>"

italics "<emphasis>$text</emphasis>"

+anchor "<anchor id=\"line$linenum\"/>$text"

+postline_reference "<link linkend='line$linenum'>$text -> $linenum</link>"

+postdoc_reference "<link linkend='line$linenum'>$text -> $linenum</link>"

+reference "<link linkend='line$linenum'>$text</link>"

translations

"&" "&amp;"

"<" "&lt;"

diff --git a/src/docbookdoc.outlang b/src/docbookdoc.outlang
new file mode 100644
index 0000000..cb1f446
--- /dev/null
+++ b/src/docbookdoc.outlang

@@ -0,0 +1,14 @@

+include "docbook.outlang"

+doctemplate

+"<!DOCTYPE article PUBLIC \"-//OASIS//DTD DocBook//EN\">

+<article>

+<articleinfo>

+<title>prova</title>

+</articleinfo>

+<programlisting>"

+"</programlisting>

+</article>

+end

diff --git a/src/esc.style b/src/esc.style
index 8c00bda..8a0aebd 100644
--- a/src/esc.style
+++ b/src/esc.style

@@ -1,6 +1,7 @@

keyword blue b ;

type darkgreen ;

string red ;

+regexp orange ;

specialchar pink ;

comment cyan i ;

number purple ;

diff --git a/src/html.lang b/src/html.lang
index af706ca..2893308 100644
--- a/src/html.lang
+++ b/src/html.lang

@@ -4,9 +4,9 @@ end

comment delim "" multiline nested

-keyword = '<(/)?[[:alnum:]]+(/)?>'

+keyword = '<(/)?[[:alpha:]][[:alnum:]]*(/)?>'

-state keyword delim '<(/)?[[:alnum:]]+' '(/)?>' multiline begin

+state keyword delim '<(/)?[[:alpha:]][[:alnum:]]*' '(/)?>' multiline begin

type = '[^="[:blank:]>]+'

symbol = "="

string delim "\"" "\"" escape "\\" multiline

diff --git a/src/html_notfixed.outlang b/src/html_notfixed.outlang
index 4b5b49c..c6af7b0 100644
--- a/src/html_notfixed.outlang
+++ b/src/html_notfixed.outlang

@@ -2,6 +2,12 @@ include "html_common.outlang"

fixed "<tt>$text</tt>"

+doctemplate

+"

+$header"

+"$footer"

+end

translations

"\n" "<br>\n"

" " "  "

diff --git a/src/java.lang b/src/java.lang
index 742fac1..63371ca 100644
--- a/src/java.lang
+++ b/src/java.lang

@@ -6,6 +6,8 @@ include "number.lang"

include "c_string.lang"

+(keyword,normal,type) = `(\<(?:class|interface))([[:blank:]]+)([$[:alnum:]]+)`

diff --git a/src/lang.map b/src/lang.map
index d15938b..efae070 100644
--- a/src/lang.map
+++ b/src/lang.map

@@ -38,8 +38,8 @@ caml = caml.lang

mli = caml.lang

sml = sml.lang

sig = sml.lang

-syslog = syslog.lang

-log = syslog.lang

+syslog = log.lang

+log = log.lang

pas = pascal.lang

pascal = pascal.lang

fortran = fortran.lang

diff --git a/src/langdef.lang b/src/langdef.lang
index 2229f23..ade37f6 100644
--- a/src/langdef.lang
+++ b/src/langdef.lang

@@ -5,13 +5,14 @@ comment start "#"

preproc = "include"

string delim "\"" "\"" escape "\\" multiline

-string delim "'" "'" escape "\\" multiline

+regexp delim "'" "'" escape "\\" multiline

+regexp delim "`" "`" escape "\\" multiline

"multiline|nested|vardef|exitall|exit",

"redef|subst|nonsensitive"

-symbol = "=|+|,"

+symbol = "=|+|,|(|)"

vardef ID = '[[:word:]]+'

diff --git a/src/lib/.cvsignore b/src/lib/.cvsignore
index f661af7..57922c9 100644
--- a/src/lib/.cvsignore
+++ b/src/lib/.cvsignore

@@ -52,4 +52,5 @@ test_readtags

test_langinfer

mytags

-reportbugs.c \ No newline at end of file

+reportbugs.c

+test_regexpreprocessor

diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index 2aa586e..cc904ae 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am

@@ -1,4 +1,4 @@

# This file is free software; as a special exception the author gives

# unlimited permission to copy and/or distribute it, with or without

@@ -8,19 +8,11 @@

# WITHOUT ANY WARRANTY, to the extent permitted by law; without even the

# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

-SUFFIXES = .text .ggo

+SUFFIXES = .text

.text.c:

$(TXTC) -c $<

-if NO_GENGETOPT

-.ggo.c:

- touch $@

-else

-.ggo.c:

- $(GENGETOPT) --input $< --unamed-opts --no-handle-help --no-handle-version

-endif

if NO_DOUBLECPP

$(srcdir)/%.H: $(srcdir)/%.h

touch $@

@@ -36,19 +28,22 @@ LDADD = @LEXLIB@ $(top_builddir)/gl/libgnu.la

noinst_LTLIBRARIES = libcommon.la

-libcommon_la_SOURCES = copyright.text reportbugs.text cmdline.c messages.cc \

+libcommon_la_SOURCES = copyright.text reportbugs.text messages.cc \

fileutil.cc chartranslator.cc generatorfactory.cc docgenerator.cc styleparser.yy \

stylescanner.ll langdefparser.yy langdefscanner.ll outlangdefparser.yy \

stylecssparser.yy stylecssscanner.ll outlangdefscanner.ll srcuntabifier.cpp \

- generatormap.cpp maingeneratormap.cpp outputbuffer.cpp outputgenerator.cpp \

- mainoutputbuffer.cpp lineoutputgenerator.cpp stringdef.cpp langelem.cpp statelangelem.cpp \

+ textformatter.cpp outputbuffer.cpp outputgenerator.cpp \

+ mainoutputbuffer.cpp statelangelem.cpp \

langelems.cpp regexpreprocessor.cpp regexpstatebuilder.H regexpstatebuilder_dbtab.cc \

regexpstatebuilder.cpp langdefloader.cpp langmap.cpp statestartlangelem.cpp \

- stringlistlangelem.cpp delimitedlangelem.cpp vardefinitions.cpp textformatter.cpp \

+ stringlistlangelem.cpp delimitedlangelem.cpp vardefinitions.cpp \

textstyle.cpp textgenerator.cpp textstylebuilder.cpp doctemplate.cpp substfun.cpp \

- refgeneratormap.cpp readtags.c fileinfo.cpp linebuffer.cpp preformatter.cpp \

+ readtags.c fileinfo.cpp preformatter.cpp \

regexpstateprinter.cpp langelemsprinter.cpp langelemsprinter.H langelemsprinter_dbtab.cc \

- languageinfer.cpp stopwatch.cpp utils.cpp utils.h

+ languageinfer.cpp stopwatch.cpp utils.cpp utils.h \

+ regexpengine.cpp regexpstate.cpp regexpenginedebug.cpp \

+ namedsubexpslangelem.cpp

libcommon_la_LIBADD = $(BOOST_REGEX_LIB)

INCLUDES = -I@top_srcdir@/src

@@ -56,12 +51,9 @@ INCLUDES = -I@top_srcdir@/src

EXTRA_DIST = colors.h generatorfactory.h \

keys.h styleparser.h stylecssparser.h \

tokens.h messages.h fileutil.h \

- cmdline.h \

- chartranslator.h my_set.h my_sstream.h my_string.h \

+ chartranslator.h my_sstream.h \

linenumdigit.h globalostream.h \

docgenerator.h \

- cmdlineargs.h \

- cmdline.ggo \

srcuntabifier.h colormap.h \

langdefparser.h outlangdefparser.h \

@@ -98,10 +90,16 @@ styleparser.cc styleparser.h: $(srcdir)/styleparser.yy

stylecssparser.cc stylecssparser.h: $(srcdir)/stylecssparser.yy

$(YACC) -p stylecsssc_ -o $@ $(srcdir)/stylecssparser.yy --defines=$*.h

-TESTS = test_textstyle test_textgenerator test_outlangparser test_readtags \

-test_langinfer

+TESTS = test_textstyle test_textgenerator test_outlangparser \

+test_langinfer test_regexpreprocessor

+check_PROGRAMS = test_langdefparser test_langmap test_textstyle test_textgenerator test_outlangparser test_langinfer test_regexpreprocessor

+if !NO_CTAGS

+TESTS += test_readtags

+check_PROGRAMS += test_readtags

+endif

-check_PROGRAMS = test_langdefparser test_langmap test_textstyle test_textgenerator test_outlangparser test_readtags test_langinfer

test_langdefparser_SOURCES = test_langdefparser_main.cpp

test_langdefparser_LDADD = libcommon.la

@@ -126,6 +124,9 @@ test_readtags_LDADD = readtags.$(OBJEXT)

test_langinfer_SOURCES = test_langinfer.cpp

test_langinfer_LDADD = libcommon.la

+test_regexpreprocessor_SOURCES = test_regexpreprocessor_main.cpp

+test_regexpreprocessor_LDADD = libcommon.la

mytags: $(srcdir)/test_readtags_main.cpp

$(CTAGS) --excmd=n --fields=+n -o mytags $(srcdir)/test_readtags_main.cpp $(srcdir)/readtags.h

@@ -139,14 +140,16 @@ MAINTAINERCLEANFILES = styleparser.cc styleparser.h stylescanner.cc \

langdefparser.cc langdefparser.h langdefscanner.cc \

outlangdefparser.cc outlangdefparser.h outlangdefscanner.cc

-noinst_HEADERS = parsestyles.h generatormap.h maingeneratormap.h outputbuffer.h \

- outputgenerator.h mainoutputbuffer.h lineoutputgenerator.h stringdef.h langelem.h \

+noinst_HEADERS = parsestyles.h textformatter.h outputbuffer.h \

+ outputgenerator.h mainoutputbuffer.h stringdef.h langelem.h \

statelangelem.h langelems.h langdefparserfun.h outlangdefparserfun.h \

tostringcollection.h regexpreprocessor.h regexpstatebuilder.h langdefloader.h \

langdefscanner.h outlangdefscanner.h parsestruct.h langmap.h statestartlangelem.h \

- stringlistlangelem.h delimitedlangelem.h vardefinitions.h textformatter.h textstyle.h \

+ stringlistlangelem.h delimitedlangelem.h vardefinitions.h textstyle.h \

textstyles.h textgenerator.h textstylebuilder.h doctemplate.h substfun.h \

- parserinfo.h refgeneratormap.h readtags.h fileinfo.h linebuffer.h preformatter.h \

- regexpstateprinter.h langelemsprinter.h languageinfer.h stopwatch.h stylekey.h

+ parserinfo.h readtags.h fileinfo.h linebuffer.h preformatter.h \

+ regexpstateprinter.h langelemsprinter.h languageinfer.h stopwatch.h stylekey.h \

+ regexpengine.h regexpstate.h regexpenginedebug.h \

+ namedsubexpslangelem.h refposition.h

diff --git a/src/lib/delimitedlangelem.cpp b/src/lib/delimitedlangelem.cpp
index 8cafd20..a11757f 100644
--- a/src/lib/delimitedlangelem.cpp
+++ b/src/lib/delimitedlangelem.cpp

@@ -38,4 +38,11 @@ DelimitedLangElem::toString() const

return res;

}

+const std::string

+DelimitedLangElem::toStringOriginal() const

+ string res = StateStartLangElem::toString() + " " + start->toStringOriginal() + (end ? " " + end->toStringOriginal() : "");

+ return res;

diff --git a/src/lib/delimitedlangelem.h b/src/lib/delimitedlangelem.h
index c1df355..dceae26 100644
--- a/src/lib/delimitedlangelem.h
+++ b/src/lib/delimitedlangelem.h

@@ -42,6 +42,8 @@ public:

virtual const std::string toString() const;

+ virtual const std::string toStringOriginal() const;

void set_escape(StringDef *e) { escape = e; }

StringDef *getStart() const { return start; }

diff --git a/src/lib/fileutil.cc b/src/lib/fileutil.cc
index 52c7386..24dcf48 100644
--- a/src/lib/fileutil.cc
+++ b/src/lib/fileutil.cc

@@ -17,11 +17,14 @@

+#ifdef HAVE_CONFIG_H

+#include "config.h"

+#endif

#include <iostream>

#include <fstream>

#include "fileutil.h"

-#include "my_string.h"

#include "messages.h" // for verbosity

using namespace std;

diff --git a/src/lib/generatorfactory.cc b/src/lib/generatorfactory.cc
index dcbb908..983f1be 100644
--- a/src/lib/generatorfactory.cc
+++ b/src/lib/generatorfactory.cc

@@ -25,117 +25,105 @@

#include "textgenerator.h"

#include "textstyles.h"

#include "textstylebuilder.h"

-#include "refgeneratormap.h"

-// global

-#include "maingeneratormap.h"

-GeneratorFactory::GeneratorFactory(TextStylesPtr tstyles,

- PreFormatter *pf,

- bool gen_ref,

- const string &_ctags_file, RefPosition position,

- bool optimizations) :

- textStyles(tstyles), preformatter(pf),

- generate_references(gen_ref),

- ctags_file(_ctags_file), refposition(position),

- noOptimizations(optimizations)

- generatormap = createGeneratorMap();

- generatormap->setNoOptimizations(noOptimizations);

+#include "textformatter.h"

+GeneratorFactory::GeneratorFactory(TextStylesPtr tstyles, PreFormatter *pf,

+ bool gen_ref, const string &_ctags_file, RefPosition position,

+ bool optimizations) :

+ textStyles(tstyles), preformatter(pf), generate_references(gen_ref),

+ ctags_file(_ctags_file), refposition(position),

+ noOptimizations(optimizations) {

+ textformatter = createTextFormatter();

+ textformatter->setNoOptimizations(noOptimizations);

}

-GeneratorFactory::~ GeneratorFactory()

- if (generatormap)

- delete generatormap;

+GeneratorFactory::~GeneratorFactory() {

+ if (textformatter)

+ delete textformatter;

}

-GeneratorMap *

-GeneratorFactory::createGeneratorMap()

- if (generate_references)

- return new RefGeneratorMap(preformatter, ctags_file,

- textStyles->refstyle, refposition);

- return new GeneratorMap(preformatter);

+TextFormatter *GeneratorFactory::createTextFormatter() {

+ if (generate_references)

+ return new TextFormatter(preformatter, ctags_file,

+ textStyles->refstyle, refposition);

+ else

+ return new TextFormatter(preformatter);

}

-string GeneratorFactory::preprocessColor(const string &color)

- if ( color[0] == '"' && color[color.size()-1] == '"')

- return color.substr(1, color.size()-2);

- else

- return textStyles->colorMap->getColor (color);

+string GeneratorFactory::preprocessColor(const string &color) {

+ if (color[0] == '"' && color[color.size()-1] == '"')

+ return color.substr(1, color.size()-2);

+ else

+ return textStyles->colorMap->getColor(color);

}

bool GeneratorFactory::createGenerator(const string &key, const string &color,

- const string &bgcolor, StyleConstantsPtr styleconstants)

- if (generatormap->hasGenerator(key))

- return false;

- if (! textStyles->onestyle.empty()) {

- generatormap->addGenerator (key, new TextGenerator(textStyles->onestyle.subst_style(key)));

- return true;

- }

- TextStyleBuilder textStyleBuilder(textStyles->starting_template, textStyles->style_separator);

- textStyleBuilder.start();

- if (styleconstants.get()) {

- for (StyleConstantsIterator it = styleconstants->begin(); it != styleconstants->end(); ++it) {

- switch( *it ){

- case ISBOLD:

- textStyleBuilder.add(textStyles->bold);

- break;

- case ISITALIC:

- textStyleBuilder.add(textStyles->italics);

- break;

- case ISUNDERLINE:

- textStyleBuilder.add(textStyles->underline);

- break;

- case ISFIXED:

- textStyleBuilder.add(textStyles->fixed);

- break;

- case ISNOTFIXED:

- textStyleBuilder.add(textStyles->notfixed);

- break;

- case ISNOREF:

- generatormap->addNoReference(key);

- break;

- }

+ const string &bgcolor, StyleConstantsPtr styleconstants) {

+ if (textformatter->hasGenerator(key))

+ return false;

+ if (! textStyles->onestyle.empty()) {

+ textformatter->addGenerator(key, new TextGenerator(textStyles->onestyle.subst_style(key)));

+ return true;

+ }

+ TextStyleBuilder textStyleBuilder(textStyles->starting_template,

+ textStyles->style_separator);

+ textStyleBuilder.start();

+ if (styleconstants.get()) {

+ for (StyleConstantsIterator it = styleconstants->begin(); it != styleconstants->end(); ++it) {

+ switch (*it) {

+ case ISBOLD:

+ textStyleBuilder.add(textStyles->bold);

+ break;

+ case ISITALIC:

+ textStyleBuilder.add(textStyles->italics);

+ break;

+ case ISUNDERLINE:

+ textStyleBuilder.add(textStyles->underline);

+ break;

+ case ISFIXED:

+ textStyleBuilder.add(textStyles->fixed);

+ break;

+ case ISNOTFIXED:

+ textStyleBuilder.add(textStyles->notfixed);

+ break;

+ case ISNOREF:

+ textformatter->addNoReference(key);

+ break;

+ }

}

- }

- if ( color.size () ) {

- textStyleBuilder.add(textStyles->color.subst_style(preprocessColor(color)));

- }

+ if (color.size()) {

+ textStyleBuilder.add(textStyles->color.subst_style(preprocessColor(color)));

+ }

- if ( bgcolor.size () ) {

- textStyleBuilder.add(textStyles->bg_color.subst_style(preprocessColor(bgcolor)));

- }

+ if (bgcolor.size()) {

+ textStyleBuilder.add(textStyles->bg_color.subst_style(preprocessColor(bgcolor)));

+ }

- TextStyle style = textStyleBuilder.end();

+ TextStyle style = textStyleBuilder.end();

- generatormap->addGenerator(key, new TextGenerator(style));

- return true;

+ textformatter->addGenerator(key, new TextGenerator(style));

+ return true;

}

-void GeneratorFactory::addDefaultGenerator()

- TextGenerator *defaultGenerator = generatormap->hasGenerator(NORMAL);

- if (!defaultGenerator) {

- if (textStyles->onestyle.empty())

- defaultGenerator = new TextGenerator();

- else

- defaultGenerator = new TextGenerator(textStyles->onestyle.subst_style(NORMAL));

- generatormap->addGenerator (NORMAL, defaultGenerator);

- }

- generatormap->setDefaultGenerator(defaultGenerator);

+void GeneratorFactory::addDefaultGenerator() {

+ TextGenerator *defaultGenerator = textformatter->hasGenerator(NORMAL);

+ if (!defaultGenerator) {

+ if (textStyles->onestyle.empty())

+ defaultGenerator = new TextGenerator();

+ else

+ defaultGenerator = new TextGenerator(textStyles->onestyle.subst_style(NORMAL));

+ textformatter->addGenerator(NORMAL, defaultGenerator);

+ }

+ textformatter->setDefaultGenerator(defaultGenerator);

}

diff --git a/src/lib/generatorfactory.h b/src/lib/generatorfactory.h
index b7da58a..2845e81 100644
--- a/src/lib/generatorfactory.h
+++ b/src/lib/generatorfactory.h

@@ -27,7 +27,7 @@

#include <boost/shared_ptr.hpp>

#include "textstyles.h"

-#include "refgeneratormap.h"

+#include "refposition.h"

typedef enum { ISBOLD=1, ISITALIC, ISUNDERLINE, ISFIXED, ISNOTFIXED, ISNOREF } StyleConstant;

typedef std::list<StyleConstant> StyleConstants;

@@ -38,6 +38,7 @@ using std::string;

class TextGenerator;

class PreFormatter;

+class TextFormatter;

class GeneratorFactory

{

@@ -52,8 +53,10 @@ class GeneratorFactory

RefPosition refposition;

/// whether to turn off optimizations for generating output (default false)

bool noOptimizations;

+ /// the main text formatter

+ TextFormatter *textformatter;

- GeneratorMap *createGeneratorMap();

+ TextFormatter *createTextFormatter();

public:

GeneratorFactory(TextStylesPtr tstyles, PreFormatter *pf,

@@ -90,6 +93,8 @@ class GeneratorFactory

* @return

string preprocessColor(const string &color);

+ TextFormatter *getTextFormatter() { return textformatter; }

};

#endif // GENERATORFACTORY_H

diff --git a/src/lib/generatormap.cpp b/src/lib/generatormap.cpp
deleted file mode 100644
index c7660e6..0000000
--- a/src/lib/generatormap.cpp
+++ /dev/null

@@ -1,154 +0,0 @@

-//

-// C++ Implementation: generatormap

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#include "generatormap.h"

-#include "textgenerator.h"

-#include "preformatter.h"

-#include "mainoutputbuffer.h"

-using namespace std;

-GeneratorMap::GeneratorMap(PreFormatter *pf) :

- default_generator (0),

- preformatter(pf), noOptimizations(false)

-GeneratorMap::~GeneratorMap()

- for (MapType::const_iterator it = generatormap.begin(); it != generatormap.end(); ++it)

- delete it->second;

-void

-GeneratorMap::setDefaultGenerator(TextGenerator *gen)

- default_generator = gen;

-void

-GeneratorMap::addGenerator(const std::string &elem, TextGenerator *gen)

- generatormap[elem] = gen;

-TextGenerator *GeneratorMap::hasGenerator(const string &elem)

- MapType::const_iterator it = generatormap.find(elem);

- if (it == generatormap.end())

- return 0;

- return it->second;

-TextGenerator *

-GeneratorMap::getGenerator(const string &elem)

- MapType::const_iterator it = generatormap.find(elem);

- if (it == generatormap.end())

- {

- // create a copy of the prototype and substitute the style.

- TextGenerator *missing = new TextGenerator(*default_generator);

- missing->subst_style(elem);

- generatormap[elem] = missing;

- return missing;

- }

- return it->second;

-void

-GeneratorMap::addNoReference(const std::string &elem)

- noreferences.insert(elem);

-bool

-GeneratorMap::isNoReference(const std::string &elem) const

- return (noreferences.find(elem) != noreferences.end());

-const string

-GeneratorMap::generateString( const std::string &elem, const std::string &s ,

- const FileInfo *p )

- return getGenerator(elem)->generateEntire(preformatter->preformat(s));

-void

-GeneratorMap::generateEntire( const std::string &elem, const std::string &s,

- const FileInfo *p )

- if (noOptimizations) {

- // we generate the element right now, since during debugging

- // we want to be very responsive

- if (s.size())

- output(generateString(elem, s, p));

- return;

- }

- // otherwise we optmize output generation: delay formatting a specific

- // element until we deal with another element; this way strings that belong

- // to the same element are formatted using only one tag: e.g.,

- // <comment>/* mycomment */</comment>

- // instead of

- // <comment>/*</comment><comment>mycomment</comment><comment>*/</comment>

- if (elem == current_elem) {

- elem_buffer << s;

- } else {

- // first format the buffered string

- const string toformat = elem_buffer.str();

- if (toformat.size())

- output(generateString(current_elem, toformat, p));

- // then start a new buffer

- elem_buffer.str("");

- elem_buffer << s;

- current_elem = elem;

- current_file_info = p;

- }

-void

-GeneratorMap::generateNL( const std::string &text )

- // first format the buffered string

- flush();

- string preformat_text = preformatter->preformat(text);

- if (preformat_text == text)

- preformat_text = "\n";

- outputbuffer->output_ln(preformat_text);

-void

-GeneratorMap::flush()

- const string &remainder = elem_buffer.str();

- if (remainder.size()) {

- output(generateString(current_elem, remainder, current_file_info));

- elem_buffer.str("");

- current_elem = "";

- // each line is handled separately

- }

-void

-GeneratorMap::output(const string &s)

- outputbuffer->output(s);

diff --git a/src/lib/generatormap.h b/src/lib/generatormap.h
deleted file mode 100644
index b6fa977..0000000
--- a/src/lib/generatormap.h
+++ /dev/null

@@ -1,86 +0,0 @@

-//

-// C++ Interface: generatormap

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#ifndef GENERATORMAP_H

-#define GENERATORMAP_H

-#include <map>

-#include <string>

-#include <set>

-#include "my_sstream.h"

-class TextGenerator;

-class PreFormatter;

-class FileInfo;

-/**

-map of generators; associate a generator for each program element, e.g., keyword, string, etc.

-@author Lorenzo Bettini

-*/

-class GeneratorMap

- protected:

- typedef std::map<std::string, TextGenerator *> MapType;

- typedef std::set<std::string> NoRefType;

- MapType generatormap;

- /// those elements for which no reference info is generated

- NoRefType noreferences;

- TextGenerator *default_generator;

- PreFormatter *preformatter;

- /// where we buffer strings for the current elem

- std::ostringstream elem_buffer;

- /// the element that is currently buffered

- std::string current_elem;

- /// concerns the element currently buffered

- const FileInfo *current_file_info;

- /// whether to turn off optimizazionts (such as buffering), default: false

- bool noOptimizations;

- virtual const std::string generateString(const std::string &elem,

- const std::string &s, const FileInfo *);

- void output(const std::string &s);

- public:

- GeneratorMap(PreFormatter *);

- virtual ~GeneratorMap();

- /**

- * Returns the generator for the specific element name or null if

- * there's no generator for the element

- * @param elem

- * @return

- */

- TextGenerator *hasGenerator(const std::string &elem);

- /**

- * Retrieves the generator for a specific element; if it doesn't find it,

- * it creates a generator for that element, using the default generator

- * (i.e., the one for "normal" element)

- * @param elem

- * @return

- */

- TextGenerator *getGenerator(const std::string &elem);

- void addGenerator(const std::string &elem, TextGenerator *gen);

- void addNoReference(const std::string &elem);

- bool isNoReference(const std::string &elem) const;

- void setDefaultGenerator(TextGenerator *g);

- void generateEntire( const std::string &elem,

- const std::string &s, const FileInfo * );

- void generateNL( const std::string &s );

- void flush();

- void setNoOptimizations(bool n) { noOptimizations = n; }

-};

-#endif

diff --git a/src/lib/keys.h b/src/lib/keys.h
index 9fff74b..37332e7 100644
--- a/src/lib/keys.h
+++ b/src/lib/keys.h

@@ -2,16 +2,5 @@

#define KEYS_H

#define NORMAL "normal"

-#define KEYWORD "keyword"

-#define TYPE "type"

-#define STRING "string"

-#define COMMENT "comment"

-#define NUMBER "number"

-#define PREPROC "preproc"

-#define SYMBOL "symbol"

-#define FUNCTION "function"

-#define CBRACKET "cbracket"

-#define LINENO "lineno"

-#define GLOBAL "global"

#endif

diff --git a/src/lib/langdefparser.yy b/src/lib/langdefparser.yy
index 9ed5d8f..b524c5c 100644
--- a/src/lib/langdefparser.yy
+++ b/src/lib/langdefparser.yy

@@ -1,6 +1,6 @@

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

@@ -24,8 +24,6 @@

#include <iostream>

#include <string>

-#include "my_sstream.h"

#include "messages.h"

#include "stringdef.h"

#include "statelangelem.h"

@@ -36,6 +34,7 @@

#include "langdefparserfun.h"

#include "langdefscanner.h"

#include "vardefinitions.h"

+#include "namedsubexpslangelem.h"

using std::cerr;

using std::string;

@@ -64,6 +63,11 @@ struct Key : public ParserInfo

~Key() { delete key; }

};

+// this is a trick since ElementNames is a typedef and cannot

+// be used in the union below

+struct ElementNamesList : ElementNames {

+};

%union {

@@ -76,12 +80,15 @@ struct Key : public ParserInfo

class StateLangElem *statelangelem;

class StateStartLangElem *statestartlangelem;

class LangElems *langelems;

+ class NamedSubExpsLangElem *namedsubexpslangelem;

struct Key *key;

+ class ElementNamesList *keys;

int flag ;

};

%token <tok> BEGIN_T END_T ENVIRONMENT_T STATE_T MULTILINE_T DELIM_T START_T ESCAPE_T NESTED_T EXIT_ALL EXIT_T VARDEF_T REDEF_T SUBST_T NONSENSITIVE_T

-%token <string> KEY STRINGDEF VARUSE

+%token <string> KEY STRINGDEF REGEXPNOPREPROC VARUSE

+%token <stringdef> REGEXPDEF

%type <stringdef> stringdef escapedef

%type <stringdefs> stringdefs

@@ -91,16 +98,18 @@ struct Key : public ParserInfo

%type <booloption> multiline startnewenv nested nonsensitive

%type <tok> exitall redefsubst

%type <key> key;

+%type <keys> keys;

-allelements :

- {

- /* no definitions */

- /* synthetize a normal elem that catches everything */

- current_lang_elems = new LangElems;

+allelements :

+ {

+ /* no definitions (i.e., empty a .lang file with no definition) */

+ /* such as, default.lang */

+ /* synthetize a normal elem that catches everything */

+ current_lang_elems = new LangElems;

StringDefs *defs = new StringDefs;

- defs->push_back (new StringDef("(.*)"));

+ defs->push_back (new StringDef("(?:.*)"));

current_lang_elems->add(new StringListLangElem("normal", defs, false));

}

| elemdefs { current_lang_elems = $1; }

@@ -155,6 +164,11 @@ complexelem : key DELIM_T stringdef stringdef escapedef multiline nested

$$->setParserInfo($1);

delete $1;

}

+ | '(' keys ')' '=' REGEXPNOPREPROC {

+ $$ = new NamedSubExpsLangElem($2, new StringDef(*$5));

+ $$->setParserInfo(parsestruct->file_name, @1.first_line);

+ delete $5;

+ }

;

key: KEY {

@@ -164,6 +178,20 @@ key: KEY {

}

;

+keys: keys ',' KEY

+ {

+ $$ = $1;

+ $$->push_back(*$3);

+ delete $3;

+ }

+ | KEY

+ {

+ $$ = new ElementNamesList;

+ $$->push_back(*$1);

+ delete $1;

+ }

escapedef : ESCAPE_T stringdef { $$ = $2; }

| { $$ = 0; }

;

@@ -200,7 +228,14 @@ stringdefs : stringdefs ',' stringdef { $$ = $1; $$->push_back($3); }

$$->push_back($1); }

;

-stringdef : STRINGDEF {

+stringdef : REGEXPDEF {

+ $$ = $1;

+ }

+ | STRINGDEF {

+ $$ = new StringDef(*$1);

+ delete $1;

+ }

+ | REGEXPNOPREPROC {

$$ = new StringDef(*$1);

delete $1;

}

@@ -220,13 +255,12 @@ stringdef : STRINGDEF {

+extern int langdef_lex_destroy (void);

void

yyerror( const char *s )

{

- ostringstream str ;

- str << parsestruct->file_name << ":" << parsestruct->line << ": " << s; // << ", in option declaration";

- printError( str.str(), cerr ) ;

- exit(EXIT_FAILURE);

+ exitError(s, parsestruct);

}

void

@@ -245,6 +279,10 @@ parse_lang_def()

delete vardefinitions;

parsestruct = 0;

vardefinitions = 0;

+ // release scanner memory

+ langdef_lex_destroy ();

return current_lang_elems;

}

@@ -259,6 +297,10 @@ parse_lang_def(const char *path, const char *name)

delete vardefinitions;

parsestruct = 0;

vardefinitions = 0;

+ // release scanner memory

+ langdef_lex_destroy ();

return current_lang_elems;

}

diff --git a/src/lib/langdefscanner.ll b/src/lib/langdefscanner.ll
index 6104bab..e9ff1de 100644
--- a/src/lib/langdefscanner.ll
+++ b/src/lib/langdefscanner.ll

@@ -1,6 +1,6 @@

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

@@ -22,6 +22,9 @@

#include "langdefparser.h"

#include "langdefscanner.h"

#include "fileutil.h"

+#include "regexpreprocessor.h"

+#include "stringdef.h"

+#include "messages.h"

#include <stack>

@@ -40,6 +43,7 @@ static std::ostringstream buff;

static void buffer(const char *s);

static void buffer_escape(const char *c);

static const std::string *flush_buffer();

+static StringDef *flush_buffer_preproc();

static void open_include_file(const char *file);

static void close_include_file();

@@ -69,11 +73,11 @@ IDE [a-zA-Z_]([a-zA-Z0-9_])*

STRING \"[^\n"]+\"

-%s COMMENT_STATE STRING_STATE REGEXP_STATE INCLUDE_STATE

+%s COMMENT_STATE STRING_STATE REGEXP_STATE REGEXP_NOPREPROC_STATE INCLUDE_STATE

-[ \t] {}

+<INITIAL,COMMENT_STATE,INCLUDE_STATE>[ \t] {}

\r {}

@@ -130,21 +134,38 @@ STRING \"[^\n"]+\"

<INITIAL>"=" { return '=' ; }

<INITIAL>"," { return ',' ; }

<INITIAL>"+" { return '+' ; }

+<INITIAL>"(" { updateTokenInfo(); return '(' ; }

+<INITIAL>")" { return ')' ; }

<INITIAL>\" { BEGIN(STRING_STATE) ; }

<STRING_STATE>("*"|"."|"?"|"+"|"("|")"|"{"|"}"|"["|"]"|"^"|"$") { buffer_escape( yytext ) ; }

<STRING_STATE>\\\| { buffer( yytext ) ; }

<STRING_STATE>\\\\ { buffer( yytext ) ; }

+<STRING_STATE>\\[[:digit:]] {

+ printError(parsestruct->file_name, parsestruct->line, "backreferences are not allowed") ;

+ exitError(parsestruct->file_name, parsestruct->line, "use backreferences only inside ` `") ;

+ }

<STRING_STATE>"\\\"" { buffer( yytext ) ; }

<STRING_STATE>\" { BEGIN(INITIAL) ; langdef_lval.string = flush_buffer() ; DEB2("STRINGDEF",langdef_lval.string); return STRINGDEF; }

<STRING_STATE>[^\n] { buffer( yytext ) ; }

<INITIAL>\' { BEGIN(REGEXP_STATE) ; }

<REGEXP_STATE>\\\\ { buffer( yytext ) ; }

+<REGEXP_STATE>\\[[:digit:]] {

+ printError(parsestruct->file_name, parsestruct->line, "backreferences are not allowed") ;

+ exitError(parsestruct->file_name, parsestruct->line, "use backreferences only inside ` `") ;

+ }

<REGEXP_STATE>"\\'" { buffer( "'" ) ; }

-<REGEXP_STATE>\' { BEGIN(INITIAL) ; langdef_lval.string = flush_buffer() ; DEB2("STRINGDEF",langdef_lval.string); return STRINGDEF; }

+<REGEXP_STATE>\' { BEGIN(INITIAL) ; langdef_lval.stringdef = flush_buffer_preproc() ; DEB2("REGEXPDEF",langdef_lval.string); return REGEXPDEF; }

<REGEXP_STATE>[^\n] { buffer( yytext ) ; }

+<INITIAL>` { BEGIN(REGEXP_NOPREPROC_STATE) ; }

+<REGEXP_NOPREPROC_STATE>\\\\ { buffer( yytext ) ; }

+<REGEXP_NOPREPROC_STATE>"\\`" { buffer( "'" ) ; }

+<REGEXP_NOPREPROC_STATE>` { BEGIN(INITIAL) ; langdef_lval.string = flush_buffer() ; DEB2("REGEXPNOPREPROCDEF",langdef_lval.string); return REGEXPNOPREPROC; }

+<REGEXP_NOPREPROC_STATE>[^\n] { buffer( yytext ) ; }

<INITIAL>{nl} { DEB("NEWLINE"); ++(parsestruct->line) ; }

<INITIAL>. { return yytext[0] ; }

@@ -168,6 +189,13 @@ const std::string *flush_buffer()

return ret;

}

+StringDef *flush_buffer_preproc()

+ StringDef *sd = new StringDef(RegexPreProcessor::preprocess(buff.str()), buff.str());

+ buff.str("");

+ return sd;

void _open_file_to_scan(const string &path, const string &name)

{

langdef_in = open_data_file_stream(path, name);

diff --git a/src/lib/langelem.cpp b/src/lib/langelem.cpp
deleted file mode 100644
index f3b53af..0000000
--- a/src/lib/langelem.cpp
+++ /dev/null

@@ -1,30 +0,0 @@

-//

-// C++ Implementation: %{MODULE}

-//

-// Description:

-//

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#include "langelem.h"

-using namespace std;

-LangElem::LangElem(const string &n) : name(n), redef(false), subst(false)

-LangElem::~LangElem()

-const std::string

-LangElem::toString() const

- return name;

diff --git a/src/lib/langelem.h b/src/lib/langelem.h
index 6bf51c8..fcf0f5a 100644
--- a/src/lib/langelem.h
+++ b/src/lib/langelem.h

@@ -33,12 +33,19 @@ class LangElem : public ParserInfo

bool subst; // whether this substitutes an existing language element

public:

- LangElem(const std::string &n);

+ LangElem(const std::string &n) : name(n), redef(false), subst(false) {}

- virtual ~LangElem();

+ virtual ~LangElem() {}

const std::string getName() const { return name; }

- virtual const std::string toString() const;

+ virtual const std::string toString() const { return name; }

+ /**

+ * return the original string representation of this element;

+ * this must be defined by subclasses

+ */

+ virtual const std::string toStringOriginal() const = 0;

bool isRedef() const { return redef; }

void setRedef() { redef = true; }

bool isSubst() const { return subst; }

diff --git a/src/lib/langelems.cpp b/src/lib/langelems.cpp
index efcd49c..3788d8c 100644
--- a/src/lib/langelems.cpp
+++ b/src/lib/langelems.cpp

@@ -33,6 +33,12 @@ LangElems::toString() const

return toStringCollection<LangElems>(this, '\n');

}

+const string

+LangElems::toStringOriginal() const

+ return toStringOriginalCollection<LangElems>(this, '\n');

void LangElems::add(LangElem *el)

{

push_back(el);

diff --git a/src/lib/langelems.h b/src/lib/langelems.h
index d8eb321..ff8548f 100644
--- a/src/lib/langelems.h
+++ b/src/lib/langelems.h

@@ -59,6 +59,7 @@ class LangElems : protected list<LangElem *>

void subst(LangElem *el);

const std::string toString() const;

+ const std::string toStringOriginal() const;

// doublecpp: dispatch methods, DO NOT MODIFY

public:

virtual void dispatch_build(RegExpStateBuilder *, RegExpStatePointer state);

diff --git a/src/lib/languageinfer.cpp b/src/lib/languageinfer.cpp
index 4e5962c..4eca066 100644
--- a/src/lib/languageinfer.cpp
+++ b/src/lib/languageinfer.cpp

@@ -43,6 +43,10 @@ const string LanguageInfer::infer( istream &stream )

boost::regex langRegEx("#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*([[:alnum:]]+)");

// the regular expression for finding the language specification in a script file

+ // this such as #! /usr/bin/env perl

+ boost::regex langEnvRegEx("#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*(?:env)[[:blank:]]+([[:alnum:]]+)");

+ // the regular expression for finding the language specification in a script file

// according to Emacs convention: # -*- language -*-

boost::regex langRegExEmacs("-\\*-[[:blank:]]*([[:alnum:]]+).*-\\*-");

@@ -61,6 +65,7 @@ const string LanguageInfer::infer( istream &stream )

read_line(&stream, secondLine);

boost::match_results<std::string::const_iterator> what;

+ boost::match_results<std::string::const_iterator> whatEnv;

boost::match_results<std::string::const_iterator> whatEamcs;

// first try the emacs specification

@@ -77,12 +82,19 @@ const string LanguageInfer::infer( istream &stream )

return whatEamcs[1];

}

- // try the sha-bang specification

+ // try also the env specification

+ boost::regex_search(firstLine,

+ whatEnv, langEnvRegEx, boost::match_default);

+ if (whatEnv[1].matched)

+ return whatEnv[1];

+ // finally try the sha-bang specification

boost::regex_search(firstLine,

what, langRegEx, boost::match_default);

if (what[1].matched)

return what[1];

return "";

}

diff --git a/src/lib/linebuffer.cpp b/src/lib/linebuffer.cpp
deleted file mode 100644
index 66d3b4b..0000000
--- a/src/lib/linebuffer.cpp
+++ /dev/null

@@ -1,26 +0,0 @@

-//

-// C++ Implementation: linebuffer

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#include "linebuffer.h"

-LineBuffer::LineBuffer()

-LineBuffer::~LineBuffer()

-bool LineBuffer::empty() const

- return (buffer.str().size() == 0 && post.size() == 0);

diff --git a/src/lib/linebuffer.h b/src/lib/linebuffer.h
index f55115b..e4d64d0 100644
--- a/src/lib/linebuffer.h
+++ b/src/lib/linebuffer.h

@@ -33,8 +33,8 @@ class LineBuffer

PostContents post; // to be generated after the line

public:

- LineBuffer();

- ~LineBuffer();

+ LineBuffer() {}

+ ~LineBuffer() {}

void output(const std::string &s) { buffer << s; }

void output_post(const std::string &s) { post.insert(s); }

@@ -42,7 +42,7 @@ class LineBuffer

const std::string getContents() const { return buffer.str(); }

const PostContents &getPostContents() const { return post; }

- bool empty() const;

+ bool empty() const { return (buffer.str().size() == 0 && post.size() == 0); }

};

typedef boost::shared_ptr<LineBuffer> LineBufferPtr;

diff --git a/src/lib/lineoutputgenerator.cpp b/src/lib/lineoutputgenerator.cpp
deleted file mode 100644
index bcdb3e0..0000000
--- a/src/lib/lineoutputgenerator.cpp
+++ /dev/null

@@ -1,74 +0,0 @@

-//

-// C++ Implementation: lineoutputgenerator

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#include "lineoutputgenerator.h"

-#include <iomanip>

-#include "linenumdigit.h"

-#include "my_sstream.h"

-#include "maingeneratormap.h"

-#include "textgenerator.h"

-#include "textstyle.h"

-using namespace std;

-static SubstitutionMapping substitutionmapping;

-LineOutputGenerator::LineOutputGenerator(ostream& os,

- TextStyle *anch, bool genref, const string &prefix,

- const std::string &line_pref):

- OutputGenerator(os, line_pref), generate_ref(genref && ! anch->empty()),

- anchor(anch), anchor_line_prefix(prefix), line_num(1)

- line_num_generator = generatormap->getGenerator("linenum");

-LineOutputGenerator::~LineOutputGenerator()

-void LineOutputGenerator::generate_line(const string &line)

- generate_line_info();

- ++line_num;

- OutputGenerator::generate_line(line);

-void LineOutputGenerator::reset()

- OutputGenerator::reset();

- line_num = 1;

-void

-LineOutputGenerator::generate_line_info()

- ostringstream sout;

- sout << std::setw (line_num_digit) << std::setfill ('0')

- << line_num << ":";

- string line_str = line_num_generator->generateEntire(sout.str().c_str());

- if (generate_ref) {

- ostringstream line_n;

- line_n << anchor_line_prefix << line_num;

- sout.str("");

- substitutionmapping["$text"] = line_str;

- substitutionmapping["$linenum"] = line_n.str();

- sout << anchor->output(substitutionmapping);

- line_str = sout.str();

- }

- output_string(line_str + " ");

diff --git a/src/lib/lineoutputgenerator.h b/src/lib/lineoutputgenerator.h
deleted file mode 100644
index 201abc1..0000000
--- a/src/lib/lineoutputgenerator.h
+++ /dev/null

@@ -1,48 +0,0 @@

-//

-// C++ Interface: lineoutputgenerator

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#ifndef LINEOUTPUTGENERATOR_H

-#define LINEOUTPUTGENERATOR_H

-#include <outputgenerator.h>

-class TextGenerator;

-class TextStyle;

-/**

-a specialized output generator that also generates the line number before each line

-@author Lorenzo Bettini

-*/

-class LineOutputGenerator : public OutputGenerator

-private:

- bool generate_ref;

- TextStyle *anchor;

- std::string anchor_line_prefix;

- unsigned int line_num;

- TextGenerator *line_num_generator;

-public:

- LineOutputGenerator(std::ostream& os,

- TextStyle *anch, bool genref, const std::string &prefix,

- const std::string &line_pref);

- ~LineOutputGenerator();

-protected:

- virtual void generate_line(const std::string &line);

- virtual void reset();

- virtual void generate_line_info();

-};

-#endif

diff --git a/src/lib/maingeneratormap.cpp b/src/lib/maingeneratormap.cpp
deleted file mode 100644
index 8ebb379..0000000
--- a/src/lib/maingeneratormap.cpp
+++ /dev/null

@@ -1,14 +0,0 @@

-//

-// C++ Implementation: maingeneratormap

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#include "maingeneratormap.h"

-GeneratorMap *generatormap;

diff --git a/src/lib/maingeneratormap.h b/src/lib/maingeneratormap.h
deleted file mode 100644
index f356940..0000000
--- a/src/lib/maingeneratormap.h
+++ /dev/null

@@ -1,19 +0,0 @@

-//

-// C++ Interface: maingeneratormap

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#ifndef MAINGENERATORMAP_H

-#define MAINGENERATORMAP_H

-#include "generatormap.h"

-extern GeneratorMap *generatormap;

-#endif

diff --git a/src/lib/messages.cc b/src/lib/messages.cc
index b5439af..32c1df6 100644
--- a/src/lib/messages.cc
+++ b/src/lib/messages.cc

@@ -1,5 +1,5 @@

** This program is free software; you can redistribute it and/or modify

** it under the terms of the GNU General Public License as published by

@@ -27,6 +27,8 @@

#include <stdlib.h>

#include "messages.h"

+#include "parserinfo.h"

+#include "parsestruct.h"

static Messages *_messager = 0 ;

@@ -65,9 +67,16 @@ void printError( const std::string &s, ostream &stream ) {

stream << s << endl;

}

+void printError( const std::string &s, const ParserInfo *pinfo, ostream &stream ) {

+ printError(pinfo->filename, pinfo->line, s, stream);

void printError(const std::string &filename, unsigned int line, const std::string &error, ostream & stream)

{

- stream << filename << ":" << line << ": " << error << endl;

+ stream << filename << ":";

+ if (line)

+ stream << line << ": ";

+ stream << error << endl;

}

void setMessager( Messages *m ) {

@@ -86,9 +95,29 @@ void exitError(const std::string &error)

exit(EXIT_FAILURE);

}

+void exitError(const std::string &error, const ParserInfo *pinfo)

+ exitError(pinfo->filename, pinfo->line, error);

+void exitError(const std::string &error, const ParseStruct *pinfo)

+ exitError((pinfo->path.size() ? pinfo->path + "/" : "") + pinfo->file_name, pinfo->line, error);

+void exitError(const std::string &filename, unsigned int line, const std::string &error)

+ cerr << PACKAGE << ": ";

+ printError(filename, line, error);

+ exit(EXIT_FAILURE);

void foundBug(const std::string &error, const std::string &file, int line)

{

cerr << PACKAGE << ": " << error << ", " << file << ":" << line << endl;

+ cerr << PACKAGE << ": " << "it looks like you found a bug of this program" << endl;

+ cerr << PACKAGE << ": " << "could you please send this output and the input file" << endl;

+ cerr << PACKAGE << ": " << "to the author of this program?" << endl;

exit(EXIT_FAILURE);

}

diff --git a/src/lib/messages.h b/src/lib/messages.h
index 833aa72..4fb5f8b 100644
--- a/src/lib/messages.h
+++ b/src/lib/messages.h

@@ -11,6 +11,9 @@ using std::ostream;

using std::cerr;

using std::endl;

+class ParserInfo;

+class ParseStruct;

class Messages {

public:

/// whether to print anything

@@ -66,9 +69,13 @@ void printMessage_noln( const std::string &s, ostream &stream = cerr ) ;

void printWarning( const char *s, ostream &stream = cerr ) ;

void printError( const char *s, ostream &stream = cerr ) ;

void printError( const std::string &s, ostream &stream = cerr ) ;

+void printError( const std::string &s, const ParserInfo *pinfo, ostream &stream = cerr ) ;

void printError(const std::string &filename, unsigned int line, const std::string &error, ostream & stream = cerr );

void memory_exhausted();

void exitError(const std::string &error);

+void exitError(const std::string &error, const ParserInfo *pinfo);

+void exitError(const std::string &error, const ParseStruct *pinfo);

+void exitError(const std::string &filename, unsigned int line, const std::string &error);

void foundBug(const std::string &error, const std::string &file, int line);

bool shouldPrint();

diff --git a/src/lib/my_set.h b/src/lib/my_set.h
deleted file mode 100644
index c7cce53..0000000
--- a/src/lib/my_set.h
+++ /dev/null

@@ -1,16 +0,0 @@

-// deal with namespace problems

-#ifndef _MY_SET_H

-#define _MY_SET_H

-#include <set>

-#ifdef HAVE_CONFIG_H

-#include "config.h"

-#endif

-#ifdef HAVE_NAMESPACES

-using std::set;

-#endif

-#endif // _MY_SET_H

diff --git a/src/lib/my_string.h b/src/lib/my_string.h
deleted file mode 100644
index e22d2c7..0000000
--- a/src/lib/my_string.h
+++ /dev/null

@@ -1,11 +0,0 @@

-// deal with namespace problems

-#ifdef HAVE_CONFIG_H

-#include "config.h"

-#endif // HAVE_CONFIG_H

-#include <string>

-#ifdef HAVE_NAMESPACES

-using std::string;

-#endif

diff --git a/src/lib/namedsubexpslangelem.cpp b/src/lib/namedsubexpslangelem.cpp
new file mode 100644
index 0000000..c3a4201
--- /dev/null
+++ b/src/lib/namedsubexpslangelem.cpp

@@ -0,0 +1,47 @@

+//

+// C++ Interface: NamedSubExpsLangElem

+//

+// Description: represents a regular expression made by many marked groups

+// and each marked group represents a different language element

+//

+// Copyright: See COPYING file that comes with this distribution

+//

+#include "namedsubexpslangelem.h"

+#include "stringdef.h"

+#include "tostringcollection.h"

+using namespace std;

+NamedSubExpsLangElem::NamedSubExpsLangElem(const ElementNames *names, StringDef *def,

+ bool ex, bool al) :

+ StateStartLangElem("named subexps", ex, al), // "named subexps" is a bogus name

+ elementNames(names), regexpDef(def)

+NamedSubExpsLangElem::~NamedSubExpsLangElem() {

+ if (elementNames)

+ delete elementNames;

+ if (regexpDef)

+ delete regexpDef;

+const std::string

+NamedSubExpsLangElem::toString() const

+ string res = StateStartLangElem::toString() + " " + collectionToString(elementNames, ',') +

+ regexpDef->toString();

+ return res;

+const std::string

+NamedSubExpsLangElem::toStringOriginal() const

+ string res = StateStartLangElem::toString() + " " + collectionToString(elementNames, ',') +

+ regexpDef->toStringOriginal();

+ return res;

diff --git a/src/lib/namedsubexpslangelem.h b/src/lib/namedsubexpslangelem.h
new file mode 100644
index 0000000..43126a4
--- /dev/null
+++ b/src/lib/namedsubexpslangelem.h

@@ -0,0 +1,50 @@

+//

+// C++ Interface: NamedSubExpsLangElem

+//

+// Description: represents a regular expression made by many marked groups

+// and each marked group represents a different language element

+//

+// Copyright: See COPYING file that comes with this distribution

+//

+#ifndef NAMEDSUBEXPSLANGELEM_H_

+#define NAMEDSUBEXPSLANGELEM_H_

+#include "statestartlangelem.h"

+#include <list>

+class StringDef;

+typedef std::list<std::string> ElementNames;

+// doublecpp: forward declarations, DO NOT MODIFY

+class RegExpStateBuilder; // file: regexpstatebuilder.h

+class RegExpStatePointer; // file: regexpstatebuilder.h

+// doublecpp: end, DO NOT MODIFY

+class NamedSubExpsLangElem : public StateStartLangElem

+ const ElementNames *elementNames;

+ StringDef *regexpDef;

+public:

+ NamedSubExpsLangElem(const ElementNames *names, StringDef *def, bool exit = false, bool all = false);

+ virtual ~NamedSubExpsLangElem();

+ virtual const std::string toString() const;

+ virtual const std::string toStringOriginal() const;

+ const ElementNames *getElementNames() const { return elementNames; }

+ const StringDef *getRegexpDef() const { return regexpDef; }

+// doublecpp: dispatch methods, DO NOT MODIFY

+public:

+virtual void dispatch_build(RegExpStateBuilder *, RegExpStatePointer state);

+// doublecpp: end, DO NOT MODIFY

+};

+#endif /*NAMEDSUBEXPSLANGELEM_H_*/

diff --git a/src/lib/outlangdefparser.yy b/src/lib/outlangdefparser.yy
index 4c8a28c..8caf551 100644
--- a/src/lib/outlangdefparser.yy
+++ b/src/lib/outlangdefparser.yy

@@ -1,6 +1,6 @@

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

@@ -24,8 +24,6 @@

#include <iostream>

#include <string>

-#include "my_sstream.h"

#include "messages.h"

#include "parsestruct.h"

#include "outlangdefscanner.h"

@@ -235,22 +233,13 @@ translation : REGEXDEF REGEXDEF

-void

-yyerror( const char *s )

- ostringstream str ;

- str << outlang_parsestruct->file_name << ":" << outlang_parsestruct->line << ": " << s; // << ", in option declaration";

- printError( str.str(), cerr ) ;

- exit(EXIT_FAILURE);

+extern int outlangdef_lex_destroy (void);

-/*

void

-yyerror( const string &s )

+yyerror( const char *s )

{

- yyerror(s.c_str());

+ exitError(s, outlang_parsestruct);

}

-*/

TextStylesPtr

parse_outlang_def()

@@ -262,6 +251,10 @@ parse_outlang_def()

outlangdef_parse();

delete outlang_parsestruct;

outlang_parsestruct = 0;

+ // release scanner memory

+ outlangdef_lex_destroy ();

return textstyles;

}

@@ -276,6 +269,10 @@ parse_outlang_def(const char *path, const char *name)

outlangdef_parse();

delete outlang_parsestruct;

outlang_parsestruct = 0;

+ // release scanner memory

+ outlangdef_lex_destroy ();

return textstyles;

}

@@ -292,6 +289,10 @@ parse_outlang_def_file(const char *path, const char *name)

outlangdef_parse();

delete outlang_parsestruct;

outlang_parsestruct = 0;

+ // release scanner memory

+ outlangdef_lex_destroy ();

return textstyles;

}

diff --git a/src/lib/outputgenerator.cpp b/src/lib/outputgenerator.cpp
index c43fccf..abeb06c 100644
--- a/src/lib/outputgenerator.cpp
+++ b/src/lib/outputgenerator.cpp

@@ -4,59 +4,95 @@

// Description:

// Copyright: See COPYING file that comes with this distribution

#include "outputgenerator.h"

+#include <iomanip>

+#include "linenumdigit.h"

+#include "my_sstream.h"

+#include "textgenerator.h"

+#include "textstyle.h"

using namespace std;

+/// used for line information generation

+static SubstitutionMapping substitutionmapping;

OutputGenerator::OutputGenerator(ostream &os, const std::string &pref) :

- output(os), line_prefix(pref), alwaysFlush(false)

+ output(os), line_prefix(pref), alwaysFlush(false), generateLineInfo(false),

+ line_num(1) {

+OutputGenerator::OutputGenerator(ostream& os, TextGenerator *linegen,

+ TextStyle *anch, bool genref, const string &prefix,

+ const std::string &line_pref) :

+ output(os), line_prefix(line_pref), alwaysFlush(false), generateLineInfo(true),

+ generate_ref(genref && ! anch->empty()), anchor(anch), anchor_line_prefix(prefix), line_num(1),

+ line_num_generator(linegen) {

}

-OutputGenerator::~OutputGenerator()

+OutputGenerator::~OutputGenerator() {

}

-void

-OutputGenerator::output_string(const string &s)

+void OutputGenerator::output_string(const string &s) {

output << s;

if (alwaysFlush)

- flush();

+ flush();

}

-void

-OutputGenerator::outputLine(const string &line)

- if (line_prefix.size())

- output_string(line_prefix);

+void OutputGenerator::outputLine(const string &line) {

+ if (line_prefix.size())

+ output_string(line_prefix);

- output_string(line);

+ output_string(line);

}

-void

-OutputGenerator::generateLine(const string &line)

- if (line_prefix.size())

- output_string(line_prefix);

+void OutputGenerator::generateLine(const string &line) {

+ if (line_prefix.size())

+ output_string(line_prefix);

+ generate_line(line);

+void OutputGenerator::generate_line(const string &line) {

+ if (generateLineInfo)

+ generate_line_info();

+ ++line_num;

- generate_line(line);

+ output_string(line);

}

-void

-OutputGenerator::generate_line(const string &line)

- output_string(line);

+void OutputGenerator::flush() {

+ output << std::flush;

}

-void

-OutputGenerator::flush()

- output << std::flush;

+void OutputGenerator::reset() {

+ line_num = 1;

}

+void OutputGenerator::generate_line_info() {

+ ostringstream sout;

+ sout << std::setw(line_num_digit)<< std::setfill('0')<< line_num << ":";

+ string line_str = line_num_generator->generateEntire(sout.str().c_str());

+ if (generate_ref) {

+ ostringstream line_n;

+ line_n << anchor_line_prefix << line_num;

+ sout.str("");

+ substitutionmapping["$text"] = line_str;

+ substitutionmapping["$linenum"] = line_n.str();

+ sout << anchor->output(substitutionmapping);

+ line_str = sout.str();

+ }

+ output_string(line_str + " ");

diff --git a/src/lib/outputgenerator.h b/src/lib/outputgenerator.h
index fd85058..09ee1dc 100644
--- a/src/lib/outputgenerator.h
+++ b/src/lib/outputgenerator.h

@@ -14,6 +14,9 @@

#include <iostream>

+class TextGenerator;

+class TextStyle;

/**

base class that actually writes the generated output to the output stream

@@ -28,11 +31,26 @@ class OutputGenerator

std::string line_prefix;

/// whether to flush the stream at each output (default = false)

bool alwaysFlush;

+ /// whether to generate line information

+ bool generateLineInfo;

+ // for line information

+ bool generate_ref;

+ TextStyle *anchor;

+ std::string anchor_line_prefix;

+ unsigned int line_num;

+ TextGenerator *line_num_generator;

public:

OutputGenerator(std::ostream &os, const std::string &pref);

+ OutputGenerator(std::ostream& os, TextGenerator *linegen,

+ TextStyle *anch, bool genref, const std::string &prefix,

+ const std::string &line_pref);

- virtual ~OutputGenerator();

+ ~OutputGenerator();

void setAlwaysFlush(bool b) { alwaysFlush = b; }

@@ -61,10 +79,9 @@ class OutputGenerator

void flush();

/**

- * Resets the generator. By default it does nothing, but it can be

- * overidden by subclasses

+ * Resets the generator (i.e., resets line number)

- virtual void reset() {}

+ void reset();

protected:

/**

@@ -73,7 +90,12 @@ class OutputGenerator

* @param s

- virtual void generate_line(const std::string &s);

+ void generate_line(const std::string &s);

+ /**

+ * Generates line information

+ */

+ void generate_line_info();

};

#endif

diff --git a/src/lib/parserinfo.h b/src/lib/parserinfo.h
index 9d6eee3..e2c54af 100644
--- a/src/lib/parserinfo.h
+++ b/src/lib/parserinfo.h

@@ -24,7 +24,7 @@ struct ParserInfo {

std::string filename; // including path

unsigned int line;

- ParserInfo() {}

+ ParserInfo() : line(0) {}

ParserInfo(const std::string &n) : filename(n) {}

void setParserInfo(const std::string &name, unsigned int l)

diff --git a/src/lib/parsestyles.h b/src/lib/parsestyles.h
index a753917..6de12d1 100644
--- a/src/lib/parsestyles.h
+++ b/src/lib/parsestyles.h

@@ -19,7 +19,7 @@ class GeneratorFactory;

void parseStyles(const std::string &path, const std::string &name,

GeneratorFactory *generatorFactory,

std::string &bodyBgColor) ;

-void parseStyleError(const std::string &error) ;

+void parseStyleError(const std::string &error, bool exit = true) ;

/// for css style files

void parseCssStyles(const std::string &path, const std::string &name,

diff --git a/src/lib/readtags.c b/src/lib/readtags.c
index 8cc0291..8a58827 100644
--- a/src/lib/readtags.c
+++ b/src/lib/readtags.c

@@ -1,5 +1,5 @@

-* $Id: readtags.c,v 1.7 2007年03月23日 18:13:11 bettini Exp $

+* $Id: readtags.c,v 1.8 2007年06月08日 10:11:30 bettini Exp $

diff --git a/src/lib/readtags.h b/src/lib/readtags.h
index ce32611..2bf2ccf 100644
--- a/src/lib/readtags.h
+++ b/src/lib/readtags.h

@@ -1,5 +1,5 @@

-* $Id: readtags.h,v 1.7 2007年03月23日 18:13:11 bettini Exp $

+* $Id: readtags.h,v 1.8 2007年06月08日 10:11:30 bettini Exp $

diff --git a/src/lib/refgeneratormap.cpp b/src/lib/refgeneratormap.cpp
deleted file mode 100644
index 8a1e30f..0000000
--- a/src/lib/refgeneratormap.cpp
+++ /dev/null

@@ -1,214 +0,0 @@

-//

-// C++ Implementation: refgeneratormap

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#include "refgeneratormap.h"

-#include "messages.h"

-#include "fileinfo.h"

-#include "fileutil.h"

-#include "mainoutputbuffer.h"

-#include "preformatter.h"

-#include <boost/regex.hpp>

-#include <list>

-using namespace std;

-bool isTaggable(const string &elem)

- return elem.find_first_of(' ') == string::npos;

-RefGeneratorMap::RefGeneratorMap(PreFormatter *pf, const string &_ctags_file_name,

- const TextStyles::RefTextStyle &r, RefPosition pos)

- : GeneratorMap(pf), ctags_file_name(_ctags_file_name),

- refstyle(r), refposition(pos)

- ctags_file = tagsOpen (ctags_file_name.c_str(), &info);

- if (ctags_file == 0)

- {

- exitError("cannot open tag file: " + ctags_file_name);

- }

-RefGeneratorMap::~RefGeneratorMap()

- tagsClose(ctags_file);

-//#define DEBUGREF

-#ifdef DEBUGREF

-#include <iostream>

-#define DEB(x) cerr << x << endl;

-#define DEB2(x) cerr << x ;

-#else

-#define DEB(x) ;

-#define DEB2(x) ;

-#endif

-static boost::regex string_or_space_regex("([^[:blank:]]+)|([[:blank:]]+)");

-static SubstitutionMapping ref_substitutionmapping;

-#define SPACE 2

-#define NOT_SPACE 1

-/*

-* separates a line in block of spaces and block of non spaces.

-* the stringbuffer tokens stores the pieces seen so far for which no

-* entry in the tag file was found.

-* for each block of non spaces tries to look for an entry in the tag.

-* if it finds it flushes the stringbuffer tokens (by passing its contents

-* to the parent class implementation of generateString).

-* For instance (notice the spaces among the +)

-* "myfield + myfield2 + myfield3

-* if only an entry for myfield2 is found, the we will generate

-* 3 blocks:

-* "myfield + "

-* "myfield2"

-* " + myfield3"

-*/

-const std::string RefGeneratorMap::generateString(const std::string& elem, const std::string& s, const FileInfo* fileinfo)

- if (isNoReference(elem))

- return GeneratorMap::generateString(elem, s, fileinfo);

- buffer.str("");

- ostringstream tokens;

- boost::sregex_iterator i(s.begin(), s.end(), string_or_space_regex);

- boost::sregex_iterator j;

- while (i != j) {

- if ((*i)[SPACE].matched) {

- tokens << string((*i)[SPACE].first, (*i)[SPACE].second);

- } else {

- string not_spaces = string((*i)[NOT_SPACE].first, (*i)[NOT_SPACE].second);

- string found_refs = _generateString(elem, not_spaces, fileinfo);

- if (found_refs.size()) {

- const string &previous = tokens.str();

- if (previous.size()) {

- buffer << GeneratorMap::generateString(elem, previous, fileinfo);

- tokens.str("");

- }

- buffer << found_refs;

- } else {

- tokens << not_spaces;

- }

- ++i;

- }

- const string &remainder = tokens.str();

- if (remainder.size()) {

- buffer << GeneratorMap::generateString(elem, remainder, fileinfo);

- }

- return buffer.str();

-struct RefEntry

- string filename;

- unsigned long linenumber;

-};

-const string RefGeneratorMap::_generateString(const std::string& elem, const std::string& s, const FileInfo* fileinfo)

- tagEntry entry;

- bool found = false; // whether we found a tag

- bool found_anchor = false; // whether we found an anchor

- string output;

- typedef list<RefEntry> FoundRefList;

- FoundRefList foundreflist;

- DEB("inspecting " + s)

- if (tagsFind (ctags_file, &entry, s.c_str(), TAG_FULLMATCH) == TagSuccess)

- {

- found = true;

- do

- {

- RefEntry refentry;

- refentry.filename = entry.file;

- if ((refentry.filename == fileinfo->filename ||

- refentry.filename == fileinfo->input_file_name) &&

- entry.address.lineNumber == fileinfo->line) {

- ostringstream gen_info;

- // we just found the reference to this very element

- // so we must generate an anchor

- gen_info << entry.address.lineNumber;

- DEB(" found anchor " + gen_info.str());

- ref_substitutionmapping["$text"] = preformatter->preformat(s);

- ref_substitutionmapping["$infilename"] = strip_file_path(refentry.filename);

- ref_substitutionmapping["$infile"] = refentry.filename;

- ref_substitutionmapping["$linenum"] = gen_info.str();

- output = refstyle.anchor.output(ref_substitutionmapping);

- found_anchor = true;

- break;

- }

- DEB2(" found " + string(entry.name) + " : ");

- DEB(entry.address.lineNumber);

- refentry.linenumber = entry.address.lineNumber;

- foundreflist.push_back(refentry);

- } while (tagsFindNext (ctags_file, &entry) == TagSuccess);

- }

- if (found) {

- if (! found_anchor) {

- ref_substitutionmapping["$text"] = preformatter->preformat(s);

- TextStyle *referencestyle = 0;

- if ((foundreflist.size()>1 && refposition == INLINE) || refposition == POSTLINE)

- referencestyle = &(refstyle.postline_reference);

- else if (refposition == POSTDOC)

- referencestyle = &(refstyle.postdoc_reference);

- else

- referencestyle = &(refstyle.inline_reference);

- for (FoundRefList::const_iterator it = foundreflist.begin(); it != foundreflist.end(); ++it) {

- ostringstream gen_info;

- // we found where this element appears so we generate a reference

- // if it's a link in the same file, we use the output_file_name...

- if (it->filename == fileinfo->filename || it->filename == fileinfo->input_file_name)

- gen_info << fileinfo->output_file_name;

- else

- gen_info << it->filename << fileinfo->output_file_extension;

- // ...otherwise we build the referenced file by using the output_file_extension

- // in fact, in this case, it probably means that multiple input files have been specified

- ref_substitutionmapping["$outfile"] = gen_info.str();

- ref_substitutionmapping["$infilename"] = strip_file_path(it->filename);

- ref_substitutionmapping["$infile"] = it->filename;

- gen_info.str("");

- gen_info << it->linenumber;

- ref_substitutionmapping["$linenum"] = gen_info.str();

- output += referencestyle->output(ref_substitutionmapping);

- // if the following is true, it means that there more than one reference

- if (foundreflist.size() > 1 || refposition != INLINE) {

- output += preformatter->preformat("\n");

- if (refposition == POSTLINE || refposition == INLINE) {

- outputbuffer->output_postline(output);

- } else { // (refposition == POSTDOC)

- outputbuffer->output_post(output);

- }

- output = ""; // no need to modify the current element

- }

- return output;

diff --git a/src/lib/refgeneratormap.h b/src/lib/refgeneratormap.h
deleted file mode 100644
index 3db34dc..0000000
--- a/src/lib/refgeneratormap.h
+++ /dev/null

@@ -1,49 +0,0 @@

-//

-// C++ Interface: refgeneratormap

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#ifndef REFGENERATORMAP_H

-#define REFGENERATORMAP_H

-#include <generatormap.h>

-#include "my_sstream.h"

-#include "readtags.h"

-#include "textstyles.h"

-typedef enum {INLINE=1, POSTLINE, POSTDOC} RefPosition;

-/**

-A specialized GeneratorMap that also generates anchors and references, by using ctags information

-@author Lorenzo Bettini

-*/

-class RefGeneratorMap : public GeneratorMap

-private:

- const std::string ctags_file_name;

- TextStyles::RefTextStyle refstyle;

- RefPosition refposition;

- tagFile *ctags_file;

- tagFileInfo info;

- std::ostringstream buffer;

-public:

- RefGeneratorMap(PreFormatter *pf, const std::string &_ctags_file_name,

- const TextStyles::RefTextStyle &r, RefPosition pos);

- ~RefGeneratorMap();

-protected:

- virtual const std::string generateString(const std::string& elem, const std::string& s, const FileInfo* arg1);

- const std::string _generateString(const std::string& elem, const std::string& s, const FileInfo* arg1);

-};

-#endif

diff --git a/src/lib/refposition.h b/src/lib/refposition.h
new file mode 100644
index 0000000..f80c9e0
--- /dev/null
+++ b/src/lib/refposition.h

@@ -0,0 +1,8 @@

+#ifndef REFPOSITION_H

+#define REFPOSITION_H

+// where a reference must be put

+typedef enum {INLINE=1, POSTLINE, POSTDOC} RefPosition;

+#endif // REFPOSITION_H

diff --git a/src/lib/regexpengine.cpp b/src/lib/regexpengine.cpp
new file mode 100644
index 0000000..37d377d
--- /dev/null
+++ b/src/lib/regexpengine.cpp

@@ -0,0 +1,265 @@

+//

+// C++ Implementation: regexpengine

+//

+// Description:

+//

+// Copyright: See COPYING file that comes with this distribution

+//

+#include "regexpengine.h"

+RegExpEngine::~RegExpEngine() {

+#include <fstream>

+#include <iostream>

+#include <stdlib.h>

+#include "textformatter.h"

+#include "keys.h"

+#include "langdefloader.h"

+#include "messages.h"

+#include "parserinfo.h"

+// purpose:

+// takes the contents of a file and transform to

+// syntax highlighted code in html format

+using namespace std;

+typedef enum {FOUND_EOF=0, FOUND_NL, FOUND_END} load_line_ret;

+load_line_ret load_line(std::string& s, std::istream& is) {

+ s.erase();

+ if (is.bad()|| is.eof())

+ return FOUND_EOF;

+ char c;

+ while (is.get(c)) {

+ if (c == '\n')

+ return FOUND_NL;

+ if (c != '\r')

+ s.append(1, c);

+ }

+ return FOUND_END;

+void RegExpEngine::process_file(const char *file) {

+ istream *is = 0;

+ if (file) {

+ is = new ifstream(file);

+ if (!is || ! (*is)) {

+ cerr << "Error in opening " << file<< " for input" << endl;

+ exit(1);

+ }

+ } else

+ is = &cin;

+ std::string s;

+ std::string::const_iterator start, end;

+ boost::smatch match;

+ boost::smatch what;

+ boost::match_flag_type flags;

+ // the regexp state we try at the moment.

+ RegExpStatePtr alternative;

+ // the regexp state matching best;

+ RegExpStatePtr best_alternative;

+ initial_state = currentstate;

+ fileinfo->line = 1;

+ // for selecting the formatter

+ int index_of_formatter = 0;

+ // for selecting the subexpression (or the whole expression)

+ int index_of_subexpression = 0;

+ int smallest_prefix = -1;

+ int biggest_length = -1;

+ string prefix;

+ load_line_ret ret;

+ while ((ret = load_line(s, *is)) != FOUND_EOF) {

+ bool matched = true;

+ bool found_eol = false;

+ start = s.begin();

+ end = s.end();

+ // reset the flags

+ flags = boost::match_default;

+ // always start with the current state

+ alternative = currentstate;

+ while (matched) {

+ matched = false;

+ if (alternative->has_alternative()) {

+ // this means that the state contains a list of alternative states

+ // so we must try to match all the states and use the one that matches best

+ // i.e., with the smallest prefix and the biggest match length

+ smallest_prefix = -1;

+ biggest_length = -1;

+ while (alternative.get()) {

+ if (boost::regex_search(start, end, match,

+ alternative->reg_exp, flags)) {

+ const std::string &match_prefix = match.prefix();

+ if (smallest_prefix < 0 || (boost::smatch::size_type)smallest_prefix >= match_prefix.size()) {

+ if ((boost::smatch::size_type)smallest_prefix > match_prefix.size()|| biggest_length < 0 || (boost::smatch::difference_type)biggest_length < match.length()) {

+ prefix = match.prefix();

+ smallest_prefix = match_prefix.size();

+ biggest_length = match.length();

+ best_alternative = alternative;

+ matched = true;

+ // copy it, otherwise the next call will overwrite it

+ what = match;

+ }

+ alternative = alternative->alternative;

+ }

+ if (matched) {

+ // store the one that matched best

+ alternative = best_alternative;

+ } else {

+ // reset the original current_state

+ alternative = currentstate;

+ }

+ } else if (boost::regex_search(start, end, what,

+ alternative->reg_exp, flags)) {

+ // otherwise, all the alternatives of the state are stored as

+ // a big alternation, where all alternatives are grouped

+ prefix = what.prefix();

+ matched = true;

+ }

+ if (matched) {

+ if (alternative->hasMarkedAlternatives) {

+ // we must inspect all the sub_matches

+ // to find the subexpression that matched

+ for (unsigned int i = 1; i < what.size(); ++i) {

+ if (what[i].matched) {

+ index_of_formatter = i;

+ index_of_subexpression = i;

+ break; // no other match is possible

+ }

+ } else {

+ // the alternative state contains only one expression, with

+ // marked subexpressions, so we must format the whole match

+ // the formatter at 0 is the normal formatter

+ index_of_formatter = 1;

+ // we select the whole match

+ index_of_subexpression = 0;

+ // this is OK also in the case when allAlternativesCanMatch:

+ // we consider the whole expression, and all formatters share the

+ // same exit state, so we can use the first one

+ }

+ // part that possibly did not match

+ if (prefix.size())

+ format(-1, alternative, prefix);

+ if (alternative->allAlternativesCanMatch) {

+ // we must format each subexpression that matched

+ for (unsigned int i = 1; i < what.size(); ++i) {

+ if (what[i].matched) {

+ format(i, alternative, what[i]);

+ }

+ // only the last formatter has the correct next state

+ index_of_formatter = what.size() - 1;

+ } else {

+ // format the part that matched

+ format(index_of_formatter, alternative,

+ what[index_of_subexpression]);

+ }

+ if (alternative->formatters[index_of_formatter]->getNextState()) {

+ // we must enter another state

+ enterState(alternative, index_of_formatter);

+ } else if (alternative->formatters[index_of_formatter]->exit_state_level) {

+ if (alternative->formatters[index_of_formatter]->exit_all) {

+ // we must go back to the outermost state

+ exitAll();

+ } else {

+ // we must get back to exit_state_level states

+ exitState(alternative->formatters[index_of_formatter]->exit_state_level);

+ }

+ // now let's continue with what's left of the original input

+ start = what[index_of_subexpression].second;

+ if (!(*start)) {

+ if (found_eol)

+ matched = false; // we had already matched end of line

+ // we might need to match the eol itself, so let's perform another loop

+ found_eol = true;

+ }

+ if (what[0].first != what[0].second) {

+ // we actually consumed something, so the start of the string

+ // must not be interpreted as the beginning of the line

+ flags |= boost::match_not_bol;

+ }

+ // we always search for the next match by using the original current state

+ alternative = currentstate;

+ } else {

+ // format the non-matching part as normal

+ format(-1, alternative, string(start, end));

+ matched = false;

+ }

+ if (ret == FOUND_NL)

+ formatter->format_nl("\n");

+ (fileinfo->line)++;

+ }

+ // make sure we flush all the buffered parts

+ formatter->flush();

+ if (file)

+ delete is;

+ currentstate = initial_state; // reset the initial state

+void RegExpEngine::enterState(RegExpStatePtr state, int index) {

+ states_stack.push(currentstate);

+ currentstate = state->formatters[index]->getNextState();

+void RegExpEngine::exitState(int level) {

+ // remove additional levels

+ for (int l = 1; l < level; ++l)

+ states_stack.pop();

+ currentstate = states_stack.top();

+ states_stack.pop();

+void RegExpEngine::exitAll() {

+ currentstate = initial_state;

+ states_stack = stack_of_states();

+void RegExpEngine::format(int index, RegExpStatePtr state, const std::string &s) {

+ formatter->format(state->get_elem(index), s, fileinfo);

diff --git a/src/regexpengine.h b/src/lib/regexpengine.h
index b2e9976..aefa716 100644
--- a/src/regexpengine.h
+++ b/src/lib/regexpengine.h

@@ -22,35 +22,34 @@ class TextFormatter;

#include "fileinfo.h"

/**

-the class that actually performs regular expression processing

+ the class that actually performs regular expression processing

-@author Lorenzo Bettini

-*/

-class RegExpEngine

- protected:

+ @author Lorenzo Bettini

+ */

+class RegExpEngine {

+protected:

RegExpStatePtr currentstate, initial_state;

FileInfo *fileinfo;

- private:

+private:

TextFormatter *formatter;

typedef std::stack<RegExpStatePtr> stack_of_states;

stack_of_states states_stack;

- protected:

- virtual void enterState(int index);

+protected:

+ virtual void enterState(RegExpStatePtr state, int index);

virtual void exitState(int level);

virtual void exitAll();

- virtual void format(int index, const std::string &s);

+ virtual void format(int index, RegExpStatePtr state, const std::string &s);

public:

- RegExpEngine(RegExpStatePtr v, TextFormatter *pre, FileInfo *f) :

- currentstate(v), fileinfo(f), formatter(pre)

- {}

+ RegExpEngine(RegExpStatePtr v, TextFormatter *pre, FileInfo *f) :

+ currentstate(v), fileinfo(f), formatter(pre) {

+ }

- virtual ~RegExpEngine();

+ virtual ~RegExpEngine();

- void process_file(const char *file);

+ void process_file(const char *file);

};

typedef boost::shared_ptr<RegExpEngine> RegExpEnginePtr;

diff --git a/src/regexpenginedebug.cpp b/src/lib/regexpenginedebug.cpp
index 886af8d..7e8ad78 100644
--- a/src/regexpenginedebug.cpp
+++ b/src/lib/regexpenginedebug.cpp

@@ -25,11 +25,22 @@ RegExpEngineDebug::~RegExpEngineDebug()

{

}

+void printRegExpState(RegExpStatePtr state)

+ cout << state->reg_exp;

+ RegExpStatePtr alternative = state->alternative;

+ while (alternative.get()) {

+ cout << "\n" << " " << alternative->reg_exp;

+ alternative = alternative->alternative;

+ }

-void RegExpEngineDebug::enterState(int index)

+void RegExpEngineDebug::enterState(RegExpStatePtr state, int index)

{

- cout << "entering: " << currentstate->formatters[index]->getNextState()->reg_exp << endl;

- RegExpEngine::enterState(index);

+ cout << "entering: ";

+ printRegExpState(state->formatters[index]->getNextState());

+ cout << endl;

+ RegExpEngine::enterState(state, index);

//step();

}

@@ -44,27 +55,32 @@ void RegExpEngineDebug::exitState(int level)

{

RegExpEngine::exitState(level);

- cout << "exiting " << level << " level(s): " << currentstate->reg_exp << endl;

+ cout << "exiting " << level << " level(s): ";

+ printRegExpState(currentstate);

+ cout << endl;

//step();

}

void printInfo(const SubExpressionInfo &e)

{

+ if (!e.second.line)

+ return; // it concerns a subexpressions

cout << e.second.filename << ":" << e.second.line << ": "

<< e.first << endl;

}

-void RegExpEngineDebug::format(int index, const std::string& s)

+void RegExpEngineDebug::format(int index, RegExpStatePtr state, const std::string& s)

{

- RegExpEngine::format(index, s);

+ RegExpEngine::format(index, state, s);

if(index >= 0) {

unsigned int i = (unsigned int)index;

- assert(i <= currentstate->subExpressions.size());

+ assert(i <= state->subExpressions.size());

- printInfo(currentstate->subExpressions[i-1]);

+ printInfo(state->subExpressions[i-1]);

}

- cout << "formatting: \"" << s << "\" as " << currentstate->get_elem(index) << endl;

+ cout << "formatting: \"" << s << "\" as " << state->get_elem(index) << endl;

step();

}

diff --git a/src/regexpenginedebug.h b/src/lib/regexpenginedebug.h
index d14a5bd..b673d4f 100644
--- a/src/regexpenginedebug.h
+++ b/src/lib/regexpenginedebug.h

@@ -32,10 +32,10 @@ protected:

/// whether it's an interactive debug session

bool interactive;

- virtual void enterState(int index);

+ virtual void enterState(RegExpStatePtr state, int index);

virtual void exitAll();

virtual void exitState(int level);

- virtual void format(int index, const std::string& s);

+ virtual void format(int index, RegExpStatePtr state, const std::string& s);

/**

* Waits for a step command (if in interactive mode)

diff --git a/src/lib/regexpreprocessor.cpp b/src/lib/regexpreprocessor.cpp
index f0f7e5f..516c2e9 100644
--- a/src/lib/regexpreprocessor.cpp
+++ b/src/lib/regexpreprocessor.cpp

@@ -1,10 +1,10 @@

-// C++ Implementation: %{MODULE}

+// C++ Implementation: RegexPreProcessor

-// Description:

+// Description: performs operations or inspections on a string representing

+// a valid regular expression

-//

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

// Copyright: See COPYING file that comes with this distribution

@@ -16,71 +16,189 @@

using namespace std;

+// IMPORTANT: the following regular expressions assume that the

+// regular expression they try to match is a valid regular expression

+// matches character sets in a regular expression

const boost::regex char_set_exp("\\[([^\\|]*)\\]");

-RegexPreProcessor::RegexPreProcessor()

+// substitute a "(" with "(?:" if it's not followed by a ? and not preceded by \\ char

+const boost::regex from("(\\\\\\()|(\\((?!\\?))");

+const string into = "(?1\\\\\\()(?2\\(\\?\\:)";

+// found actual marking parenthesis, i.e., not preceded by \\ and not followed by ?

+const boost::regex paren("(?<!\\\\)\\((?!\\?)");

-RegexPreProcessor::~RegexPreProcessor()

+// regular expression matching a backreference, e.g., \1, or one inside a conditional (?(1)...)

+const boost::regex backreference("(\\\\[[:digit:]])|(\\(\\?\\([[:digit:]])");

const string

-RegexPreProcessor::preprocess(const string &s)

- // substitute a "(" with "(?:" if it's not followed by a ? and not preceeded by \\ char

- boost::regex from("(\\\\\\()|(\\((?!\\?))");

- string into = "(?1\\\\\\()(?2\\(\\?\\:)";

+ subexpressions_info::ERR_OUTER_UNMARKED = "unmarked subexpressions are allowed only inside marked subexpressions";

+const string

+ subexpressions_info::ERR_NESTED_SUBEXP = "subexpressions of subexpressions are not allowed";

+const string subexpressions_info::ERR_UNBALANCED_PAREN = "unbalanced parenthesis";

+const string

+ subexpressions_info::ERR_OUTSIDE_SUBEXP = "parts outside marked subexpressions are not allowed";

- return boost::regex_replace(s, from, into, boost::match_default | boost::format_all);

+RegexPreProcessor::RegexPreProcessor() {

}

-const string

-_make_nonsensitive(const string &s)

- ostringstream result;

+RegexPreProcessor::~RegexPreProcessor() {

+const string RegexPreProcessor::preprocess(const string &s) {

+ // substitute a "(" with "(?:" if it's not followed by a ? and not preceded by \\ char

+ return boost::regex_replace(s, from, into,

+ boost::match_default | boost::format_all);

+const string _make_nonsensitive(const string &s) {

+ ostringstream result;

- for (string::const_iterator it = s.begin(); it != s.end(); ++it)

- if (isalpha(*it))

- result << "[" << (char)toupper(*it) << (char)tolower(*it) << "]";

- else

- result << *it;

+ for (string::const_iterator it = s.begin(); it != s.end(); ++it)

+ if (isalpha(*it))

+ result << "[" << (char)toupper(*it) << (char)tolower(*it) << "]";

+ else

+ result << *it;

- return result.str();

+ return result.str();

}

-const string

-RegexPreProcessor::make_nonsensitive(const string &s)

- boost::sregex_iterator m1(s.begin(), s.end(), char_set_exp);

- boost::sregex_iterator m2;

+const string RegexPreProcessor::make_nonsensitive(const string &s) {

+ boost::sregex_iterator m1(s.begin(), s.end(), char_set_exp);

+ boost::sregex_iterator m2;

- if (m1 == m2)

- return _make_nonsensitive(s);

+ if (m1 == m2)

+ return _make_nonsensitive(s);

- ostringstream buffer;

- string prefix;

- string suffix;

+ ostringstream buffer;

+ string prefix;

+ string suffix;

- for (boost::sregex_iterator it = m1; it != m2; ++it)

- {

- prefix = it->prefix();

- suffix = it->suffix();

+ for (boost::sregex_iterator it = m1; it != m2; ++it) {

+ prefix = it->prefix();

+ suffix = it->suffix();

- if (prefix.size()) {

- buffer << _make_nonsensitive(prefix);

+ if (prefix.size()) {

+ buffer << _make_nonsensitive(prefix);

+ }

+ buffer << (*it)[0];

}

- buffer << (*it)[0];

- }

+ if (suffix.size()) {

+ buffer << _make_nonsensitive(suffix);

+ }

- if (suffix.size()) {

- buffer << _make_nonsensitive(suffix);

- }

+ return buffer.str();

- return buffer.str();

+unsigned int RegexPreProcessor::num_of_subexpressions(const string &s)

+ boost::sregex_iterator m1(s.begin(), s.end(), paren);

+ boost::sregex_iterator m2;

+ int counter = 0;

+ for (boost::sregex_iterator it = m1; it != m2; ++it)

+ {

+ ++counter;

+ }

+ return counter;

}

+const subexpressions_strings *RegexPreProcessor::split_marked_subexpressions(const string &s) {

+ boost::sregex_iterator m1(s.begin(), s.end(), paren);

+ boost::sregex_iterator m2;

+ // we don't need to parse it (we can use the regex) since we assume that

+ // the regular expression represented by s is made up of only

+ // marked subexpressions and no nested subexpressions and char outside subexpressions

+ subexpressions_strings *split = new subexpressions_strings;

+ for (boost::sregex_iterator it = m1; it != m2; )

+ {

+ string prefix = it->prefix();

+ if (prefix.size())

+ split->push_back("(" + prefix);

+ string suffix = it->suffix();

+ if (++it == m2)

+ split->push_back("(" + suffix);

+ }

+ return split;

+subexpressions_info RegexPreProcessor::num_of_marked_subexpressions(const string &s) {

+ subexpressions_info sexps;

+ // number of open parenthesis

+ int open_paren_num = 0;

+ // whether we're inside a marked subexpressions

+ bool found_marked_subexp = false;

+ // len of string

+ int len = s.size();

+ // char we're examining

+ char c;

+ for (int i = 0; i < len; ++i) {

+ c = s[i];

+ if (c == '\\' && (i+1) < len && (s[i+1] == '(' || s[i+1] == ')')) {

+ // skip the escaped paren

+ ++i;

+ } else if (c == '(') {

+ // we found a subexp

+ ++open_paren_num;

+ if ((i+1) < len && s[i+1] == '?') {

+ if (!found_marked_subexp) {

+ // outer subexpressions must be marked

+ sexps.errors = subexpressions_info::ERR_OUTER_UNMARKED;

+ return sexps;

+ }

+ } else {

+ // it's a marked subexp

+ if (found_marked_subexp) {

+ // we don't allow nested subexpressions

+ sexps.errors = subexpressions_info::ERR_NESTED_SUBEXP;

+ return sexps;

+ }

+ // we found a marked subexp

+ found_marked_subexp = true;

+ ++(sexps.marked);

+ }

+ } else if (c == ')') {

+ if (!open_paren_num) {

+ // unbalanced parenthesis

+ sexps.errors = subexpressions_info::ERR_UNBALANCED_PAREN;

+ return sexps;

+ }

+ --open_paren_num;

+ // end of marked subexp

+ if (!open_paren_num && found_marked_subexp)

+ found_marked_subexp = false;

+ } else {

+ // we don't allow non marked parts

+ if (!found_marked_subexp) {

+ sexps.errors = subexpressions_info::ERR_OUTSIDE_SUBEXP;

+ return sexps;

+ }

+ // check that all paren are closed

+ if (open_paren_num)

+ sexps.errors = subexpressions_info::ERR_UNBALANCED_PAREN;

+ return sexps;

+bool RegexPreProcessor::contains_backreferences(const std::string &s) {

+ return boost::regex_search(s, backreference);

diff --git a/src/lib/regexpreprocessor.h b/src/lib/regexpreprocessor.h
index 0842c6a..8625023 100644
--- a/src/lib/regexpreprocessor.h
+++ b/src/lib/regexpreprocessor.h

@@ -1,10 +1,10 @@

-// C++ Interface: %{MODULE}

+// C++ Interface: RegexPreProcessor

-// Description:

+// Description: performs operations or inspections on a string representing

+// a valid regular expression

-//

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

// Copyright: See COPYING file that comes with this distribution

@@ -13,20 +13,90 @@

#define REGEXPREPROCESSOR_H

#include <string>

+#include <list>

+/**

+ * info about subexpressions

+ */

+struct subexpressions_info {

+ /// errors for subexpression checking

+ const static std::string ERR_OUTER_UNMARKED;

+ const static std::string ERR_NESTED_SUBEXP;

+ const static std::string ERR_UNBALANCED_PAREN;

+ const static std::string ERR_OUTSIDE_SUBEXP;

+ /// num of marked subexpressions

+ unsigned int marked;

+ /// error specifications, if any

+ std::string errors;

+ subexpressions_info() : marked(0) {}

+};

+/**

+ * all the marked subexpressions in a list

+ */

+typedef std::list<std::string> subexpressions_strings;

/**

-preprocess a regular expression, e.g., transform "()" into "(?:)"

+ preprocess a regular expression, e.g., transform "()" into "(?:)"

-@author Lorenzo Bettini

-*/

-class RegexPreProcessor{

+ @author Lorenzo Bettini

+ */

+class RegexPreProcessor {

public:

RegexPreProcessor();

~RegexPreProcessor();

+ /**

+ * translates marked subexpressions (...) into non marked subexpressions (?: )

+ * @return the translated string

+ */

static const std::string preprocess(const std::string &s);

+ /**

+ * translates the expression into a case nonsensitive expression, i.e.,

+ * foo is translated into [Ff][Oo][Oo]

+ * @return the translated string

+ */

static const std::string make_nonsensitive(const std::string &s);

+ /**

+ * counts the number of marked subexpressions (...)

+ * @return the number of marked subexpressions

+ */

+ static unsigned int num_of_subexpressions(const std::string &s);

+ /**

+ * checks that the expression is made up of marked subexpressions (...)

+ * and no nested subexpressions and no char outside subexpressions

+ *

+ * @return the struct containing the number of marked subexpressions

+ * and possible errors

+ */

+ static subexpressions_info num_of_marked_subexpressions(const std::string &s);

+ /**

+ * Splits the marked subexpressions of a regular expression made up of only

+ * marked subexpressions and no nested subexpressions and char outside subexpressions

+ * (thus, before calling this, you must make sure that num_of_marked_subexpressions

+ * did not return an error).

+ *

+ * @return the subexpressions in a collection (this is allocated on the heap, so

+ * it is up to the caller to delete it)

+ */

+ static const subexpressions_strings *split_marked_subexpressions(const std::string &s);

+ /**

+ * Checks whether the passed regular expression string contains

+ * a backreference (e.g., either \1 or a conditional with a backreference,

+ * (?(1)...))

+ *

+ * @return true if the passed regular expression string contains

+ * a backreference

+ */

+ static bool contains_backreferences(const std::string &s);

};

#endif

diff --git a/src/lib/regexpstate.cpp b/src/lib/regexpstate.cpp
new file mode 100644
index 0000000..af308ef
--- /dev/null
+++ b/src/lib/regexpstate.cpp

@@ -0,0 +1,207 @@

+//

+// C++ Implementation: RegExpState

+//

+// Description: a regular expression state: contains the regular expression to

+// match and the formatters for each alternative.

+//

+// Copyright: See COPYING file that comes with this distribution

+//

+#include "regexpstate.h"

+#include "messages.h"

+#include <stdlib.h>

+using namespace std;

+int RegExpState::global_id = 1;

+RegExpFormatter::RegExpFormatter(const string &el, RegExpStatePtr r, int exit,

+ bool all) :

+ elem(el), exit_state_level(exit), exit_all(all), next_state(r) {

+void RegExpFormatter::setNextState(RegExpStatePtr r) {

+ next_state_strong = r;

+RegExpStatePtr RegExpFormatter::getNextState() const {

+ RegExpStatePtr next = next_state.lock();

+ if (!next)

+ return next_state_strong;

+ return next;

+/**

+ * Return the formatter associated to the passed index.

+ * If the index is negative, it returns the default formatter.

+ * @param index

+ * @return

+ */

+const string &RegExpState::get_elem(int index) {

+ return formatters[(index<0 ? 0 : index)]->elem;

+void RegExpState::add_exp(const std::string &s, ParserInfo *parserInfo,

+ RegExpFormatterPtr f) {

+ if (alternative.get()) {

+ alternative->add_exp(s, parserInfo, f);

+ return;

+ } else {

+ if (nextAddMustCreateAnAlternative) {

+ // a previous operation had recorded the fact that the next

+ // add_exp should have been performed as a creation of an alternative

+ add_alternative(s, parserInfo, f);

+ // but further addition on the alternative must not create further alternatives

+ alternative->nextAddMustCreateAnAlternative = false;

+ return;

+ }

+ const string &ex = buffer.str();

+ if (ex.size())

+ buffer << "|";

+ buffer << s;

+ formatters.push_back(f);

+ subExpressions.push_back(make_pair(s, *parserInfo));

+void RegExpState::add_exp(const std::string &s, ParserInfo *parserInfo,

+ const format_vector &f) {

+ if (alternative.get()) {

+ alternative->add_exp(s, parserInfo, f);

+ return;

+ }

+ RegExpState *myalternative;

+ if (buffer.str()== "") {

+ // we must not add an alternative: this state is the alternative

+ myalternative = this;

+ } else {

+ // we must isolate this regexp in an alternative

+ alternative = RegExpStatePtr(new RegExpState);

+ myalternative = alternative.get();

+ }

+ // the default formatter could be already set, and so we must

+ // transmit it right now, otherwise the alternative will never have it.

+ if (formatters[0].get()) {

+ myalternative->set_default_formatter(formatters[0]);

+ }

+ // another add exp on the alternative must create another alternative

+ myalternative->nextAddMustCreateAnAlternative = true;

+ myalternative->buffer << s;

+ // if the default formatter is not set, reserve space for it

+ if (myalternative->formatters[0].get())

+ ;

+ std::copy(f.begin(), f.end(), back_inserter(myalternative->formatters));

+ // add a subexpression for each formatter (just to be consistent with

+ // other cases).

+ for (format_vector::const_iterator it = f.begin(); it != f.end(); ++it)

+ myalternative->subExpressions.push_back(make_pair(s, *parserInfo));

+void RegExpState::add_alternative(const std::string &s, ParserInfo *parserInfo,

+ RegExpFormatterPtr f) {

+ if (!alternative.get()) {

+ if (buffer.str()== "") {

+ // we must not add an alternative: this state is the alternative

+ add_exp(s, parserInfo, f);

+ // but the next add should be put in an alternative

+ nextAddMustCreateAnAlternative = true;

+ } else {

+ alternative = RegExpStatePtr(new RegExpState);

+ // the default formatter could be already set, and so we must

+ // transmit it right now, otherwise the alternative will never have it.

+ if (formatters[0].get()) {

+ alternative->set_default_formatter(formatters[0]);

+ }

+ // forward to the alternative

+ alternative->add_exp(s, parserInfo, f);

+ // another add exp on the alternative must create another alternative

+ alternative->nextAddMustCreateAnAlternative = true;

+ }

+ } else {

+ alternative->add_alternative(s, parserInfo, f);

+ }

+void RegExpState::setHasMarkedAlternatives() {

+ if (alternative.get()) {

+ alternative->setHasMarkedAlternatives();

+ } else {

+ hasMarkedAlternatives = true;

+ }

+void RegExpState::setAllAlternativesCanMatch() {

+ if (alternative.get()) {

+ alternative->setAllAlternativesCanMatch();

+ } else {

+ allAlternativesCanMatch = true;

+ }

+void RegExpState::add_subexp_formatter(RegExpFormatterPtr f) {

+ if (alternative.get()) {

+ alternative->add_subexp_formatter(f);

+ return;

+ }

+ formatters.push_back(f);

+ // FIXME: insert a bogus subexpression

+ subExpressions.push_back(make_pair("", ParserInfo()));

+void RegExpState::freeze() throw(boost::bad_expression) {

+ const string &buffered = buffer.str();

+ try {

+ reg_exp.assign(buffered);

+ // call freeze also on alternative

+ if (alternative.get()) {

+ alternative->freeze();

+ }

+ } catch (boost::bad_expression &e) {

+ printError("bad expression: " + buffered);

+ throw;

+ }

+void RegExpState::set_default_formatter(RegExpFormatterPtr f) {

+ formatters[0] = f;

+ if (alternative.get())

+ alternative->set_default_formatter(f);

+RegExpFormatterPtr RegExpState::getLastFormatter() const {

+ if (alternative.get())

+ return alternative->getLastFormatter();

+ return formatters[formatters.size()-1];

+bool RegExpState::has_alternative() const {

+ return (alternative.get());

diff --git a/src/lib/regexpstate.h b/src/lib/regexpstate.h
new file mode 100644
index 0000000..00085f4
--- /dev/null
+++ b/src/lib/regexpstate.h

@@ -0,0 +1,191 @@

+//

+// C++ Interface: RegExpState

+//

+// Description: a regular expression state: contains the regular expression to

+// match and the formatters for each alternative.

+//

+// Copyright: See COPYING file that comes with this distribution

+//

+#ifndef REGEXPSTATE_H

+#define REGEXPSTATE_H

+#include <boost/regex.hpp>

+#include <boost/shared_ptr.hpp>

+#include <boost/weak_ptr.hpp>

+#include <deque>

+#include <vector>

+#include "my_sstream.h"

+#include "parserinfo.h"

+struct RegExpState;

+typedef boost::shared_ptr<RegExpState> RegExpStatePtr;

+typedef boost::weak_ptr<RegExpState> RegExpStatePtrW;

+struct RegExpFormatter

+ const std::string elem; // the element represented

+ int exit_state_level; // how many states we must leave

+ bool exit_all;

+ RegExpFormatter(const std::string &el, RegExpStatePtr r = RegExpStatePtr(), int exit = 0, bool all = false);

+ void setNextState(RegExpStatePtr r);

+ RegExpStatePtr getNextState() const;

+ private:

+ RegExpStatePtrW next_state;

+ RegExpStatePtr next_state_strong;

+ /*

+ FIXME

+ the next_state is a weak pointer when there's a "nested" situation.

+ This allows to avoid cycles, that otherwise would prevent memory from

+ being correctly freed.

+ */

+};

+typedef boost::shared_ptr<RegExpFormatter> RegExpFormatterPtr;

+typedef std::deque<RegExpFormatterPtr> format_vector;

+typedef std::pair<std::string, ParserInfo> SubExpressionInfo;

+typedef std::vector<SubExpressionInfo> SubExpressions;

+/**

+class representing a state for the regular expression engine

+@author Lorenzo Bettini

+ */

+struct RegExpState

+ static int global_id;

+ const int id; // the identifier of the state

+ /// the regular expression (with all the alternatives) for this state

+ boost::regex reg_exp;

+ /// for each alternative keep the parser info

+ SubExpressions subExpressions;

+ /// the formatters (one for each alternative)

+ format_vector formatters;

+ /// where to buffer the expression strings (added with add_exp)

+ /// when freeze is called these will be used to create the reg_exp

+ std::ostringstream buffer;

+ /// if the regular expression is not matched try with this alternative

+ /// state (this is used to split regular expressions where an alternative

+ /// has a back reference, since back references are limited to 9)

+ RegExpStatePtr alternative;

+ /**

+ * Records the fact that if an add_exp is invoked on this object,

+ * then an alternative must be created and the exp should be inserted there

+ */

+ bool nextAddMustCreateAnAlternative;

+ /**

+ * Means that this state has a regular expression made up of (possibly)

+ * many marked subexpressions, each of which is an alternative, e.g.,

+ * (foo)|(#)|...

+ *

+ * This is crucial since, when formatting, we need to inspect each sub_match

+ * of match_result to find out which one matched (and so, which formatter to use).

+ */

+ bool hasMarkedAlternatives;

+ /**

+ * Means that this state has a regular expression made up of marked subexpressions

+ * where all of them can match, e.g.,

+ * (class)([[:blank:]]*)([[:alnum:]]+)

+ *

+ * This is crucial since, when formatting, we need to inspect each sub_match of

+ * match_result to find out all those that matched. This is different from the

+ * case of hasMarkedAlternatives: in that case only one can match

+ */

+ bool allAlternativesCanMatch;

+ RegExpState() :

+ id(global_id++), formatters(1), alternative(RegExpStatePtr()),

+ nextAddMustCreateAnAlternative(false),

+ hasMarkedAlternatives(false),

+ allAlternativesCanMatch(false) {}

+ const std::string &get_elem(int index = -1);

+ /**

+ * Adds the formatter for the given regular expression (and the file info

+ * of the original language definition file).

+ *

+ * The expression is only buffered (i.e., the regular expression is not built

+ * until freeze is called)

+ *

+ * @param s the regular expression string

+ * @param parserInfo

+ * @param f

+ */

+ void add_exp(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f);

+ /**

+ * Adds the formatters for the given regular expression (and the file info

+ * of the original language definition file). Each formatter of the passed

+ * vector is related to the corresponding marked subexpression of the passed expression.

+ *

+ * The expression is only buffered (i.e., the regular expression is not built

+ * until freeze is called)

+ *

+ * @param s the regular expression string

+ * @param parserInfo

+ * @param f

+ */

+ void add_exp(const std::string &s, ParserInfo *parserInfo, const format_vector &f);

+ /**

+ * Basically the same as add_exp, but

+ * 1. creates the alternative RegExpState

+ * 2. forwards all the operations to it from now on

+ */

+ void add_alternative(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f);

+ /**

+ * Sets that we added a marked subexpression

+ */

+ void setHasMarkedAlternatives();

+ /**

+ * Sets that we added a subexpression where all alternatives can match

+ */

+ void setAllAlternativesCanMatch();

+ /**

+ * Adds the formatter for a (marked) subexpression

+ *

+ * @param f

+ */

+ void add_subexp_formatter(RegExpFormatterPtr f);

+ /**

+ * Actually build the regular expression from the buffered strings

+ */

+ void freeze() throw(boost::bad_expression);

+ /**

+ * The default formatter in case the regular expression is not matched

+ */

+ void set_default_formatter(RegExpFormatterPtr f);

+ /**

+ * @return the last formatter

+ */

+ RegExpFormatterPtr getLastFormatter() const;

+ /**

+ * @return whether this state has an alternative

+ */

+ bool has_alternative() const;

+};

+#endif

diff --git a/src/lib/regexpstatebuilder.H b/src/lib/regexpstatebuilder.H
index 5e3fcb1..c47ddc6 100644
--- a/src/lib/regexpstatebuilder.H
+++ b/src/lib/regexpstatebuilder.H

@@ -1,10 +1,10 @@

-// C++ Interface: %{MODULE}

+// C++ Interface: RegExpStateBuilder

-// Description:

+// Description: Builds the RegExpStates starting from all the language elements.

+//

-//

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

// Copyright: See COPYING file that comes with this distribution

@@ -34,6 +34,7 @@ build a RegExpState starting from language definitions

class DelimitedLangElem; // file: delimitedlangelem.h

class LangElem; // file: langelem.h

class LangElems; // file: langelems.h

+class NamedSubExpsLangElem; // file: namedsubexpslangelem.h

class RegExpStatePointer; // file: regexpstatebuilder.h

class StateLangElem; // file: statelangelem.h

class StateStartLangElem; // file: statestartlangelem.h

@@ -66,8 +67,10 @@ virtual void build(DelimitedLangElem * elem, RegExpStatePointer state);

#line 50 "regexpstatebuilder.h"

virtual void build(StateStartLangElem * elem, RegExpStatePointer state);

#line 51 "regexpstatebuilder.h"

-virtual void build(LangElem * elem, RegExpStatePointer state);

+virtual void build(NamedSubExpsLangElem * elem, RegExpStatePointer state);

#line 52 "regexpstatebuilder.h"

+virtual void build(LangElem * elem, RegExpStatePointer state);

+#line 53 "regexpstatebuilder.h"

virtual void build(LangElems * elems, RegExpStatePointer state);

public:

void _forward_build(DelimitedLangElem * elem, RegExpStatePointer state)

@@ -85,6 +88,11 @@ void _forward_build(LangElems * elems, RegExpStatePointer state)

build(elems, state);

};

+void _forward_build(NamedSubExpsLangElem * elem, RegExpStatePointer state)

+ build(elem, state);

+};

void _forward_build(StateLangElem * elem, RegExpStatePointer state)

{

build(elem, state);

@@ -103,7 +111,7 @@ void _forward_build(StringListLangElem * elem, RegExpStatePointer state)

protected:

virtual void build_DB(LangElem * elem, RegExpStatePointer state);

virtual void build_DB(LangElems * elems, RegExpStatePointer state);

-#line 52 "regexpstatebuilder.h"

+#line 53 "regexpstatebuilder.h"

// doublecpp: end, DO NOT MODIFY

diff --git a/src/lib/regexpstatebuilder.cpp b/src/lib/regexpstatebuilder.cpp
index f98e37f..aeac068 100644
--- a/src/lib/regexpstatebuilder.cpp
+++ b/src/lib/regexpstatebuilder.cpp

@@ -1,10 +1,10 @@

-// C++ Implementation: %{MODULE}

+// C++ Implementation: regexpstatebuilder.cpp

-// Description:

+// Description: Builds the regexp automaton

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

+// Author: Lorenzo Bettini, 2007, http://www.lorenzobettini.it

// Copyright: See COPYING file that comes with this distribution

@@ -17,6 +17,7 @@

#include "statelangelem.h"

#include "stringlistlangelem.h"

#include "delimitedlangelem.h"

+#include "namedsubexpslangelem.h"

#include "regexpstate.h"

#include "stringdef.h"

#include "tostringcollection.h"

@@ -27,17 +28,22 @@

using namespace std;

static const string buildex(const string &s);

-static const string buildex_pre(const string &s);

-void add_exp(RegExpStatePtr state, const string &orig, const string &exp, ParserInfo *parserInfo, RegExpFormatterPtr f)

+void add_exp(RegExpStatePtr state, const string &exp, ParserInfo *parserInfo, RegExpFormatterPtr f)

{

- try {

- state->add_exp(exp, parserInfo, f);

- } catch (boost::bad_expression &e) {

- exitError("wrong original expression: " + orig);

- }

+ unsigned int numOfSubexpressions = RegexPreProcessor::num_of_subexpressions(exp);

+ if (numOfSubexpressions) {

+ // for marked subexpressions we must not use buildex, otherwise we might change

+ // the subexpressions indexes (e.g., for backreferences)

+ state->add_alternative(exp, parserInfo, f);

+ } else {

+ state->add_exp(buildex(exp), parserInfo, f);

+ // record that we (manually) added an explicit marked subexpression

+ state->setHasMarkedAlternatives();

+ }

/**

* Definitely associate the regular expression to this state

@@ -64,6 +70,22 @@ RegExpStateBuilder::~RegExpStateBuilder()

{

}

+void setFormatterExitLevel(StateStartLangElem *elem, RegExpFormatterPtr formatter) {

+ bool exit_all = elem->exitAll();

+ bool exit = elem->doExit();

+ /*

+ only act on the exit state (if any exit statement is defined)

+ */

+ if (exit_all) {

+ formatter->exit_state_level = 1;

+ formatter->exit_all = true;

+ }

+ if (exit)

+ formatter->exit_state_level = 1;

RegExpStatePtr

RegExpStateBuilder::build(LangElems *elems)

{

@@ -93,11 +115,11 @@ RegExpStateBuilder::build(LangElems *elems, RegExpStatePointer state)

// try to find out where the problem is...

RegExpStatePtr temp_state(new RegExpState());

for (LangElems::const_iterator it = elems->begin(); it != elems->end(); ++it) {

- build(*it, temp_state);

+ build_DB(*it, temp_state);

try {

temp_state->freeze();

} catch (boost::bad_expression &e) {

- exitError("problem in this expression: " + (*it)->toString());

+ exitError("problem in this expression: " + (*it)->toStringOriginal(), *it);

}

} else {

@@ -111,23 +133,24 @@ RegExpStateBuilder::build(LangElem *elem, RegExpStatePointer state)

}

/**

- * Build a subexpression starting from s

+ * Build a non-marking group, i.e., (?: ... ), starting from s

* @param s

* @return

-const string buildex(const string &s)

+const string non_marking_group(const string &s)

{

- return "(" + s + ")";

+ return "(?:" + s + ")";

}

/**

- * Build a subexpression starting from s, after preprocessing s

+ * Build a subexpression starting from s

* @param s

* @return

-const string buildex_pre(const string &s)

+const string buildex(const string &s)

{

- return buildex(RegexPreProcessor::preprocess(s));

+ return "(" + s + ")";

}

/**

@@ -185,17 +208,59 @@ RegExpStateBuilder::build(StringListLangElem *elem, RegExpStatePointer state)

if (!elem->isCaseSensitive())

stringdef = RegexPreProcessor::make_nonsensitive(stringdef);

- string exp_string = buildex_pre(stringdef);

+ string exp_string = non_marking_group(stringdef);

if (isToIsolate)

exp_string = buildex_isolated(exp_string);

RegExpFormatterPtr formatter(new RegExpFormatter(name));

- add_exp(state, exp_string, buildex_pre(exp_string), elem, formatter);

+ add_exp(state, exp_string, elem, formatter);

build(static_cast<StateStartLangElem *>(elem), state);

}

/**

+ * Case of a list of language elements, each representing a

+ * marked subexpression

+ * @param elem

+ * @param state

+ */

+void

+RegExpStateBuilder::build(NamedSubExpsLangElem *elem, RegExpStatePointer state)

+ const ElementNames *elems = elem->getElementNames();

+ const StringDef *regexp = elem->getRegexpDef();

+ format_vector formatters;

+ const string &regexp_string = regexp->toString();

+ // first check that the number of marked subexpressions is the same of

+ // the specified element names

+ subexpressions_info sexps = RegexPreProcessor::num_of_marked_subexpressions(regexp_string);

+ if (sexps.errors.size()) {

+ exitError(sexps.errors, elem);

+ }

+ if (sexps.marked != elems->size()) {

+ exitError("number of marked subexpressions does not match number of elements", elem);

+ }

+ // for each named group build a formatter, that corresponds to that element

+ for (ElementNames::const_iterator it = elems->begin(); it != elems->end(); ++it) {

+ RegExpFormatterPtr formatter = RegExpFormatterPtr(new RegExpFormatter(*it));

+ // each formatter will share the same exit level, since it represents the

+ // same matched regexp

+ setFormatterExitLevel(elem, formatter);

+ formatters.push_back(formatter);

+ }

+ // now add all the formatters for this element

+ state->add_exp(regexp_string, elem, formatters);

+ // record that all the subexpressions can match

+ state->setAllAlternativesCanMatch();

+/**

* Case of a delimited element

* @param elem

* @param state

@@ -242,10 +307,10 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)

"<(?:[^<>])*>"

if (!escape) {

- exp_string = start_string + "([^" +

+ exp_string = start_string + non_marking_group("[^" +

start_string +

(end_string != start_string ? end_string : "") +

- "])*" + end_string;

+ "]") + "*" + end_string;

} else {

in case of a specified escape character it will use it for the

@@ -255,12 +320,12 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)

<(?:[^\\<\\>]|\\.)*>

- exp_string = start_string + "([^" +

+ exp_string = start_string + non_marking_group("[^" +

escape_string +

start_string +

(end_string != start_string ? escape_string + end_string : "") +

- "]|"+ escape_string + "." +

- ")*" + end_string;

+ "]|"+ escape_string + ".") +

+ "*" + end_string;

}

} else {

@@ -300,7 +365,7 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)

1 + (elem->doExit() ? 1 : 0),

elem->exitAll()));

if (end)

- add_exp(inner, end_string, buildex_pre(end_string), elem, exit);

+ add_exp(inner, end_string, elem, exit);

else

inner->add_exp(buildex("\\z"), elem, exit);

@@ -313,7 +378,7 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)

(\*\])|(\\.)

if (escape) {

- add_exp(inner, escape_string, buildex_pre(escape_string + "."),

+ add_exp(inner, escape_string + ".",

elem,

RegExpFormatterPtr(new RegExpFormatter(name)));

}

@@ -339,16 +404,17 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)

if (elem->isNested()) {

RegExpFormatterPtr nested(new RegExpFormatter(name, inner));

nested_formatters.push_back(nested);

- add_exp(inner, start_string, buildex_pre(start_string), elem, nested);

+ add_exp(inner, start_string, elem, nested);

}

if (inner) {

- freeze_state(inner);

+ if (!freeze_state(inner))

+ foundBug("bug in expression parsing", __FILE__, __LINE__);;

}

RegExpFormatterPtr formatter(new RegExpFormatter(name, inner));

- add_exp(state, exp_string, buildex_pre(exp_string), elem, formatter);

+ add_exp(state, exp_string, elem, formatter);

build(static_cast<StateStartLangElem *>(elem), state);

}

@@ -363,19 +429,7 @@ RegExpStateBuilder::build(StateStartLangElem *elem, RegExpStatePointer state)

{

RegExpFormatterPtr formatter = state->getLastFormatter();

- bool exit_all = elem->exitAll();

- bool exit = elem->doExit();

- /*

- only act on the exit state (if any exist statement is defined)

- */

- if (exit_all) {

- formatter->exit_state_level = 1;

- formatter->exit_all = true;

- }

- if (exit)

- formatter->exit_state_level = 1;

+ setFormatterExitLevel(elem, formatter);

}

/**

diff --git a/src/lib/regexpstatebuilder.h b/src/lib/regexpstatebuilder.h
index 3fc05fb..f3ed3ff 100644
--- a/src/lib/regexpstatebuilder.h
+++ b/src/lib/regexpstatebuilder.h

@@ -1,10 +1,10 @@

-// C++ Interface: %{MODULE}

+// C++ Interface: RegExpStateBuilder

-// Description:

+// Description: Builds the RegExpStates starting from all the language elements.

+//

-//

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

// Copyright: See COPYING file that comes with this distribution

@@ -48,6 +48,7 @@ class RegExpStateBuilder

void (StringListLangElem *elem, RegExpStatePointer state);

void (DelimitedLangElem *elem, RegExpStatePointer state);

void (StateStartLangElem *elem, RegExpStatePointer state);

+ void (NamedSubExpsLangElem *elem, RegExpStatePointer state);

void (LangElem *elem, RegExpStatePointer state);

void (LangElems *elems, RegExpStatePointer state);

endbranches

diff --git a/src/lib/regexpstatebuilder_dbtab.cc b/src/lib/regexpstatebuilder_dbtab.cc
index 9dd404c..56b58c5 100644
--- a/src/lib/regexpstatebuilder_dbtab.cc
+++ b/src/lib/regexpstatebuilder_dbtab.cc

@@ -4,6 +4,8 @@

#include "statelangelem.h"

+#include "namedsubexpslangelem.h"

#include "delimitedlangelem.h"

#include "langelems.h"

@@ -43,6 +45,12 @@ LangElems::dispatch_build(RegExpStateBuilder *RegExpStateBuilder_o, RegExpStateP

}

void

+NamedSubExpsLangElem::dispatch_build(RegExpStateBuilder *RegExpStateBuilder_o, RegExpStatePointer state)

+ RegExpStateBuilder_o->_forward_build(this, state);

+void

StateLangElem::dispatch_build(RegExpStateBuilder *RegExpStateBuilder_o, RegExpStatePointer state)

{

RegExpStateBuilder_o->_forward_build(this, state);

diff --git a/src/lib/regexpstateprinter.cpp b/src/lib/regexpstateprinter.cpp
index 9a56b91..be5abb2 100644
--- a/src/lib/regexpstateprinter.cpp
+++ b/src/lib/regexpstateprinter.cpp

@@ -10,6 +10,7 @@

#include "regexpstateprinter.h"

+#include "regexpreprocessor.h"

#include <iostream>

@@ -33,21 +34,54 @@ void RegExpStatePrinter::printRegExpState(RegExpStatePtr state)

{

do_indent;

cout << " STATE " << state->id << endl;

+ do_indent;

+ cout << " regexp " << state->reg_exp <<

+ (state->has_alternative() ? " (has alternatives)" : "") << endl;

inc_indent;

- int i = 0;

- for (format_vector::const_iterator it = state->formatters.begin();

- it != state->formatters.end(); ++it)

- {

- do_indent;

- cout << i << ": " << (*it)->elem << " "

- << (i > 0 ? state->subExpressions[i-1].first : "");

- printRegExpFormatter(*it);

- ++i;

+ unsigned int i = 0;

+ if (state->allAlternativesCanMatch) {

+ // print the default formatter

+ do_indent;

+ cout << i << ": " << state->formatters[0]->elem << " ";

+ printRegExpFormatter(state->formatters[0]);

+ // we need to get all the subexpressions

+ const subexpressions_strings *split = RegexPreProcessor::split_marked_subexpressions(state->reg_exp.str());

+ i = 1;

+ for (subexpressions_strings::const_iterator it = split->begin(); it != split->end(); ++it) {

+ do_indent;

+ cout << i << ": " << state->formatters[i]->elem << " "

+ << *it ;

+ ++i;

+ if (i < state->formatters.size())

+ cout << endl;

+ }

+ // and print only the last state which has all the next state

+ // and exit level information

+ do_indent;

+ printRegExpFormatter(state->formatters[i-1]);

+ delete split;

+ } else {

+ for (format_vector::const_iterator it = state->formatters.begin();

+ it != state->formatters.end(); ++it)

+ {

+ do_indent;

+ cout << i << ": " << (*it)->elem << " "

+ << (i > 0 ? state->subExpressions[i-1].first : "");

+ printRegExpFormatter(*it);

+ ++i;

+ }

}

dec_indent;

+ if (state->alternative.get())

+ printRegExpState(state->alternative);

}

void RegExpStatePrinter::printRegExpFormatter(RegExpFormatterPtr formatter)

diff --git a/src/lib/statelangelem.cpp b/src/lib/statelangelem.cpp
index 254b015..13d4c05 100644
--- a/src/lib/statelangelem.cpp
+++ b/src/lib/statelangelem.cpp

@@ -38,3 +38,11 @@ StateLangElem::toString() const

return res;

}

+const std::string

+StateLangElem::toStringOriginal() const

+ string res = statestartlangelem->toString();

+ if (langelems)

+ res += "\n" + langelems->toStringOriginal();

+ return res;

diff --git a/src/lib/statelangelem.h b/src/lib/statelangelem.h
index 003b651..762d647 100644
--- a/src/lib/statelangelem.h
+++ b/src/lib/statelangelem.h

@@ -48,6 +48,8 @@ public:

virtual const std::string toString() const;

+ virtual const std::string toStringOriginal() const;

StateStartLangElem *getStateStart() const { return statestartlangelem; }

bool isState() const { return state; }

LangElems *getElems() const { return langelems; }

diff --git a/src/lib/stringdef.cpp b/src/lib/stringdef.cpp
deleted file mode 100644
index e1ba02f..0000000
--- a/src/lib/stringdef.cpp
+++ /dev/null

@@ -1,28 +0,0 @@

-//

-// C++ Implementation: %{MODULE}

-//

-// Description:

-//

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#include "stringdef.h"

-StringDef::StringDef(const char *s) :

- stringdef(s)

-StringDef::StringDef(const std::string &s) :

- stringdef(s)

-StringDef::~StringDef()

diff --git a/src/lib/stringdef.h b/src/lib/stringdef.h
index 48ec110..483307e 100644
--- a/src/lib/stringdef.h
+++ b/src/lib/stringdef.h

@@ -1,10 +1,10 @@

-// C++ Interface: %{MODULE}

+// C++ Interface: StringDef

-// Description:

+// Description: a string definition that is used by all the language elements.

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

+// Author: Lorenzo Bettini, 1999-2007 <http://www.lorenzobettini.it>

// Copyright: See COPYING file that comes with this distribution

@@ -16,34 +16,55 @@

#include <list>

/**

-represent a string for a language element

+ represent a string for a language element

-@author Lorenzo Bettini

-*/

-class StringDef

- private:

+ @author Lorenzo Bettini

+ */

+class StringDef {

+private:

+ /// the actual content

std::string stringdef;

- public:

- StringDef(const char *s);

- StringDef(const std::string &s);

- ~StringDef();

- const std::string toString() const { return stringdef; }

- static StringDef *concat(const StringDef *s1, const StringDef *s2)

- { return new StringDef(s1->stringdef + s2->stringdef); }

+ /// the original representation (without any preprocessing)

+ std::string orig;

+public:

+ /**

+ * constructs a StringDef and store also the original representation

+ * @param s the actual content

+ * @param o the original representation

+ */

+ StringDef(const std::string &s, const std::string &o) :

+ stringdef(s), orig(o) {

+ }

+ StringDef(const std::string &s) :

+ stringdef(s) {

+ }

+ const std::string toString() const {

+ return stringdef;

+ }

+ /**

+ * return the original representation (this is useful for printing errors)

+ * @return the original representation

+ */

+ const std::string toStringOriginal() const {

+ return orig;

+ }

+ static StringDef *concat(const StringDef *s1, const StringDef *s2) {

+ return new StringDef(s1->stringdef + s2->stringdef);

+ }

};

typedef std::list<StringDef *> StringDefsBase;

-class StringDefs : public StringDefsBase

- public:

+class StringDefs : public StringDefsBase {

+public:

~StringDefs() {

- for (StringDefsBase::iterator it = begin(); it != end(); ++it)

- delete *it;

+ for (StringDefsBase::iterator it = begin(); it != end(); ++it)

+ delete *it;

}

};

diff --git a/src/lib/stringlistlangelem.cpp b/src/lib/stringlistlangelem.cpp
index 8f0e266..59f5be6 100644
--- a/src/lib/stringlistlangelem.cpp
+++ b/src/lib/stringlistlangelem.cpp

@@ -30,7 +30,13 @@ StringListLangElem::~StringListLangElem()

const std::string

StringListLangElem::toString() const

{

- string res = StateStartLangElem::toString() + " " + toStringCollection<StringDefs>(alternatives);;

+ string res = StateStartLangElem::toString() + " " + toStringCollection<StringDefs>(alternatives);

return res;

}

+const std::string

+StringListLangElem::toStringOriginal() const

+ string res = StateStartLangElem::toString() + " " + toStringOriginalCollection<StringDefs>(alternatives);

+ return res;

diff --git a/src/lib/stringlistlangelem.h b/src/lib/stringlistlangelem.h
index 7b3fc8e..90a3baa 100644
--- a/src/lib/stringlistlangelem.h
+++ b/src/lib/stringlistlangelem.h

@@ -38,6 +38,8 @@ public:

virtual ~StringListLangElem();

virtual const std::string toString() const;

+ virtual const std::string toStringOriginal() const;

StringDefs *getAlternatives() const { return alternatives; }

bool isCaseSensitive() const { return !nonsensitive; }

diff --git a/src/lib/stylecssparser.yy b/src/lib/stylecssparser.yy
index 86b8659..8ba3697 100644
--- a/src/lib/stylecssparser.yy
+++ b/src/lib/stylecssparser.yy

@@ -23,8 +23,6 @@

#include <iostream>

#include <string>

-#include "my_sstream.h"

#include "generatorfactory.h"

#include "colors.h"

#include "keys.h"

@@ -51,6 +49,7 @@ static string bodyBgColor;

extern int stylecsssc_lex() ;

extern FILE *stylecsssc_in ;

+extern int stylecsssc_lex_destroy (void);

/// the global pointer to style constant for a specific element

static StyleConstantsPtr currentStyleConstants;

@@ -79,11 +78,11 @@ static string currentBGColor;

stylefile : { /* allow empty files */ }

| statements

;

statements : statements statement

| statement

;

statement : option

;

@@ -109,11 +108,11 @@ option : keylist

// check whether it's the body specification

if (Utils::tolower(key) == "body") {

updateBgColor(currentBGColor);

// notice that for text style specification for the body, the background

// is assumed for the entire document and not for the normal text

// following the semantics of css

// avoid adding an empty style definition for normal

if (currentColor != "" || currentStyleConstants->size()) {

if (!generatorFactory->createGenerator(NORMAL, currentColor, "", currentStyleConstants)) {

@@ -205,22 +204,24 @@ void parseCssStyles(const string &path, const string &name, GeneratorFactory *ge

printMessage_noln( "Parsing ", cerr ) ;

printMessage_noln (current_file, cerr);

printMessage( " file ...", cerr ) ;

bodyBgColor = "";

yyparse() ;

bodyBgColor_ = bodyBgColor;

printMessage( "Parsing done!", cerr ) ;

fclose(stylecsssc_in);

+ // release scanner memory

+ stylecsssc_lex_destroy();

}

void

yyerror( char *s )

{

parseStyleError(s);

- exit(EXIT_FAILURE);

}

void updateBgColor(const std::string &c)

diff --git a/src/lib/stylecssscanner.ll b/src/lib/stylecssscanner.ll
index 1806907..b1a6ae3 100644
--- a/src/lib/stylecssscanner.ll
+++ b/src/lib/stylecssscanner.ll

@@ -43,7 +43,6 @@ extern int line ;

#define DEB2(s,s2)

#endif

%option prefix="stylecsssc_"

@@ -207,16 +206,6 @@ STRING \"[^\"\n]*\"

DEB2("CSS PROPERTIES discarding", yytext);

}

-<<EOF>> {

- DEB("reached EOF of the style file");

- DEB("freeing scanner memory");

- /* For non-reentrant C scanner only. */

- yy_delete_buffer(YY_CURRENT_BUFFER);

- yyterminate();

<INITIAL>. { return yytext[0] ; }

diff --git a/src/lib/styleparser.yy b/src/lib/styleparser.yy
index a203137..7f000b6 100644
--- a/src/lib/styleparser.yy
+++ b/src/lib/styleparser.yy

@@ -23,8 +23,6 @@

#include <iostream>

#include <string>

-#include "my_sstream.h"

#include "generatorfactory.h"

#include "colors.h"

#include "keys.h"

@@ -42,6 +40,7 @@ int line = 1 ;

extern int stylesc_lex() ;

extern FILE *stylesc_in ;

+extern int stylesc_lex_destroy (void);

static string bodyBgColor;

@@ -74,15 +73,15 @@ static GeneratorFactory *generatorFactory;

stylefile : { /* allow empty files */ }

| statements

;

statements : statements statement

| statement

;

statement : option

| bodybgcolor

;

option : keylist color bgcolor

{

printSequence( $1 ) ;

@@ -164,7 +163,7 @@ parseStyles(const string &path, const string &name, GeneratorFactory *genFactory

string &bodyBgColor_)

{

generatorFactory = genFactory;

// opens the file for yylex

stylesc_in = open_data_file_stream(path, name);

@@ -173,29 +172,33 @@ parseStyles(const string &path, const string &name, GeneratorFactory *genFactory

printMessage_noln( "Parsing ", cerr ) ;

printMessage_noln (current_file, cerr);

printMessage( " file ...", cerr ) ;

bodyBgColor = "";

yyparse() ;

bodyBgColor_ = bodyBgColor;

printMessage( "Parsing done!", cerr ) ;

fclose(stylesc_in);

+ // free scanner memory

+ stylesc_lex_destroy();

}

void

yyerror( char *s )

{

parseStyleError(s);

- exit(EXIT_FAILURE);

}

-void parseStyleError(const std::string &error)

+void parseStyleError(const std::string &error, bool exit)

{

- ostringstream str ;

- str << current_file << ":" << line << ": " << error;

- printError( str.str(), cerr ) ;

+ if (exit)

+ exitError(current_file, line, error);

+ else {

+ printError(current_file, line, error);

+ }

}

void updateBgColor(const std::string *c)

@@ -204,7 +207,7 @@ void updateBgColor(const std::string *c)

yyerror("bgcolor already defined");

else

bodyBgColor = *c;

// we don't need it anymore

delete c;

}

diff --git a/src/lib/stylescanner.ll b/src/lib/stylescanner.ll
index c5b5c19..0baf33f 100644
--- a/src/lib/stylescanner.ll
+++ b/src/lib/stylescanner.ll

@@ -60,7 +60,7 @@ IDE [a-zA-Z_]([a-zA-Z0-9_])*

STRING \"[^\"\n]*\"

-%s COMMENT_STATE STRING_STATE

+%s COMMENT_STATE STRING_STATE

@@ -76,7 +76,7 @@ STRING \"[^\"\n]*\"

<INITIAL>\#[a-fA-F0-9]{6} {

const std::string wrong = yytext ;

- parseStyleError("use of direct colors has changed");

+ parseStyleError("use of direct colors has changed", false);

parseStyleError("use double quoted syntax: \"" + wrong + "\" instead of " + wrong);

exit(EXIT_FAILURE);

return COLOR ;

@@ -108,16 +108,6 @@ STRING \"[^\"\n]*\"

\n { ++line ; }

-<<EOF>> {

- DEB("reached EOF of the style file");

- DEB("freeing scanner memory");

- /* For non-reentrant C scanner only. */

- yy_delete_buffer(YY_CURRENT_BUFFER);

- yyterminate();

<INITIAL>. { /* anything else will generate a parsing error */ return yytext[0] ; }

diff --git a/src/lib/test_langinfer.cpp b/src/lib/test_langinfer.cpp
index b683cf3..a4c8994 100644
--- a/src/lib/test_langinfer.cpp
+++ b/src/lib/test_langinfer.cpp

@@ -43,6 +43,10 @@ main()

testInfer("#!/bin/sh\n# -*- tcl, as specified in Emacs -*-", "tcl");

+ // try with the env specification

+ testInfer("#! /usr/bin/env python", "python");

+ testInfer("#! /bin/env perl", "perl");

// just a small check for tolower

string s = "Lisp";

Utils::toLower(s);

diff --git a/src/lib/test_regexpreprocessor_main.cpp b/src/lib/test_regexpreprocessor_main.cpp
new file mode 100644
index 0000000..daa5a9d
--- /dev/null
+++ b/src/lib/test_regexpreprocessor_main.cpp

@@ -0,0 +1,167 @@

+/*

+ *

+ * This program is free software; you can redistribute it and/or modify

+ * it under the terms of the GNU General Public License as published by

+ * the Free Software Foundation; either version 2 of the License, or

+ * (at your option) any later version.

+ *

+ * This program is distributed in the hope that it will be useful,

+ * but WITHOUT ANY WARRANTY; without even the implied warranty of

+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+ * GNU General Public License for more details.

+ *

+ * You should have received a copy of the GNU General Public License

+ * along with this program; if not, write to the Free Software

+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

+ *

+ */

+/*

+ * This program is part of GNU source-highlight

+ *

+ * This tests regex preprocessing

+ */

+#include <iostream>

+#include <boost/regex.hpp>

+#include <algorithm>

+#include "asserttest.h"

+#include "regexpreprocessor.h"

+using namespace std;

+// variable to test results for all the tests

+int result = 0;

+void testPreprocess(const string &original, const string &expected) {

+ cout << "original : " << original << endl;

+ const string &preprocessed = RegexPreProcessor::preprocess(original);

+ cout << "preprocessed : " << preprocessed << endl;

+ result += assertEquals(expected, preprocessed);

+void testMakeNonSensitive(const string &original, const string &expected) {

+ cout << "original : " << original << endl;

+ const string &preprocessed = RegexPreProcessor::make_nonsensitive(original);

+ cout << "preprocessed : " << preprocessed << endl;

+ result += assertEquals(expected, preprocessed);

+void testOnlyNumOfMarkedSubexpressions(const string &original,

+ unsigned int expected) {

+ cout << "original : " << original << endl;

+ unsigned int found = RegexPreProcessor::num_of_subexpressions(original);

+ cout << "found : " << found << endl;

+ result += assertEquals(expected, found);

+void testNumOfMarkedSubexpressions(const string &original,

+ unsigned int expected, const string &error = "") {

+ cout << "original : " << original << endl;

+ subexpressions_info

+ sexp = RegexPreProcessor::num_of_marked_subexpressions(original);

+ unsigned int found = sexp.marked;

+ cout << "found : " << found << endl;

+ if (sexp.errors.size())

+ cout << "error : " << sexp.errors << endl;

+ result += assertEquals(expected, found);

+ result += assertEquals(error, sexp.errors);

+void testBackReference(const string &original, bool expected) {

+ cout << "original : " << original << endl;

+ bool found = RegexPreProcessor::contains_backreferences(original);

+ cout << "found : " << found << endl;

+ result += assertEquals(expected, found);

+void testSplit(const string &original, const subexpressions_strings &expected) {

+ const subexpressions_strings *split;

+ split = RegexPreProcessor::split_marked_subexpressions(original);

+ cout << "split : ";

+ std::copy(split->begin(), split->end(), std::ostream_iterator<string>(cout));

+ cout << endl;

+ if (!std::equal(split->begin(), split->end(), expected.begin())) {

+ ++result;

+ cout << "are not equal!" << endl;

+ cout << "expected : ";

+ std::copy(expected.begin(), expected.end(),

+ std::ostream_iterator<string>(cout));

+ }

+int main() {

+ cout << boolalpha;

+ testPreprocess("simple", "simple");

+ testPreprocess("(inside)", "(?:inside)");

+ testPreprocess("(dou(b)le)", "(?:dou(?:b)le)");

+ testMakeNonSensitive("foo", "[Ff][Oo][Oo]");

+ testOnlyNumOfMarkedSubexpressions("none", 0);

+ testOnlyNumOfMarkedSubexpressions("just (one)", 1);

+ testOnlyNumOfMarkedSubexpressions("(3 of (them)) just (one)", 3);

+ testOnlyNumOfMarkedSubexpressions("none \\(", 0);

+ testOnlyNumOfMarkedSubexpressions("(?: again) none \\(", 0);

+ testNumOfMarkedSubexpressions("none", 0,

+ subexpressions_info::ERR_OUTSIDE_SUBEXP);

+ testNumOfMarkedSubexpressions("just (one)", 0,

+ subexpressions_info::ERR_OUTSIDE_SUBEXP);

+ testNumOfMarkedSubexpressions("(3 of (them)) just (one)", 1,

+ subexpressions_info::ERR_NESTED_SUBEXP);

+ testNumOfMarkedSubexpressions("none \\(", 0,

+ subexpressions_info::ERR_OUTSIDE_SUBEXP);

+ testNumOfMarkedSubexpressions("(?: again) none \\(", 0,

+ subexpressions_info::ERR_OUTER_UNMARKED);

+ testNumOfMarkedSubexpressions("(just one)", 1);

+ testNumOfMarkedSubexpressions("(just one (?:some) and unmarked)", 1);

+ testNumOfMarkedSubexpressions("(just one \\( and escapes)", 1);

+ testNumOfMarkedSubexpressions("(just one \\( and \\) escapes)", 1);

+ testNumOfMarkedSubexpressions("(one) ", 1,

+ subexpressions_info::ERR_OUTSIDE_SUBEXP);

+ testNumOfMarkedSubexpressions("(one", 1,

+ subexpressions_info::ERR_UNBALANCED_PAREN);

+ testNumOfMarkedSubexpressions("(one))", 1,

+ subexpressions_info::ERR_UNBALANCED_PAREN);

+ testNumOfMarkedSubexpressions("(one)(two)((?:three)*)", 3);

+ testNumOfMarkedSubexpressions("(one) (two)", 1,

+ subexpressions_info::ERR_OUTSIDE_SUBEXP);

+ subexpressions_strings expected;

+ expected.push_back("(this)");

+ expected.push_back("(is)");

+ expected.push_back("(one)");

+ testSplit("(this)(is)(one)", expected);

+ expected.clear();

+ expected.push_back("(this)");

+ expected.push_back("(contains \\( some \\) other parenthesis)");

+ expected.push_back("(and (?:non marked) ones)");

+ testSplit("(this)(contains \\( some \\) other parenthesis)(and (?:non marked) ones)", expected);

+ testBackReference("this does not contain any", false);

+ testBackReference("this does contain \\1 one", true);

+ testBackReference("and also this one (?(2)...) does", true);

+ testBackReference("while this one (?(foo)...) does NOT does", false);

+ return result;

diff --git a/src/lib/textformatter.cpp b/src/lib/textformatter.cpp
index bf681fb..cf87460 100644
--- a/src/lib/textformatter.cpp
+++ b/src/lib/textformatter.cpp

@@ -4,42 +4,334 @@

// Description:

// Copyright: See COPYING file that comes with this distribution

#include "textformatter.h"

+#include "textgenerator.h"

+#include "preformatter.h"

+#include "mainoutputbuffer.h"

+#include "messages.h"

+#include "fileutil.h"

+#include "fileinfo.h"

-#include "maingeneratormap.h"

+#include <boost/regex.hpp>

+#include <list>

using namespace std;

-TextFormatter::TextFormatter()

+bool isTaggable(const string &elem) {

+ return elem.find_first_of(' ')== string::npos;

+static boost::regex string_or_space_regex("([^[:blank:]]+)|([[:blank:]]+)");

+static SubstitutionMapping ref_substitutionmapping;

+#define SPACE 2

+#define NOT_SPACE 1

+TextFormatter::TextFormatter(PreFormatter *pf) :

+ default_generator(0), preformatter(pf), noOptimizations(false),

+ generateReferences(false), ctags_file(0) {

+TextFormatter::TextFormatter(PreFormatter *pf, const string &_ctags_file_name,

+ const TextStyles::RefTextStyle &r, RefPosition pos) :

+ default_generator(0), preformatter(pf), noOptimizations(false),

+ generateReferences(true), ctags_file_name(_ctags_file_name),

+ refstyle(r), refposition(pos) {

+ ctags_file = tagsOpen(ctags_file_name.c_str(), &info);

+ if (ctags_file == 0) {

+ exitError("cannot open tag file: " + ctags_file_name);

+ }

+TextFormatter::~TextFormatter() {

+ for (MapType::const_iterator it = textformatter.begin(); it != textformatter.end(); ++it)

+ delete it->second;

+ if (ctags_file)

+ tagsClose(ctags_file);

+void TextFormatter::setDefaultGenerator(TextGenerator *gen) {

+ default_generator = gen;

}

-TextFormatter::~TextFormatter()

+void TextFormatter::addGenerator(const std::string &elem, TextGenerator *gen) {

+ textformatter[elem] = gen;

}

-void

-TextFormatter::format(const string &elem, const string &text, const FileInfo *p)

- if (! text.size())

- return;

+TextGenerator *TextFormatter::hasGenerator(const string &elem) {

+ MapType::const_iterator it = textformatter.find(elem);

+ if (it == textformatter.end())

+ return 0;

+ return it->second;

+TextGenerator *TextFormatter::getGenerator(const string &elem) {

+ MapType::const_iterator it = textformatter.find(elem);

+ if (it == textformatter.end()) {

+ // create a copy of the prototype and substitute the style.

+ TextGenerator *missing = new TextGenerator(*default_generator);

+ missing->subst_style(elem);

+ textformatter[elem] = missing;

+ return missing;

+ }

- generatormap->generateEntire(elem, text, p);

+ return it->second;

}

-void

-TextFormatter::format_nl(const string &text)

- generatormap->generateNL(text);

+void TextFormatter::addNoReference(const std::string &elem) {

+ noreferences.insert(elem);

+bool TextFormatter::isNoReference(const std::string &elem) const {

+ return (noreferences.find(elem) != noreferences.end());

+const string TextFormatter::generateString(const std::string &elem,

+ const std::string &s, const FileInfo *p) {

+ if (!generateReferences || isNoReference(elem)) {

+ return generateStringNoRef(elem, s);

+ } else {

+ return generateStringAndRef(elem, s, p);

+ }

+const string TextFormatter::generateStringNoRef(const std::string &elem,

+ const std::string &s) {

+ return getGenerator(elem)->generateEntire(preformatter->preformat(s));

+void TextFormatter::generateEntire(const std::string &elem,

+ const std::string &s, const FileInfo *p) {

+ if (noOptimizations) {

+ // we generate the element right now, since during debugging

+ // we want to be very responsive

+ if (s.size())

+ output(generateString(elem, s, p));

+ return;

+ }

+ // otherwise we optimize output generation: delay formatting a specific

+ // element until we deal with another element; this way strings that belong

+ // to the same element are formatted using only one tag: e.g.,

+ // <comment>/* mycomment */</comment>

+ // instead of

+ // <comment>/*</comment><comment>mycomment</comment><comment>*/</comment>

+ if (elem == current_elem) {

+ elem_buffer << s;

+ } else {

+ // first format the buffered string

+ const string toformat = elem_buffer.str();

+ if (toformat.size())

+ output(generateString(current_elem, toformat, p));

+ // then start a new buffer

+ elem_buffer.str("");

+ elem_buffer << s;

+ current_elem = elem;

+ current_file_info = p;

+ }

+void TextFormatter::generateNL(const std::string &text) {

+ // first format the buffered string

+ flush();

+ string preformat_text = preformatter->preformat(text);

+ if (preformat_text == text)

+ preformat_text = "\n";

+ outputbuffer->output_ln(preformat_text);

+void TextFormatter::flush() {

+ const string &remainder = elem_buffer.str();

+ if (remainder.size()) {

+ output(generateString(current_elem, remainder, current_file_info));

+ elem_buffer.str("");

+ current_elem = "";

+ // each line is handled separately

+ }

+void TextFormatter::output(const string &s) {

+ outputbuffer->output(s);

+//#define DEBUGREF

+#ifdef DEBUGREF

+#include <iostream>

+#define DEB(x) cerr << x << endl;

+#define DEB2(x) cerr << x ;

+#else

+#define DEB(x) ;

+#define DEB2(x) ;

+#endif

+/*

+ * separates a line into blocks of spaces and blocks of non-spaces.

+ * the stringbuffer tokens stores the pieces seen so far for which no

+ * entry in the tag file was found.

+ *

+ * for each block of non spaces tries to look for an entry in the tag.

+ * if it finds it flushes the stringbuffer tokens (by passing its contents

+ * to the parent class implementation of generateString).

+ *

+ * For instance (notice the spaces among the +)

+ * "myfield + myfield2 + myfield3

+ * if only an entry for myfield2 is found, then we will generate

+ * 3 blocks:

+ * "myfield + "

+ * "myfield2"

+ * " + myfield3"

+ */

+const std::string TextFormatter::generateStringAndRef(const std::string& elem,

+ const std::string& s, const FileInfo* fileinfo) {

+ buffer.str("");

+ ostringstream tokens;

+ boost::sregex_iterator i(s.begin(), s.end(), string_or_space_regex);

+ boost::sregex_iterator j;

+ while (i != j) {

+ if ((*i)[SPACE].matched) {

+ tokens << string((*i)[SPACE].first, (*i)[SPACE].second);

+ } else {

+ string not_spaces = string((*i)[NOT_SPACE].first, (*i)[NOT_SPACE].second);

+ string found_refs = generateRefInfo(elem, not_spaces, fileinfo);

+ if (found_refs.size()) {

+ const string &previous = tokens.str();

+ if (previous.size()) {

+ buffer << generateStringNoRef(elem, previous);

+ tokens.str("");

+ }

+ buffer << found_refs;

+ } else {

+ tokens << not_spaces;

+ }

+ ++i;

+ }

+ const string &remainder = tokens.str();

+ if (remainder.size()) {

+ buffer << generateStringNoRef(elem, remainder);

+ }

+ return buffer.str();

+struct RefEntry {

+ string filename;

+ unsigned long linenumber;

+};

+const string TextFormatter::generateRefInfo(const std::string& elem,

+ const std::string& s, const FileInfo* fileinfo) {

+ tagEntry entry;

+ bool found = false; // whether we found a tag

+ bool found_anchor = false; // whether we found an anchor

+ string output;

+ typedef list<RefEntry> FoundRefList;

+ FoundRefList foundreflist;

+ DEB("inspecting " + s)

+ if (tagsFind(ctags_file, &entry, s.c_str(), TAG_FULLMATCH)== TagSuccess) {

+ found = true;

+ do {

+ RefEntry refentry;

+ refentry.filename = entry.file;

+ if ((refentry.filename == fileinfo->filename ||

+ refentry.filename == fileinfo->input_file_name) &&entry.address.lineNumber == fileinfo->line) {

+ ostringstream gen_info;

+ // we just found the reference to this very element

+ // so we must generate an anchor

+ gen_info << entry.address.lineNumber;DEB(" found anchor " + gen_info.str());

+ ref_substitutionmapping["$text"] = preformatter->preformat(s);

+ ref_substitutionmapping["$infilename"] = strip_file_path(refentry.filename);

+ ref_substitutionmapping["$infile"] = refentry.filename;

+ ref_substitutionmapping["$linenum"] = gen_info.str();

+ output = refstyle.anchor.output(ref_substitutionmapping);

+ found_anchor = true;

+ break;

+ }

+ DEB2(" found " + string(entry.name) + " : ");DEB(entry.address.lineNumber);

+ refentry.linenumber = entry.address.lineNumber;

+ foundreflist.push_back(refentry);

+ } while (tagsFindNext(ctags_file, &entry)== TagSuccess);

+ }

+ if (found) {

+ if (! found_anchor) {

+ ref_substitutionmapping["$text"] = preformatter->preformat(s);

+ TextStyle *referencestyle = 0;

+ if ((foundreflist.size()>1 && refposition == INLINE) || refposition == POSTLINE)

+ referencestyle = &(refstyle.postline_reference);

+ else if (refposition == POSTDOC)

+ referencestyle = &(refstyle.postdoc_reference);

+ else

+ referencestyle = &(refstyle.inline_reference);

+ for (FoundRefList::const_iterator it = foundreflist.begin(); it != foundreflist.end(); ++it) {

+ ostringstream gen_info;

+ // we found where this element appears so we generate a reference

+ // if it's a link in the same file, we use the output_file_name...

+ if (it->filename == fileinfo->filename || it->filename == fileinfo->input_file_name)

+ gen_info << fileinfo->output_file_name;

+ else

+ gen_info << it->filename << fileinfo->output_file_extension;

+ // ...otherwise we build the referenced file by using the output_file_extension

+ // in fact, in this case, it probably means that multiple input files have been specified

+ ref_substitutionmapping["$outfile"] = gen_info.str();

+ ref_substitutionmapping["$infilename"] = strip_file_path(it->filename);

+ ref_substitutionmapping["$infile"] = it->filename;

+ gen_info.str("");

+ gen_info << it->linenumber;

+ ref_substitutionmapping["$linenum"] = gen_info.str();

+ output += referencestyle->output(ref_substitutionmapping);

+ // if the following is true, it means that there is more than one reference

+ if (foundreflist.size()> 1 || refposition != INLINE) {

+ output += preformatter->preformat("\n");

+ if (refposition == POSTLINE || refposition == INLINE) {

+ outputbuffer->output_postline(output);

+ } else { // (refposition == POSTDOC)

+ outputbuffer->output_post(output);

+ }

+ output = ""; // no need to modify the current element

+ }

+ return output;

+void TextFormatter::format(const string &elem, const string &text,

+ const FileInfo *p) {

+ if (! text.size())

+ return;

+ generateEntire(elem, text, p);

}

-void

-TextFormatter::flush()

- generatormap->flush();

+void TextFormatter::format_nl(const string &text) {

+ generateNL(text);

}

diff --git a/src/lib/textformatter.h b/src/lib/textformatter.h
index 3d6230c..ec0bf6e 100644
--- a/src/lib/textformatter.h
+++ b/src/lib/textformatter.h

@@ -9,33 +9,119 @@

// Copyright: See COPYING file that comes with this distribution

-#ifndef TEXTFORMATTER_H

-#define TEXTFORMATTER_H

+#ifndef GENERATORMAP_H

+#define GENERATORMAP_H

+#include <map>

#include <string>

-#include <boost/shared_ptr.hpp>

+#include <set>

+#include "my_sstream.h"

+#include "readtags.h"

+#include "textstyles.h"

+#include "refposition.h"

+class TextGenerator;

+class PreFormatter;

+class FileInfo;

/**

-format text

+ * Formats the elements of a source file, using a map of generators;

+ * associate a generator for each program element, e.g., keyword, string, etc.

+ *

+ * @author Lorenzo Bettini

+ */

+class TextFormatter {

+protected:

+ typedef std::map<std::string, TextGenerator *> MapType;

+ typedef std::set<std::string> NoRefType;

+ MapType textformatter;

+ /// those elements for which no reference info is generated

+ NoRefType noreferences;

+ TextGenerator *default_generator;

+ PreFormatter *preformatter;

+ /// where we buffer strings for the current elem

+ std::ostringstream elem_buffer;

+ /// the element that is currently buffered

+ std::string current_elem;

+ /// concerns the element currently buffered

+ const FileInfo *current_file_info;

+ /// whether to turn off optimizations (such as buffering), default: false

+ bool noOptimizations;

-@author Lorenzo Bettini

-*/

+ bool generateReferences;

-class FileInfo;

+ // for references

+ const std::string ctags_file_name;

+ TextStyles::RefTextStyle refstyle;

+ RefPosition refposition;

+ tagFile *ctags_file;

+ tagFileInfo info;

+ std::ostringstream buffer;

+ const std::string generateString(const std::string &elem,

+ const std::string &s, const FileInfo *);

+ const std::string generateStringAndRef(const std::string &elem,

+ const std::string &s, const FileInfo *);

+ const std::string generateStringNoRef(const std::string &elem,

+ const std::string &s);

+ /**

+ * Generates the reference information

+ */

+ const std::string generateRefInfo(const std::string& elem,

+ const std::string& s, const FileInfo* arg1);

+ /**

+ * Actually performs the output through an OutputBuffer

+ */

+ void output(const std::string &s);

+ void generateEntire(const std::string &elem, const std::string &s,

+ const FileInfo *);

+ void generateNL(const std::string &s);

-class TextFormatter

public:

- TextFormatter();

+ TextFormatter(PreFormatter *);

- ~TextFormatter();

+ TextFormatter(PreFormatter *pf, const std::string &_ctags_file_name,

+ const TextStyles::RefTextStyle &r, RefPosition pos);

- void format(const std::string &elem, const std::string &text,

- const FileInfo *);

- void format_nl(const std::string &text = "\n");

- void flush();

-};

+ ~TextFormatter();

+ /**

+ * Returns the generator for the specific element name or null if

+ * there's no generator for the element

+ * @param elem

+ * @return

+ */

+ TextGenerator *hasGenerator(const std::string &elem);

+ /**

+ * Retrieves the generator for a specific element; if it doesn't find it,

+ * it creates a generator for that element, using the default generator

+ * (i.e., the one for "normal" element)

+ * @param elem

+ * @return

+ */

+ TextGenerator *getGenerator(const std::string &elem);

+ void addGenerator(const std::string &elem, TextGenerator *gen);

+ void addNoReference(const std::string &elem);

+ bool isNoReference(const std::string &elem) const;

-typedef boost::shared_ptr<TextFormatter> TextFormatterPtr;

+ void setDefaultGenerator(TextGenerator *g);

+ void flush();

+ void setNoOptimizations(bool n) {

+ noOptimizations = n;

+ }

+ void format(const std::string &elem, const std::string &text,

+ const FileInfo *);

+ void format_nl(const std::string &text = "\n");

+};

#endif

diff --git a/src/lib/tostringcollection.h b/src/lib/tostringcollection.h
index 99b753d..a4b20f6 100644
--- a/src/lib/tostringcollection.h
+++ b/src/lib/tostringcollection.h

@@ -1,10 +1,8 @@

-// C++ Interface: %{MODULE}

+// Description: given a collection generates a string representation

-// Description:

-//

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

+// Author: Lorenzo Bettini, 1999-2007 <http://www.lorenzobettini.it>

// Copyright: See COPYING file that comes with this distribution

@@ -16,6 +14,13 @@

#include <string>

#include "my_sstream.h"

+/**

+ * Converts a collection of objects with method toString into a string,

+ * using the passed separator to separate the elements.

+ *

+ * @param collection

+ * @param sep

+ */

template <class T>

const std::string toStringCollection(const T *collection, char sep = ' ')

{

@@ -32,4 +37,51 @@ const std::string toStringCollection(const T *collection, char sep = ' ')

return buf.str();

}

+/**

+ * Converts a collection of objects with method toStringOriginal into a string,

+ * using the passed separator to separate the elements.

+ *

+ * @param collection

+ * @param sep

+ */

+template <class T>

+const std::string toStringOriginalCollection(const T *collection, char sep = ' ')

+ std::ostringstream buf;

+ for (typename T::const_iterator it = collection->begin();

+ it != collection->end(); )

+ {

+ buf << (*it)->toStringOriginal();

+ if (++it != collection->end())

+ buf << sep;

+ }

+ return buf.str();

+/**

+ * Converts a collection of objects into a string,

+ * using the passed separator to separate the elements.

+ *

+ * @param collection

+ * @param sep

+ */

+template <class T>

+const std::string collectionToString(const T *collection, char sep = ' ')

+ std::ostringstream buf;

+ for (typename T::const_iterator it = collection->begin();

+ it != collection->end(); )

+ {

+ buf << (*it);

+ if (++it != collection->end())

+ buf << sep;

+ }

+ return buf.str();

#endif // TOSTRINGCOLLECTION_H

diff --git a/src/log.lang b/src/log.lang
index af1611e..6a7b946 100644
--- a/src/log.lang
+++ b/src/log.lang

@@ -39,9 +39,7 @@ ip = $ip

string = "root","failure"

-state normal = '(port|pid)[[:blank:]]' begin

- port = '[[:digit:]]+' exit

-end

+(normal,port) = `((?:port|pid)[[:blank:]])([[:digit:]]+)`

state normal start '[[:blank:]](?=(IN|OUT)=)' begin

state normal = '(IN|OUT|PROTO)=(?=[^[:blank:]]+)' begin

diff --git a/src/logtalk.lang b/src/logtalk.lang
index d9b5405..b2c03ff 100644
--- a/src/logtalk.lang
+++ b/src/logtalk.lang

@@ -55,14 +55,14 @@ variable = '\<[A-Z_][A-Za-z0-9_]*'

cbracket = "{|}"

- '^[[:blank:]]*:-[[:blank:]](end_(category|object|protocol) | dynamic)\.'

+ '^[[:blank:]]*:-[[:blank:]](end_(category|object|protocol)|dynamic)\.'

preproc = '\<(extends|i(nstantiates|mp(lements|orts))|specializes)(?=\()'

normal = '\<[a-z][A-Za-z0-9_]*'

-number = '0\'[A-Za-z0-9] | 0b[0-1]+ | 0o[0-7]+ | 0x[0-9a-fA-F]+ | [0-9]+(\.[0-9]+)?([eE]([-+])?[0-9]+)?'

+number = '0\'[A-Za-z0-9]|0b[0-1]+|0o[0-7]+|0x[0-9a-fA-F]+|[0-9]+(\.[0-9]+)?([eE]([-+])?[0-9]+)?'

symbol = "::", "^^",

">>", "<<", "/\\", "\\/", "\\",

diff --git a/src/outlang.map b/src/outlang.map
index b8907e5..9a0ced6 100644
--- a/src/outlang.map
+++ b/src/outlang.map

@@ -16,4 +16,5 @@ latexcolor = latexcolor.outlang

latexcolor-doc = latexcolordoc.outlang

texinfo = texinfo.outlang

javadoc = javadoc.outlang

-docbook = docbook.outlang \ No newline at end of file

+docbook = docbook.outlang

+docbook-doc = docbookdoc.outlang \ No newline at end of file

diff --git a/src/perl.lang b/src/perl.lang
index 8c3672b..00d1011 100644
--- a/src/perl.lang
+++ b/src/perl.lang

@@ -1,19 +1,117 @@

preproc = "import"

+# these might be unreadable but I don't know how else to do that...

+regexp = 's\{(\\\}|[^}])*\}\{(\\\}|[^}])*\}[ixsmogce]*'

+regexp = 's\((\\\)|[^)])*\)\((\\\)|[^)])*\)[ixsmogce]*'

+regexp = 's\[(\\\]|[^\]])*\]\[(\\\]|[^\]])*\][ixsmogce]*'

+regexp = 's<.*><.*>[ixsmogce]*'

+# the last (lookahead) expression is used to deal with # used

+# as a delimiter.

+# otherwise, with a line such as

+# s#foo\###; # my comment

+# the # of the comment would be matched as the closing delimiter

+regexp = `s([^[:alnum:][:blank:]]).*\1.*\1[ixsmogce]*(?=[[:blank:]]*(\)|;))`

+# this is to deal with cases where the delimiters for the first and the

+# second part are not the same (and spaces are allowed between the first

+# closing and the second opening)

+regexp = `s([^[:alnum:][:blank:]]).*\1[[:blank:]]*([^[:alnum:][:blank:]]).*\2[ixsmogce]*(?=[[:blank:]]*(\)|;))`

include "script_comment.lang"

include "number.lang"

+# this won't work if # has something (non blank) before

+vardef comment_in_exp = '[[:blank:]]+#.*'

+vardef var_in_exp = '\$([[:word:]]+|\{[[:word:]]+\})'

+# this is to highlight correctly regular expressions

+# (and don't mix them with { } code blocks)

+environment keyword = '(m|qr)(?=\{)' begin

+ environment regexp = '\{' begin

+ comment = $comment_in_exp

+ variable = $var_in_exp

+ regexp = '\\\{|\\\}'

+ regexp = "}" exitall

+ end

+end

+# repeat for other non alpha numerical chars

+environment keyword = '(m|qr)(?=#)' begin

+ environment regexp = '#' begin

+ comment = $comment_in_exp

+ variable = $var_in_exp

+ regexp = '\\#'

+ regexp = "#" exitall

+ end

+end

+environment keyword = '(m|qr)(?=\|)' begin

+ environment regexp = '\|' begin

+ comment = $comment_in_exp

+ variable = $var_in_exp

+ regexp = '\\\|'

+ regexp = "\|" exitall

+ end

+end

+environment keyword = '(m|qr)(?=@)' begin

+ environment regexp = '@' begin

+ comment = $comment_in_exp

+ variable = $var_in_exp

+ regexp = '\\@'

+ regexp = "@" exitall

+ end

+end

+environment keyword = '(m|qr)(?=<)' begin

+ environment regexp = '<' begin

+ comment = $comment_in_exp

+ variable = $var_in_exp

+ regexp = '\\<|\\>'

+ regexp = ">" exitall

+ end

+end

+environment keyword = '(m|qr)(?=\[)' begin

+ environment regexp = '\[' begin

+ comment = $comment_in_exp

+ variable = $var_in_exp

+ regexp = '\\]'

+ regexp = "]" exitall

+ end

+end

+environment keyword = '(m|qr)(?=\\)' begin

+ environment regexp = '\\' begin

+ comment = $comment_in_exp

+ variable = $var_in_exp

+ regexp = '\\\\'

+ regexp = "\\" exitall

+ end

+end

+environment keyword = '(m|qr)(?=/)' begin

+ environment regexp = '/' begin

+ comment = $comment_in_exp

+ variable = $var_in_exp

+ regexp = '\\/'

+ regexp = "/" exitall

+ end

+end

string delim "\"" "\"" escape "\\"

string delim "'" "'" escape "\\"

string delim "<" ">"

string = '[[:word:]]*/[^\n]*/[[:word:]]*'

comment delim '^\=(?:head1|head2|item)' '\=cut' multiline

-type = '(?:\$[#]?|@|%)[[:word:]]+'

+variable = '(?:\$[#]?|@|%)[/[:word:]]+'

include "symbols.lang"

diff --git a/src/postscript.lang b/src/postscript.lang
index b0842c6..0ffeaf3 100644
--- a/src/postscript.lang
+++ b/src/postscript.lang

variable = $ID

+environment string delim "(" ")" multiline nested begin

+ specialchar = '\\.'

+end

comment start "%"

include "number.lang"

diff --git a/src/regexpengine.cpp b/src/regexpengine.cpp
deleted file mode 100644
index fda8409..0000000
--- a/src/regexpengine.cpp
+++ /dev/null

@@ -1,177 +0,0 @@

-//

-// C++ Implementation: regexpengine

-//

-// Description:

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#include "regexpengine.h"

-RegExpEngine::~RegExpEngine()

-#include <fstream>

-#include <iostream>

-#include <stdlib.h>

-#include "maingeneratormap.h"

-#include "keys.h"

-#include "langdefloader.h"

-#include "messages.h"

-#include "textformatter.h"

-#include "parserinfo.h"

-// purpose:

-// takes the contents of a file and transform to

-// syntax highlighted code in html format

-using namespace std;

-typedef enum { FOUND_EOF=0, FOUND_NL, FOUND_END } load_line_ret;

-load_line_ret

-load_line(std::string& s, std::istream& is)

- s.erase();

- if (is.bad() || is.eof())

- return FOUND_EOF;

- char c;

- while (is.get(c))

- {

- if (c == '\n')

- return FOUND_NL;

- if (c != '\r')

- s.append(1, c);

- }

- return FOUND_END;

-void

-RegExpEngine::process_file(const char *file)

- istream *is = 0;

- if (file)

- {

- is = new ifstream(file);

- if (!is || ! (*is))

- {

- cerr << "Error in opening " << file

- << " for input" << endl ;

- exit(1) ;

- }

- else

- is = &cin;

- std::string s;

- std::string::const_iterator start, end;

- boost::match_results<std::string::const_iterator> what;

- boost::match_flag_type flags;

- initial_state = currentstate;

- fileinfo->line = 1;

- load_line_ret ret;

- while ((ret = load_line(s, *is)) != FOUND_EOF)

- {

- bool matched = true;

- bool found_eol = false;

- start = s.begin();

- end = s.end();

- flags = boost::match_default;

- while (matched) {

- if (boost::regex_search(start, end, what, currentstate->reg_exp, flags))

- {

- string prefix = what.prefix();

- if (prefix.size())

- format(-1, prefix);

- for (unsigned int i = 1; i < what.size(); ++i) {

- if (what[i].matched) {

- format(i, string(what[i].first, what[i].second));

- if (currentstate->formatters[i]->getNextState()) {

- enterState(i);

- } else if (currentstate->formatters[i]->exit_state_level) {

- if (currentstate->formatters[i]->exit_all) {

- exitAll();

- } else {

- exitState(currentstate->formatters[i]->exit_state_level);

- }

- start = what[i].second;

- if (!(*start)) {

- if (found_eol)

- matched = false; // we had already matched end of line

- found_eol = true;

- }

- break; // no other match is possible

- }

- if (what[0].first != what[0].second) // matched more than 0

- flags |= boost::match_not_bol;

- }

- else

- {

- format(-1, string(start, end));

- matched = false;

- }

- if (ret == FOUND_NL)

- formatter->format_nl("\n");

- (fileinfo->line)++;

- }

- formatter->flush();

- if (file)

- delete is;

- currentstate = initial_state; // reset the initial state

-void

-RegExpEngine::enterState(int index)

- states_stack.push(currentstate);

- currentstate = currentstate->formatters[index]->getNextState();

-void

-RegExpEngine::exitState(int level)

- // remove additional levels

- for (int l = 1; l < level; ++l)

- states_stack.pop();

- currentstate = states_stack.top();

- states_stack.pop();

-void

-RegExpEngine::exitAll()

- currentstate = initial_state;

- states_stack = stack_of_states();

-void

-RegExpEngine::format(int index, const std::string &s)

- formatter->format(currentstate->get_elem(index), s, fileinfo);

diff --git a/src/regexpstate.cpp b/src/regexpstate.cpp
deleted file mode 100644
index 997db08..0000000
--- a/src/regexpstate.cpp
+++ /dev/null

@@ -1,83 +0,0 @@

-//

-// C++ Implementation: %{MODULE}

-//

-// Description:

-//

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#include "regexpstate.h"

-#include "keys.h"

-#include "messages.h"

-#include <stdlib.h>

-using namespace std;

-int RegExpState::global_id = 1;

-RegExpFormatter::RegExpFormatter(const string &el, RegExpStatePtr r, int exit, bool all) :

- elem(el), exit_state_level(exit), exit_all(all), next_state(r)

-void RegExpFormatter::setNextState(RegExpStatePtr r)

- next_state_strong = r;

-RegExpStatePtr RegExpFormatter::getNextState() const

- RegExpStatePtr next = next_state.lock();

- if (!next)

- return next_state_strong;

- return next;

-/**

- * Return the formatter associated to the passed index.

- * If the index is negative, it returns the default formatter.

- * @param index

- * @return

- */

-const string &

-RegExpState::get_elem(int index)

- return formatters[(index<0 ? 0 : index)]->elem;

-void

-RegExpState::add_exp(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f)

- const string &ex = buffer.str();

- if (ex.size())

- buffer << "|";

- buffer << s;

- formatters.push_back(f);

- subExpressions.push_back(make_pair(s, *parserInfo));

-void

-RegExpState::freeze()

- const string &buffered = buffer.str();

- try {

- reg_exp.assign(buffered);

- } catch (boost::bad_expression &e) {

- printError("bad expression: " + buffered);

- throw;

- }

-void

-RegExpState::set_default_formatter(RegExpFormatterPtr f)

- formatters[0] = f;

diff --git a/src/regexpstate.h b/src/regexpstate.h
deleted file mode 100644
index f38ea51..0000000
--- a/src/regexpstate.h
+++ /dev/null

@@ -1,78 +0,0 @@

-//

-// C++ Interface: %{MODULE}

-//

-// Description:

-//

-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}

-//

-// Copyright: See COPYING file that comes with this distribution

-//

-#ifndef REGEXPSTATE_H

-#define REGEXPSTATE_H

-#include <boost/regex.hpp>

-#include <boost/shared_ptr.hpp>

-#include <boost/weak_ptr.hpp>

-#include <deque>

-#include <vector>

-#include "my_sstream.h"

-#include "parserinfo.h"

-struct RegExpState;

-typedef boost::shared_ptr<RegExpState> RegExpStatePtr;

-typedef boost::weak_ptr<RegExpState> RegExpStatePtrW;

-struct RegExpFormatter

- const std::string elem; // the element represented

- int exit_state_level; // how many states we must leave

- bool exit_all;

- RegExpFormatter(const std::string &el, RegExpStatePtr r = RegExpStatePtr(), int exit = 0, bool all = false);

- void setNextState(RegExpStatePtr r);

- RegExpStatePtr getNextState() const;

- private:

- RegExpStatePtrW next_state;

- RegExpStatePtr next_state_strong;

- /*

- FIXME

- the next_state is a weak pointer when there's a "nested" situation.

- This allows to avoid cycles, that otherwise would prevent memory from

- being correctly freed.

- */

-};

-typedef boost::shared_ptr<RegExpFormatter> RegExpFormatterPtr;

-typedef std::deque<RegExpFormatterPtr> format_vector;

-typedef std::pair<std::string, ParserInfo> SubExpressionInfo;

-typedef std::vector<SubExpressionInfo> SubExpressions;

-/**

-class representing a state for the regular expression engine

-@author Lorenzo Bettini

- */

-struct RegExpState

- static int global_id;

- const int id; // the identifier of the state

- boost::regex reg_exp;

- SubExpressions subExpressions;

- format_vector formatters;

- std::ostringstream buffer;

- RegExpState() : id(global_id++), formatters(1) {}

- const std::string &get_elem(int index = -1);

- void add_exp(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f);

- void freeze();

- void set_default_formatter(RegExpFormatterPtr f);

- RegExpFormatterPtr getLastFormatter() const { return formatters[formatters.size()-1];}

-};

-#endif

diff --git a/src/ruby.lang b/src/ruby.lang
index 711fb7e..dcf64b7 100644
--- a/src/ruby.lang
+++ b/src/ruby.lang

@@ -7,7 +7,8 @@ include "number.lang"

string delim "\"" "\"" escape "\\"

string delim "'" "'" escape "\\"

string delim "<" ">"

-string = '[[:word:]]*/[^\n]*/[[:word:]]*'

+regexp = '/[^\n]*/'

+(symbol,regexp) = `(%r)(\{(?:\\\}|#\{[[:alnum:]]+\}|[^}])*\})`

@@ -15,9 +16,17 @@ comment delim '(^\=begin)' '^(\=end)' multiline

type = '(\$[#]?|@@|@)([[:word:]]+|\'|\"|/)'

+# don't highlight ? and ! as symbols if they are part of a method call

+normal = '[[:alnum:]]+(\?|!)'

include "symbols.lang"

+# for variable interpolation, #{ is not a comment

+(symbol,cbracket) = `(#)(\{)`

cbracket = "{|}"

-include "function.lang"

+# no function highlighting for Ruby, since a method invocation

+# can be written even without parenthesis

+# include "function.lang"

diff --git a/src/startapp.cc b/src/startapp.cc
index a025801..5409637 100644
--- a/src/startapp.cc
+++ b/src/startapp.cc

@@ -1,5 +1,5 @@

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

@@ -41,11 +41,10 @@

#include "parsestyles.h"

#include "generatorfactory.h"

-#include "textformatter.h"

#include "srcuntabifier.h"

#include "chartranslator.h"

#include "langdefloader.h"

-#include "lineoutputgenerator.h"

+#include "outputgenerator.h"

#include "langmap.h"

#include "regexpengine.h"

#include "regexpenginedebug.h"

@@ -54,6 +53,7 @@

#include "outlangdefparserfun.h"

#include "fileinfo.h"

#include "stopwatch.h"

+#include "textformatter.h"

#include "languageinfer.h"

@@ -72,10 +72,9 @@ ostream* sout;

#include "envmapper.h"

#endif // BUILD_AS_CGI

unsigned int line_num_digit = 0; // num of digits to represent line number

-gengetopt_args_info args_info ; // command line structure

+gengetopt_args_info args_info; // command line structure

static void print_cgi_header();

static void run_ctags(const string &cmd);

@@ -84,668 +83,621 @@ static void run_ctags(const string &cmd);

* Print progress status information (provided --quiet is not specified)

* @param message

-static void progressInfo(const string &message)

- if (args_info.quiet_given)

- return;

+static void progressInfo(const string &message) {

+ if (args_info.quiet_given)

+ return;

- cerr << message;

+ cerr << message;

}

StartApp::StartApp() :

- docgenerator(0), formatter(0), preformatter(0),

- langmap(new LangMap), outlangmap(new LangMap), generator_factory(0),

- entire_doc (0), verbose (0), cssUrl (0),

- use_css (0), is_cgi (0), gen_version(true),

- generate_line_num(false), generate_ref(false)

+ docgenerator(0), preformatter(0), langmap(new LangMap),

+ outlangmap(new LangMap), generator_factory(0), entire_doc(0),

+ verbose(0), cssUrl(0), use_css(0), is_cgi(0), gen_version(true),

+ generate_line_num(false), generate_ref(false) {

}

-StartApp::~StartApp()

- // cout << "destroying StartApp..." << endl;

- cmdline_parser_free(&args_info);

- if (formatter)

- delete formatter;

+StartApp::~StartApp() {

+ // cout << "destroying StartApp..." << endl;

+ cmdline_parser_free(&args_info);

- if (preformatter)

- delete preformatter;

+ if (preformatter)

+ delete preformatter;

- if (docgenerator)

- delete docgenerator;

+ if (docgenerator)

+ delete docgenerator;

- if (generator_factory)

- delete generator_factory;

+ if (generator_factory)

+ delete generator_factory;

}

-int

-StartApp::start(int argc, char * argv[])

- char *docTitle;

- char *docHeader; // the buffer with the header

- char *docFooter; // the buffer with the footer

- const char *header_fileName = 0;

- const char *footer_fileName = 0;

- unsigned i;

- int v;

- int tabSpaces = 0;

+int StartApp::start(int argc, char * argv[]) {

+ char *docTitle;

+ char *docHeader; // the buffer with the header

+ char *docFooter; // the buffer with the footer

+ const char *header_fileName = 0;

+ const char *footer_fileName = 0;

+ unsigned i;

+ int v;

+ int tabSpaces = 0;

#ifdef BUILD_AS_CGI

- // map environment to parameters if used as CGI

- char **temp_argv;

- temp_argv = map_environment(&argc, argv);

- is_cgi = temp_argv != argv;

- argv = temp_argv;

+ // map environment to parameters if used as CGI

+ char **temp_argv;

+ temp_argv = map_environment(&argc, argv);

+ is_cgi = temp_argv != argv;

+ argv = temp_argv;

#endif // BUILD_AS_CGI

+ if ((v = cmdline_parser(argc, argv, &args_info)) != 0)

+ // calls cmdline parser. The user gave bad args if it doesn't return 0

+ return EXIT_FAILURE;

+ if (args_info.version_given) {

+ print_version();

+ print_copyright();

+ return EXIT_SUCCESS;

+ }

- if((v = cmdline_parser(argc, argv, &args_info)) != 0)

- // calls cmdline parser. The user gived bag args if it doesn't return -1

- return EXIT_FAILURE;

+ if (args_info.help_given) {

+ cout << "GNU ";

+ cmdline_parser_print_help();

+ print_reportbugs();

+ return EXIT_SUCCESS;

+ }

- if (args_info.version_given)

- {

- print_version ();

- print_copyright ();

- return EXIT_SUCCESS;

+ gen_version = (args_info.gen_version_flag != 0);

+ /* initialization of global symbols */

+ inputFileName = outputFileName = 0;

+ sout = 0;

+ docTitle = 0;

+ docHeader = 0;

+ docFooter = 0;

+ docTitle = args_info.title_arg;

+ header_fileName = args_info.header_arg;

+ footer_fileName = args_info.footer_arg;

+ verbose = args_info.verbose_given;

+ const string style_file = args_info.style_file_arg;

+ if (args_info.tab_given > 0)

+ tabSpaces = args_info.tab_arg;

+ if (header_fileName)

+ docHeader = read_file(header_fileName);

+ if (footer_fileName)

+ docFooter = read_file(footer_fileName);

+ cssUrl = args_info.css_arg;

+ use_css = ( cssUrl != 0 );

+ entire_doc =(! args_info.no_doc_given) &&( args_info.doc_given || (docTitle != 0) || use_css );

+ string inputFileName;

+ if (args_info.input_given)

+ inputFileName = args_info.input_arg;

+ string outputFileName;

+ if (inputFileName.size()&& ! is_cgi && args_info.output_given)

+ outputFileName = args_info.output_arg;

+ bool generate_to_stdout =(args_info.output_arg &&

+ strcmp (args_info.output_arg, "STDOUT") == 0);

+ if (verbose)

+ setMessager(new DefaultMessages);

+ printMessage( PACKAGE);

+ printMessage( VERSION);

+ printMessage(argv[0]);

+ if (verbose) {

+ printMessage("command line arguments: ");

+ for (int i = 0; i < argc; ++i) {

+ printMessage(argv[i]);

+ }

}

- if (args_info.help_given)

- {

- cout << "GNU ";

- cmdline_parser_print_help ();

- print_reportbugs ();

- return EXIT_SUCCESS;

+ /*

+ the starting default path to search for files is computed at

+ run-time: it is

+ the path of the binary + ".." + RELATIVEDATADIR

+ this should make the package relocatable (i.e., not stuck

+ with a fixed installation directory).

+ Of course, the GNU standards for installation directories

+ should be followed, but this is not a problem if you use

+ configure and make install features.

+ If no path is specified in the running program we go back to

+ the absolute datadir.

+ */

+ // this is defined in fileutil.cc

+ string prefix_dir = get_file_path(argv[0]);

+ if (prefix_dir.size())

+ start_path = get_file_path(argv[0])+ RELATIVEDATADIR;

+ else

+ start_path = ABSOLUTEDATADIR;

+ if (args_info.data_dir_given)

+ data_dir = args_info.data_dir_arg;

+ if (args_info.show_regex_given) {

+ if (LangDefLoader::show_regex(data_dir, args_info.show_regex_arg)) {

+ return (EXIT_SUCCESS);

+ }

+ return (EXIT_FAILURE);

}

- gen_version = (args_info.gen_version_flag != 0);

+ if (args_info.check_lang_given) {

+ cout << "checking " << args_info.check_lang_arg << "... ";

+ if (LangDefLoader::check_lang_def(data_dir, args_info.check_lang_arg)) {

+ cout << "OK" << endl;

+ return (EXIT_SUCCESS);

+ }

- /* initialization of global symbols */

- inputFileName = outputFileName = 0 ;

- sout = 0 ;

- docTitle = 0 ;

- docHeader = 0 ;

- docFooter = 0 ;

+ return (EXIT_FAILURE);

+ }

+ if (args_info.check_outlang_given) {

+ cout << "checking " << args_info.check_outlang_arg << "... ";

+ textstyles = parse_outlang_def(data_dir.c_str(),

+ args_info.check_outlang_arg);

+ cout << "OK" << endl;

+ return (EXIT_SUCCESS);

+ }

+ if (args_info.show_lang_elements_given) {

+ // we simply print all the language elements defined in the

+ // language definition file

+ if (LangDefLoader::show_lang_elements(data_dir,

+ args_info.show_lang_elements_arg))

+ return EXIT_SUCCESS;

+ return EXIT_FAILURE;

+ }

+ string lang_map = args_info.lang_map_arg;

+ assert(lang_map.size());

+ if (! args_info.lang_def_given)

+ langmap = LangMapPtr(new LangMap(data_dir, lang_map));

- docTitle = args_info.title_arg ;

- header_fileName = args_info.header_arg ;

- footer_fileName = args_info.footer_arg ;

- verbose = args_info.verbose_given ;

- const string style_file = args_info.style_file_arg;

+ string outlang_map = args_info.outlang_map_arg;

+ assert(outlang_map.size());

+ if (! args_info.outlang_def_given)

+ outlangmap = LangMapPtr(new LangMap(data_dir, outlang_map));

- if ( args_info.tab_given > 0 )

- tabSpaces = args_info.tab_arg ;

+ if (args_info.lang_list_given) {

+ cout << "Supported languages (file extensions)\nand associated language definition files\n\n";

+ langmap->print();

+ return (EXIT_SUCCESS);

+ }

+ if (args_info.outlang_list_given) {

+ cout << "Supported output languages\nand associated language definition files\n\n";

+ outlangmap->print();

+ return (EXIT_SUCCESS);

+ }

- if (header_fileName)

- docHeader = read_file (header_fileName);

+ outputbuffer = new OutputBuffer;

+ // when debugging, always flush the output

+ outputbuffer->setAlwaysFlush(args_info.debug_langdef_given);

- if (footer_fileName)

- docFooter = read_file (footer_fileName);

+ string title;

+ string doc_header;

+ string doc_footer;

+ string css_url;

- cssUrl = args_info.css_arg ;

- use_css = ( cssUrl != 0 ) ;

+ if (docTitle)

+ title = docTitle;

+ if ((! docTitle) && inputFileName.size())

+ title = inputFileName;

+ if (docHeader)

+ doc_header = docHeader;

+ if (docFooter)

+ doc_footer = docFooter;

+ if (cssUrl)

+ css_url = cssUrl;

- entire_doc =

- (! args_info.no_doc_given) &&

- ( args_info.doc_given || (docTitle != 0) || use_css ) ;

+ if (args_info.line_number_ref_given)

+ args_info.line_number_given = args_info.line_number_ref_given;

- string inputFileName;

- if (args_info.input_given)

- inputFileName = args_info.input_arg ;

+ string outlangfile;

- string outputFileName;

- if ( inputFileName.size() && ! is_cgi && args_info.output_given)

- outputFileName = args_info.output_arg ;

+ if (! args_info.outlang_def_given) {

+ string out_format = args_info.out_format_arg;

- bool generate_to_stdout =

- (args_info.output_arg &&

- strcmp (args_info.output_arg, "STDOUT") == 0);

+ if (use_css)

+ out_format += "-css";

- if ( verbose )

- setMessager( new DefaultMessages ) ;

+ if (entire_doc)

+ out_format += "-doc";

- printMessage( PACKAGE ) ;

- printMessage( VERSION ) ;

- printMessage( argv[0] ) ;

+ outlangfile = outlangmap->get_file(out_format);

- if (verbose) {

- printMessage("command line arguments: ");

- for (int i = 0; i < argc; ++i) {

- printMessage(argv[i]);

+ if (! outlangfile.size()) {

+ cerr << PACKAGE << ": ";

+ cerr << "output language " << out_format<< " not handled" << endl;

+ return EXIT_FAILURE;

+ }

+ } else {

+ outlangfile = args_info.outlang_def_arg;

}

- }

- /*

- the starting default path to search for files is computed at

- run-time: it is

- the path of the binary + ".." + RELATIVEDATADIR

- this should make the package relocable (i.e., not stuck

- with a fixed installation directory).

- Of course, the GNU standards for installation directories

- should be followed, but this is not a problem if you use

- configure and make install features.

- If no path is specified in the running program we go back to

- the absolute datadir.

- */

- // this is defined in fileutil.cc

- string prefix_dir = get_file_path(argv[0]);

- if (prefix_dir.size())

- start_path = get_file_path(argv[0]) + RELATIVEDATADIR;

- else

- start_path = ABSOLUTEDATADIR;

- if (args_info.data_dir_given)

- data_dir = args_info.data_dir_arg;

- if (args_info.show_regex_given) {

- if (LangDefLoader::show_regex(data_dir, args_info.show_regex_arg)) {

- return(EXIT_SUCCESS);

+ textstyles = parse_outlang_def(data_dir.c_str(), outlangfile.c_str());

+ if (! textstyles->file_extension.size() && ! outputFileName.size()) {

+ cerr << PACKAGE << ": ";

+ cerr << "empty file extension in output language file " <<outlangfile << endl;

+ return EXIT_FAILURE;

+ }

+ const string ext = "." + textstyles->file_extension;

+ RefPosition refposition;

+ if (strcmp(args_info.gen_references_arg, "inline")==0)

+ refposition = INLINE;

+ else if (strcmp(args_info.gen_references_arg, "postline")==0)

+ refposition = POSTLINE;

+ else if (strcmp(args_info.gen_references_arg, "postdoc")==0)

+ refposition = POSTDOC;

+ else {

+ cerr << PACKAGE << ": ";

+ cerr << "Bug: unhandled reference position " <<args_info.gen_references_arg << endl;

+ return EXIT_FAILURE;

}

- return (EXIT_FAILURE);

- }

+ if (args_info.gen_references_given && strlen(args_info.ctags_arg)> 0) {

+ string ctags_cmd = args_info.ctags_arg;

+ if (inputFileName.size()) {

+ ctags_cmd += " ";

+ ctags_cmd += inputFileName;

+ } else if (args_info.inputs_num) {

+ for (i = 0; i < (args_info.inputs_num); ++i) {

+ ctags_cmd += " ";

+ ctags_cmd += args_info.inputs[i];

+ }

- if (args_info.check_lang_given) {

- cout << "checking " << args_info.check_lang_arg << "... ";

- if (LangDefLoader::check_lang_def(data_dir, args_info.check_lang_arg)) {

- cout << "OK" << endl;

- return(EXIT_SUCCESS);

+ run_ctags(ctags_cmd);

}

- return (EXIT_FAILURE);

- }

- if (args_info.check_outlang_given) {

- cout << "checking " << args_info.check_outlang_arg << "... ";

- textstyles = parse_outlang_def(data_dir.c_str(), args_info.check_outlang_arg);

- cout << "OK" << endl;

- return (EXIT_SUCCESS);

- }

- if (args_info.show_lang_elements_given) {

- // we simply printe all the language elements defined in the

- // language definition file

- if (LangDefLoader::show_lang_elements(data_dir, args_info.show_lang_elements_arg))

- return EXIT_SUCCESS;

- return EXIT_FAILURE;

- }

- string lang_map = args_info.lang_map_arg;

- assert(lang_map.size());

- if (! args_info.lang_def_given)

- langmap = LangMapPtr(new LangMap(data_dir, lang_map));

- string outlang_map = args_info.outlang_map_arg;

- assert(outlang_map.size());

- if (! args_info.outlang_def_given)

- outlangmap = LangMapPtr(new LangMap(data_dir, outlang_map));

- if (args_info.lang_list_given) {

- cout << "Supported languages (file extensions)\nand associated language definition files\n\n";

- langmap->print();

- return (EXIT_SUCCESS);

- }

- if (args_info.outlang_list_given) {

- cout << "Supported output languages\nand associated language definition files\n\n";

- outlangmap->print();

- return (EXIT_SUCCESS);

- }

- outputbuffer = new OutputBuffer;

- // when debugging, always flush the output

- outputbuffer->setAlwaysFlush( args_info.debug_langdef_given );

- string title;

- string doc_header;

- string doc_footer;

- string css_url;

- if (docTitle)

- title = docTitle;

- if ((! docTitle) && inputFileName.size())

- title = inputFileName;

- if (docHeader)

- doc_header = docHeader;

- if (docFooter)

- doc_footer = docFooter;

- if (cssUrl)

- css_url = cssUrl;

- if (args_info.line_number_ref_given)

- args_info.line_number_given = args_info.line_number_ref_given;

- string outlangfile;

- if (! args_info.outlang_def_given) {

- string out_format = args_info.out_format_arg;

- if (use_css)

- out_format += "-css";

- if (entire_doc)

- out_format += "-doc";

- outlangfile = outlangmap->get_file(out_format);

- if (! outlangfile.size()) {

- cerr << PACKAGE << ": ";

- cerr << "output language " << out_format

- << " not handled" << endl;

- return EXIT_FAILURE ;

+ if (tabSpaces)

+ preformatter = new Untabifier (tabSpaces);

+ else if (args_info.line_number_given)

+ preformatter = new Untabifier(8);

+ else

+ preformatter = new PreFormatter();

+ PreFormatterPtr chartranslator(textstyles->charTranslator);

+ preformatter->setFormatter(chartranslator);

+ string background_color;

+ generator_factory =new GeneratorFactory(textstyles, preformatter,

+ args_info.gen_references_given,

+ args_info.ctags_file_arg,

+ refposition, args_info.debug_langdef_given);

+ if (args_info.style_css_file_given) {

+ parseCssStyles(data_dir, args_info.style_css_file_arg,

+ generator_factory, background_color);

+ } else {

+ parseStyles(data_dir, style_file, generator_factory, background_color);

}

- } else {

- outlangfile = args_info.outlang_def_arg;

- }

- textstyles = parse_outlang_def(data_dir.c_str(), outlangfile.c_str());

- if (! textstyles->file_extension.size() && ! outputFileName.size()) {

- cerr << PACKAGE << ": ";

- cerr << "empty file extension in output language file " <<

- outlangfile << endl;

- return EXIT_FAILURE ;

- }

- const string ext = "." + textstyles->file_extension;

- RefPosition refposition;

- if (strcmp(args_info.gen_references_arg, "inline")==0)

- refposition = INLINE;

- else if (strcmp(args_info.gen_references_arg, "postline")==0)

- refposition = POSTLINE;

- else if (strcmp(args_info.gen_references_arg, "postdoc")==0)

- refposition = POSTDOC;

- else {

- cerr << PACKAGE << ": ";

- cerr << "Bug: unhandled reference position " <<

- args_info.gen_references_arg << endl;

- return EXIT_FAILURE ;

- }

- if (args_info.gen_references_given && strlen(args_info.ctags_arg) > 0) {

- string ctags_cmd = args_info.ctags_arg;

- if (inputFileName.size()) {

- ctags_cmd += " ";

- ctags_cmd += inputFileName;

- } else if (args_info.inputs_num) {

- for ( i = 0 ; i < (args_info.inputs_num) ; ++i ) {

- ctags_cmd += " ";

- ctags_cmd += args_info.inputs[i];

- }

+ generator_factory->addDefaultGenerator();

+ if (background_color != "")

+ background_color = generator_factory->preprocessColor(background_color);

+ docgenerator = new DocGenerator(title, inputFileName,

+ doc_header, doc_footer,

+ css_url, background_color, entire_doc,

+ textstyles->docTemplate.toStringBegin(),

+ textstyles->docTemplate.toStringEnd());;

+ if (is_cgi)

+ print_cgi_header();

+ // let's start the translation :-)

+ generate_line_num =(args_info.line_number_given || args_info.line_number_ref_given);

+ generate_ref = args_info.line_number_ref_given;

+ if (args_info.lang_def_arg)

+ lang_file = args_info.lang_def_arg;

+ int result= EXIT_SUCCESS;

+ if (args_info.src_lang_given)

+ source_language = args_info.src_lang_arg;

+ // if a stopwatch is created, when it is deleted (automatically

+ // since we're using a shared pointer, it will print the

+ // elapsed seconds.

+ boost::shared_ptr<StopWatch> stopwatch;

+ if (args_info.statistics_given)

+ stopwatch = boost::shared_ptr<StopWatch>(new StopWatch);

+ // first the --input file

+ if (! args_info.inputs_num) {

+ result = processFile(inputFileName, (generate_to_stdout ? "" : outputFileName), ext);

}

- run_ctags(ctags_cmd);

- }

- formatter = new TextFormatter;

- if (tabSpaces)

- preformatter = new Untabifier (tabSpaces);

- else if (args_info.line_number_given)

- preformatter = new Untabifier(8);

- else

- preformatter = new PreFormatter();

- PreFormatterPtr chartranslator(textstyles->charTranslator);

- preformatter->setFormatter(chartranslator);

- string background_color;

- generator_factory =

- new GeneratorFactory(textstyles, preformatter,

- args_info.gen_references_given,

- args_info.ctags_file_arg,

- refposition, args_info.debug_langdef_given);

- if (args_info.style_css_file_given) {

- parseCssStyles(data_dir, args_info.style_css_file_arg, generator_factory, background_color);

- } else {

- parseStyles(data_dir, style_file, generator_factory, background_color);

- }

- generator_factory->addDefaultGenerator();

- if (background_color != "")

- background_color = generator_factory->preprocessColor( background_color );

- docgenerator = new DocGenerator(title, inputFileName,

- doc_header, doc_footer,

- css_url, background_color, entire_doc,

- textstyles->docTemplate.toStringBegin(),

- textstyles->docTemplate.toStringEnd());;

- if ( is_cgi )

- print_cgi_header() ;

- // let's start the translation :-)

- generate_line_num =

- (args_info.line_number_given || args_info.line_number_ref_given);

- generate_ref = args_info.line_number_ref_given;

- if (args_info.lang_def_arg)

- lang_file = args_info.lang_def_arg;

- int result = EXIT_SUCCESS;

- if (args_info.src_lang_given)

- source_language = args_info.src_lang_arg;

- // if a stopwatch is created, when it is deleted (automatically

- // since we're using a shared pointer, it will print the

- // elapsed seconds.

- boost::shared_ptr<StopWatch> stopwatch;

- if (args_info.statistics_given)

- stopwatch = boost::shared_ptr<StopWatch>(new StopWatch);

- // first the --input file

- if ( ! args_info.inputs_num ) {

- result = processFile(inputFileName, (generate_to_stdout ? "" : outputFileName), ext) ;

- }

- // let's process other files, if there are any

- if ( args_info.inputs_num && !is_cgi ) {

- for ( i = 0 ; i < (args_info.inputs_num) ; ++i ) {

- progressInfo(string("Processing ") + args_info.inputs[i] + " ... ");

- const string &outputFileName = createOutputFileName (args_info.inputs[i],

- args_info.output_dir_arg, ext);

- result = processFile

- ( args_info.inputs[i],

- (generate_to_stdout ? "" : outputFileName),

- ext) ;

- if (result == EXIT_FAILURE)

- break;

- progressInfo("created " + outputFileName + "\n");

+ // let's process other files, if there are any

+ if (args_info.inputs_num && !is_cgi) {

+ for (i = 0; i < (args_info.inputs_num); ++i) {

+ progressInfo(string("Processing ")+ args_info.inputs[i] + " ... ");

+ const string &outputFileName = createOutputFileName(

+ args_info.inputs[i], args_info.output_dir_arg, ext);

+ result = processFile(args_info.inputs[i], (generate_to_stdout ? "" : outputFileName), ext);

+ if (result == EXIT_FAILURE)

+ break;

+ progressInfo("created " + outputFileName + "\n");

+ }

}

- }

- delete outputbuffer;

- outputbuffer = 0;

+ delete outputbuffer;

+ outputbuffer = 0;

- return (result);

+ return (result);

}

-void

-StartApp::print_copyright()

- int i;

+void StartApp::print_copyright() {

+ int i;

- for (i = 1; i <= copyright_text_length; ++i)

- cout << copyright_text[i] << endl;;

+ for (i = 1; i <= copyright_text_length; ++i)

+ cout << copyright_text[i] << endl;

+ ;

}

-void

-StartApp::print_reportbugs()

- int i;

+void StartApp::print_reportbugs() {

+ int i;

- for (i = 1; i <= reportbugs_text_length; ++i)

- cout << reportbugs_text[i] << endl;

+ for (i = 1; i <= reportbugs_text_length; ++i)

+ cout << reportbugs_text[i] << endl;

}

-void

-StartApp::print_version()

- cout << "GNU " << PACKAGE << " " << VERSION << endl;

+void StartApp::print_version() {

+ cout << "GNU " << PACKAGE << " " << VERSION << endl;

}

int process_file(const char *file, TextFormatter *pre, const string &path,

- const string &lang_file, FileInfo *fileinfo, bool verbose)

- RegExpStatePtr initial_state = LangDefLoader::get_lang_def(path, lang_file);

- try{

- printMessage("Processing " + string((file ? file : "standard input")) + " with regex");

- printMessage("Using language definition " + lang_file);

- RegExpEnginePtr engine;

- if (args_info.debug_langdef_given) {

- RegExpEngineDebug *debugEngine = new RegExpEngineDebug(initial_state, pre, fileinfo);

- debugEngine->setInteractive( strcmp(args_info.debug_langdef_arg, "interactive" ) == 0);

- engine = RegExpEnginePtr(debugEngine);

- } else {

- engine = RegExpEnginePtr(new RegExpEngine(initial_state, pre, fileinfo));

+ const string &lang_file, FileInfo *fileinfo, bool verbose) {

+ RegExpStatePtr initial_state = LangDefLoader::get_lang_def(path, lang_file);

+ try {

+ printMessage("Processing " + string((file ? file : "standard input")) + " with regex");

+ printMessage("Using language definition " + lang_file);

+ RegExpEnginePtr engine;

+ if (args_info.debug_langdef_given) {

+ RegExpEngineDebug *debugEngine = new RegExpEngineDebug(initial_state, pre, fileinfo);

+ debugEngine->setInteractive( strcmp(args_info.debug_langdef_arg, "interactive" ) == 0);

+ engine = RegExpEnginePtr(debugEngine);

+ } else {

+ engine = RegExpEnginePtr(new RegExpEngine(initial_state, pre, fileinfo));

+ }

+ engine->process_file(file);

}

- engine->process_file(file);

- }

- catch(...)

- {

- exitError("error during regex processing");

- }

- return 0;

+ catch(...)

+ {

+ exitError("error during regex processing");

+ }

+ return 0;

}

-string StartApp::inferLang(const string &inputFileName)

- printMessage("inferring input language...", cerr);

- if (!inputFileName.size()) {

- cerr << PACKAGE << ": ";

- cerr << "missing feature: language inference requires input file" << endl;

- return "";

- }

+string StartApp::inferLang(const string &inputFileName) {

+ printMessage("inferring input language...", cerr);

+ if (!inputFileName.size()) {

+ cerr << PACKAGE << ": ";

+ cerr << "missing feature: language inference requires input file" << endl;

+ return "";

+ }

- LanguageInfer languageInfer;

+ LanguageInfer languageInfer;

- const string &result = languageInfer.infer(inputFileName);

- if (result.size()) {

- printMessage( "inferred input language: " + result, cerr ) ;

+ const string &result = languageInfer.infer(inputFileName);

+ if (result.size()) {

+ printMessage("inferred input language: " + result, cerr);

- // OK now map it into a .lang file

- string mapped_lang = langmap->get_file(result);

+ // OK now map it into a .lang file

+ string mapped_lang = langmap->get_file(result);

- if (!mapped_lang.size()) {

- // try the lower version

- mapped_lang = langmap->get_file(Utils::tolower(result));

- }

+ if (!mapped_lang.size()) {

+ // try the lower version

+ mapped_lang = langmap->get_file(Utils::tolower(result));

+ }

- if (mapped_lang.size()) {

- return mapped_lang;

+ if (mapped_lang.size()) {

+ return mapped_lang;

+ }

+ } else {

+ printMessage("couldn't infer input language", cerr);

}

- } else {

- printMessage( "couldn't infer input language", cerr ) ;

- }

- return "";

+ return "";

}

-int

-StartApp::processFile(const string &inputFileName, const string &outputFileName, const string &file_extension)

- FILE *in = 0;

- bool deleteOStream = false ;

- bool langSpecFound = false;

- if ( outputFileName.size() ) {

- sout = new ofstream(outputFileName.c_str()) ;

- if ( ! (*sout) ) {

- cerr << "Error in creating " << outputFileName << " for output" << endl ;

- return EXIT_FAILURE ;

+int StartApp::processFile(const string &inputFileName,

+ const string &outputFileName, const string &file_extension) {

+ FILE *in = 0;

+ bool deleteOStream = false;

+ bool langSpecFound = false;

+ if (outputFileName.size()) {

+ sout = new ofstream(outputFileName.c_str());

+ if (! (*sout)) {

+ cerr << "Error in creating " << outputFileName << " for output" << endl;

+ return EXIT_FAILURE;

+ }

+ deleteOStream = true;

+ printMessage("output file: " + inputFileName);

}

- deleteOStream = true;

- printMessage("output file: " + inputFileName);

- }

- if (inputFileName.size())

- {

- unsigned int lines = get_line_count (inputFileName);

- printMessage("input file: " + inputFileName);

- line_num_digit = 0;

- while (lines)

- {

- ++line_num_digit;

- lines /= 10;

+ if (inputFileName.size()) {

+ unsigned int lines = get_line_count(inputFileName);

+ printMessage("input file: " + inputFileName);

+ line_num_digit = 0;

+ while (lines) {

+ ++line_num_digit;

+ lines /= 10;

}

+ } else

+ line_num_digit = 5;

+ // if we read from stdin, we can't read the file in advance and

+ // check how many lines of code it contains. In this case set

+ // the number of digit for the line number to 5.

+ /*

+ * Use default values for any options not provided

+ */

+ if (sout == 0) {

+ sout = &cout;

}

- else

- line_num_digit = 5;

- // if we read from stdin, we can't read the file in advance and

- // check how many lines of code it contains. In this case set

- // the number of digit for the line number to 5.

- /*

- * Use default values for any options not provided

- */

- if (sout == 0) {

- sout = &cout;

- }

- if (in == 0) {

- ; /* Well stdin already points to stdin so, .... */

- }

- OutputGenerator *outputgenerator = 0;

- if (generate_line_num)

- outputgenerator =

- new LineOutputGenerator(*sout,

- &(textstyles->refstyle.anchor), generate_ref,

- (args_info.line_number_ref_given ? args_info.line_number_ref_arg : ""),

- textstyles->line_prefix);

- else

- outputgenerator = new OutputGenerator(*sout, textstyles->line_prefix);

- // when debugging, always flush the output

- outputgenerator->setAlwaysFlush( args_info.debug_langdef_given );

- outputbuffer->setOutputGenerator(outputgenerator);

- docgenerator->set_gen_version (gen_version);

- printMessage( "translating source code... ", cerr ) ;

- string langfile = lang_file;

- if (args_info.infer_lang_given) {

- langfile = inferLang(inputFileName);

- if (langfile.size())

- langSpecFound = true;

- }

- // language inference has the precedence (if it succeeds)

- if (!langfile.size() && !langSpecFound) {

- // find the language definition file associated to a language

- if (source_language.size()) {

- langfile = langmap->get_file(source_language);

- if (! langfile.size())

- {

- if (! args_info.failsafe_given)

- {

- cerr << PACKAGE << ": ";

- cerr << "source language " << source_language

- << " not handled" << endl;

- return EXIT_FAILURE ;

- }

+ if (in == 0) {

+ ; /* Well stdin already points to stdin so, .... */

+ }

+ OutputGenerator *outputgenerator = 0;

+ if (generate_line_num)

+ outputgenerator =new OutputGenerator(*sout, generator_factory->getTextFormatter()->getGenerator("linenum"),

+ &(textstyles->refstyle.anchor), generate_ref,

+ (args_info.line_number_ref_given ? args_info.line_number_ref_arg : ""),

+ textstyles->line_prefix);

+ else

+ outputgenerator = new OutputGenerator(*sout, textstyles->line_prefix);

+ // when debugging, always flush the output

+ outputgenerator->setAlwaysFlush(args_info.debug_langdef_given);

+ outputbuffer->setOutputGenerator(outputgenerator);

+ docgenerator->set_gen_version(gen_version);

+ printMessage("translating source code... ", cerr);

+ string langfile = lang_file;

+ if (args_info.infer_lang_given) {

+ langfile = inferLang(inputFileName);

+ if (langfile.size())

+ langSpecFound = true;

+ }

+ // language inference has the precedence (if it succeeds)

+ if (!langfile.size() && !langSpecFound) {

+ // find the language definition file associated to a language

+ if (source_language.size()) {

+ langfile = langmap->get_file(source_language);

+ if (! langfile.size()) {

+ if (! args_info.failsafe_given) {

+ cerr << PACKAGE << ": ";

+ cerr << "source language " << source_language<< " not handled" << endl;

+ return EXIT_FAILURE;

+ }

+ } else

+ langSpecFound = true;

+ } else {

+ if (! inputFileName.size()) {

+ if (! args_info.failsafe_given) {

+ cerr << PACKAGE << ": ";

+ cerr << "when using stdin, please specify a source language"<< endl;

+ return EXIT_FAILURE;

+ }

+ string file_ext = get_file_extension(inputFileName);

+ if (file_ext != "")

+ langfile = langmap->get_file(file_ext);

+ if (langfile.size())

+ langSpecFound = true;

}

- else

+ } else

langSpecFound = true;

- } else {

- if (! inputFileName.size())

- {

- if (! args_info.failsafe_given)

- {

- cerr << PACKAGE << ": ";

- cerr << "when using stdin, please specify a source language"

- << endl;

- return EXIT_FAILURE ;

- }

- string file_ext = get_file_extension (inputFileName);

+ // language inference is always performed, if the other attempts failed

+ // if --infer-lang was specified at command line, then the inference

+ // has already been performed, otherwise we perform it now

+ if (!langSpecFound && !args_info.infer_lang_given) {

+ langfile = inferLang(inputFileName);

- if (file_ext != "")

- langfile = langmap->get_file(file_ext);

+ if (langfile.size())

+ langSpecFound = true;

+ }

- if (langfile.size())

+ if (!langSpecFound && args_info.failsafe_given) {

+ // OK we use default.lang

+ langfile = "default.lang";

langSpecFound = true;

}

- }

- else

- langSpecFound = true;

- // language inference is always performed, if the other attempts failed

- // if --infer-lang was specified at command line, then the inference

- // has already been performed, otherwise we perform it now

- if (!langSpecFound && !args_info.infer_lang_given) {

- langfile = inferLang(inputFileName);

- if (langfile.size())

- langSpecFound = true;

- }

- if (!langSpecFound && args_info.failsafe_given) {

- // OK we use default.lang

- langfile = "default.lang";

- langSpecFound = true;

- }

- if (langSpecFound)

- {

- docgenerator->generate_start_doc ();

- const string &i_file_name = get_input_file_name(inputFileName);

- const char *input_file_name = (i_file_name.size() ? i_file_name.c_str() : 0);

- FileInfo fileinfo(i_file_name, outputFileName);

- process_file(input_file_name, formatter, data_dir, langfile,

- &fileinfo, verbose);

- outputbuffer->flush();

- docgenerator->generate_end_doc ();

- printMessage( "done !", cerr ) ;

- } else {

- cerr << PACKAGE << ": ";

- cerr << "unknown input language for "

- << (inputFileName.size() ? inputFileName : "(stdin)") << endl;

- return EXIT_FAILURE ;

- }

- /*

- else // we're in failsafe mode so we simply copy the file to the output

- {

- istream *input;

- if(! inputFileName.size())

- input = &cin;

- else

- input = open_file_istream_or_error(inputFileName);

- *sout << input->rdbuf();

+ if (langSpecFound) {

+ docgenerator->generate_start_doc();

+ const string &i_file_name = get_input_file_name(inputFileName);

+ const char *input_file_name = (i_file_name.size() ? i_file_name.c_str() : 0);

- if (input != &cin)

- delete input;

- }

- */

+ FileInfo fileinfo(i_file_name, outputFileName);

+ process_file(input_file_name, generator_factory->getTextFormatter(),

+ data_dir, langfile, &fileinfo, verbose);

- sout->flush ();

+ outputbuffer->flush();

- if ( deleteOStream )

- delete sout ;

+ docgenerator->generate_end_doc();

+ printMessage("done !", cerr);

+ } else {

+ cerr << PACKAGE << ": ";

+ cerr << "unknown input language for "<< (inputFileName.size() ? inputFileName : "(stdin)") << endl;

+ return EXIT_FAILURE;

+ }

- delete outputgenerator;

+ /*

+ else // we're in failsafe mode so we simply copy the file to the output

+ {

+ istream *input;

+ if(! inputFileName.size())

+ input = &cin;

+ else

+ input = open_file_istream_or_error(inputFileName);

- return EXIT_SUCCESS;

+ *sout << input->rdbuf();

+ if (input != &cin)

+ delete input;

+ }

+ */

+ sout->flush();

+ if (deleteOStream)

+ delete sout;

+ delete outputgenerator;

+ return EXIT_SUCCESS;

}

-void run_ctags(const string &cmd)

- printMessage("Running ctags: " + cmd);

+void run_ctags(const string &cmd) {

+ printMessage("Running ctags: " + cmd);

- int res = system(cmd.c_str());

+ int res = system(cmd.c_str());

- if (res != 0) {

- exitError("error running ctags");

- }

+ if (res != 0) {

+ exitError("error running ctags");

+ }

}

-void

-print_cgi_header()

- printf( "Content-type: text/html\n" ) ;

- printf( "\n" ) ;

+void print_cgi_header() {

+ printf("Content-type: text/html\n");

+ printf("\n");

}

diff --git a/src/startapp.h b/src/startapp.h
index f98327b..8c83240 100644
--- a/src/startapp.h
+++ b/src/startapp.h

@@ -23,7 +23,6 @@

#include "langmap.h"

#include "textstyles.h"

-class TextFormatter;

class PreFormatter;

class DocGenerator;

class GeneratorFactory;

@@ -33,7 +32,6 @@ class StartApp

protected:

char *inputFileName, *outputFileName; /* what we're reading */

DocGenerator *docgenerator;

- TextFormatter *formatter;

PreFormatter *preformatter;

LangMapPtr langmap;

LangMapPtr outlangmap;

diff --git a/src/texinfo.style b/src/texinfo.style
index 3adcf37..9d15cd7 100644
--- a/src/texinfo.style
+++ b/src/texinfo.style

@@ -1,6 +1,7 @@

keyword, type b ;

variable f, i ;

string f ;

+regexp f ;

comment nf, i, noref ;

preproc b ;

diff --git a/src/xhtml_notfixed.outlang b/src/xhtml_notfixed.outlang
new file mode 100644
index 0000000..d5a8eb0
--- /dev/null
+++ b/src/xhtml_notfixed.outlang

@@ -0,0 +1,16 @@

+include "xhtml_common.outlang"

+fixed "<span style=\"font-family: monospace;\">$text</span>"

+doctemplate

+"

+$header"

+"$footer"

+end

+translations

+"\n" "<br />\n"

+" " "  "

+'^ ' " " # a space at the beginning of a line

+"\t" "        "

+end

diff --git a/src/xhtmltable.outlang b/src/xhtmltable.outlang
index 217b09d..ec73fc1 100644
--- a/src/xhtmltable.outlang
+++ b/src/xhtmltable.outlang

@@ -1,14 +1,7 @@

include "xhtml_common.outlang"

doctemplate

-"<table style=\"background-color: $docbgcolor\">

-<tbody>

-<tr><td>

-<pre><tt>"

-"</tt></pre>

-</td></tr>

-</tbody>

-</table>

+"<table style=\"background-color: $docbgcolor\"><tbody><tr><td><pre><tt>"

+"</tt></pre></td></tr></tbody></table>"

end

generated by cgit v1.2.3 (git 2.39.1) at 2025年09月16日 07:37:56 +0000